From 0f67b6d62fa9199460304c7673b21e0dadc18294 Mon Sep 17 00:00:00 2001 From: tancheng Date: Sat, 21 Dec 2024 23:49:18 +0000 Subject: [PATCH 01/19] [feature] CtrlMemDynamicRTL to model control memory with dynamic action handling --- lib/messages.py | 33 ++++++++- mem/ctrl/CtrlMemDynamicRTL.py | 129 ++++++++++++++++++++++++++++++++++ 2 files changed, 161 insertions(+), 1 deletion(-) create mode 100644 mem/ctrl/CtrlMemDynamicRTL.py diff --git a/lib/messages.py b/lib/messages.py index 946d5ab..ffca2eb 100644 --- a/lib/messages.py +++ b/lib/messages.py @@ -237,7 +237,6 @@ def str_func(s): namespace = {'__str__': str_func} ) - #========================================================================= # Ring multi-CGRA data/config/cmd packet #========================================================================= @@ -294,6 +293,38 @@ def str_func(s): namespace = {'__str__': str_func} ) +#========================================================================= +# Ring for delivering ctrl signals and commands across tiles +#========================================================================= + +def mk_ring_across_tiles_pkt(nrouters = 4, ctrl_action_nbits = 2, + ctrl_addr_nbits = 4, ctrl_signal_nbits = 10, + prefix="RingAcrossTilesPacket"): + + IdType = mk_bits(clog2(nrouters)) + OpqType = mk_bits(1) + CtrlActionType = mk_bits(ctrl_action_nbits) + CtrlAddrType = mk_bits(ctrl_addr_nbits) + CtrlSignalType = mk_bits(ctrl_signal_nbits) + + new_name = f"{prefix}_{nrouters}_{opaque_nbits}_{ctrl_action_nbits}_" \ + f"{ctrl_addr_nbits}_{ctrl_signal_nbits}" + + def str_func(s): + return f"{s.src}>{s.dst}:{s.opaque}:{s.ctrl_action}.{s.ctrl_addr}." 
\ + f"{s.ctrl_signal}" + + return mk_bitstruct(new_name, { + 'src': IdType, + 'dst': IdType, + 'opaque': OpqType, + 'ctrl_action': CtrlActionType, + 'ctrl_addr': CtrlAddrType, + 'ctrl_signal': CtrlSignalType, + }, + namespace = {'__str__': str_func} + ) + #========================================================================= # Crossbar (tiles <-> SRAM) packet #========================================================================= diff --git a/mem/ctrl/CtrlMemDynamicRTL.py b/mem/ctrl/CtrlMemDynamicRTL.py new file mode 100644 index 0000000..9b7f02b --- /dev/null +++ b/mem/ctrl/CtrlMemDynamicRTL.py @@ -0,0 +1,129 @@ +""" +========================================================================== +CtrlMemDynamicRTL.py +========================================================================== +Control memory with dynamic reconfigurability (e.g., receiving control +signals, halt/terminate signals) for each CGRA tile. + +Author : Cheng Tan + Date : Dec 20, 2024 +""" + +from pymtl3 import * +from pymtl3.stdlib.dstruct.queues import NormalQueue +from pymtl3.stdlib.primitive import RegisterFile +from ...lib.basic.en_rdy.ifcs import SendIfcRTL, RecvIfcRTL +from ...lib.opt_type import * + +class CtrlMemDynamicRTL(Component): + + def construct(s, CtrlPktType, CtrlActionType, CtrlSignalType, + ctrl_mem_size, ctrl_count_per_iter = 4, + total_ctrl_steps = 4): + + # The total_ctrl_steps indicates the number of steps the ctrl + # signals should proceed. For example, if the number of ctrl + # signals is 4 and they need to repeat 5 times, then the total + # number of steps should be 4 * 5 = 20. 
+ # assert( ctrl_mem_size <= total_ctrl_steps ) + + # Constant + CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) + PCType = mk_bits(clog2(ctrl_count_per_iter + 1)) + TimeType = mk_bits(clog2(total_ctrl_steps + 1)) + + # Interface + s.send_ctrl = SendIfcRTL(CtrlType) + # s.recv_waddr = RecvIfcRTL(AddrType) + # s.recv_ctrl = RecvIfcRTL(CtrlType) + + s.recv_pkt = RecvIfcRTL(CtrPktType) + + # Component + s.reg_file = RegisterFile(CtrlType, ctrl_mem_size, 1, 1) + s.recv_pkt_queue = NormalQueue(CtrPktType) + s.times = Wire(TimeType) + s.start_iterate_ctrl = Wire(b1) + + # Connections + s.send_ctrl.msg //= s.reg_file.rdata[0] + s.recv_pkt_queue.recv //= s.recv_pkt.recv + # s.reg_file.waddr[0] //= s.recv_waddr.msg + # s.reg_file.wdata[0] //= s.recv_ctrl.msg + # s.reg_file.wen[0] //= lambda: s.recv_ctrl.en & s.recv_waddr.en + + @update + def update_msg(): + if s.recv_pkt_queue.send.msg.ctrl_action == CMD_CONFIG: + s.reg_file.waddr[0] //= s.recv_pkt_queue.send.msg.ctrl_addr + s.reg_file.wdata[0] //= s.recv_pkt_queue.send.msg.ctrl_data + s.reg_file.wen[0] //= s.recv_pkt_queue.send.en + + # @yo96? depending on data, causing combinational loop or not? + if s.recv_pkt_queue.send.msg.ctrl_action == CMD_CONFIG | \ + s.recv_pkt_queue.send.msg.ctrl_action == CMD_LAUNCH | \ + s.recv_pkt_queue.send.msg.ctrl_action == CMD_TERMINATE | \ + s.recv_pkt_queue.send.msg.ctrl_action == CMD_HALT: + s.recv_pkt_queue.send.rdy @= 1 + # TODO: Extend for the other commands. Maybe another queue to + # handle complicated actions. + # else: + + + @update + def update_send_out_signal(): + if s.start_iterate_ctrl == b1(1): + if ((total_ctrl_steps > 0) & \ + (s.times == TimeType(total_ctrl_steps))) | \ + (s.reg_file.rdata[0].ctrl == OPT_START): + s.send_ctrl.en @= b1(0) + else: + s.send_ctrl.en @= s.send_ctrl.rdy + # @yo96? What would happen if we overwrite? ok? 
+ if s.recv_pkt_queue.send.msg.ctrl_action == CMD_LAUNCH | \ + s.recv_pkt_queue.send.msg.ctrl_action == CMD_TERMINATE: + s.send_ctrl.en @= b1(0) + + # @update + # def update_signal(): + # if ((total_ctrl_steps > 0) & \ + # (s.times == TimeType(total_ctrl_steps))) | \ + # (s.reg_file.rdata[0].ctrl == OPT_START): + # s.send_ctrl.en @= b1(0) + # else: + # s.send_ctrl.en @= s.send_ctrl.rdy # s.recv_raddr[i].rdy + # s.recv_waddr.rdy @= b1(1) + # s.recv_ctrl.rdy @= b1(1) + # s.recv_pkt.rdy @= recv_pkt_queue.recv.rdy + + @update_ff + def update_whether_we_can_iterate_ctrl(): + # if s.reg_file.rdata[0].ctrl != OPT_START: + # @yo96? data is still there, not released yet? + if s.recv_pkt_queue.send.msg.ctrl_action == CMD_LAUNCH: + s.start_iterate_ctrl <<= 1 + elif s.recv_pkt_queue.send.msg.ctrl_action == CMD_TERMINATE: + s.start_iterate_ctrl <<= 0 + else: + s.start_iterate_ctrl <<= 1 + + @update_ff + def update_raddr(): + # if s.reg_file.rdata[0].ctrl != OPT_START: + if s.start_iterate_ctrl == b1(1): + # @yo96? There is no else, what would happen on the s.times and raddr[0]? + if (total_ctrl_steps == 0) | \ + (s.times < TimeType(total_ctrl_steps)): + s.times <<= s.times + TimeType(1) + # Reads the next ctrl signal only when the current one is done. 
+ if s.send_ctrl.rdy: + if zext(s.reg_file.raddr[0] + 1, PCType) == \ + PCType(ctrl_count_per_iter): + s.reg_file.raddr[0] <<= AddrType(0) + else: + s.reg_file.raddr[0] <<= s.reg_file.raddr[0] + AddrType(1) + + def line_trace(s): + out_str = "||".join([str(data) for data in s.reg_file.regs]) + return f'{s.recv_ctrl.msg} : [{out_str}] : {s.send_ctrl.msg}' + From 2ea2ecdea5b52af6e8d9df9d5b636f5616e0e04d Mon Sep 17 00:00:00 2001 From: tancheng Date: Sun, 22 Dec 2024 10:11:10 +0000 Subject: [PATCH 02/19] [test] CtrlMemDynamicRTL_test --- lib/messages.py | 104 ++++++++++++++---- mem/ctrl/CtrlMemDynamicRTL.py | 100 ++++++++--------- mem/ctrl/test/CtrlMemDynamicRTL_test.py | 136 ++++++++++++++++++++++++ 3 files changed, 273 insertions(+), 67 deletions(-) create mode 100644 mem/ctrl/test/CtrlMemDynamicRTL_test.py diff --git a/lib/messages.py b/lib/messages.py index ffca2eb..d946d89 100644 --- a/lib/messages.py +++ b/lib/messages.py @@ -130,12 +130,13 @@ def str_func( s ): ) -def mk_separate_ctrl(num_fu_inports = 4, +def mk_separate_ctrl(num_operations = 7, + num_fu_inports = 4, num_fu_outports = 2, num_tile_inports = 5, num_tile_outports = 5, prefix = "CGRAConfig" ): - operation_nbits = 6 + operation_nbits = clog2(num_operations) OperationType = mk_bits(operation_nbits) TileInportsType = mk_bits(clog2(num_tile_inports + 1)) TileOutportsType = mk_bits(clog2(num_tile_outports + 1)) @@ -297,31 +298,96 @@ def str_func(s): # Ring for delivering ctrl signals and commands across tiles #========================================================================= -def mk_ring_across_tiles_pkt(nrouters = 4, ctrl_action_nbits = 2, - ctrl_addr_nbits = 4, ctrl_signal_nbits = 10, +def mk_ring_across_tiles_pkt(nrouters = 4, + ctrl_actions = 8, + ctrl_mem_size = 4, + ctrl_operations = 7, + ctrl_fu_inports = 4, + ctrl_fu_outports = 4, + ctrl_tile_inports = 5, + ctrl_tile_outports = 5, prefix="RingAcrossTilesPacket"): IdType = mk_bits(clog2(nrouters)) - OpqType = mk_bits(1) - CtrlActionType = 
mk_bits(ctrl_action_nbits) - CtrlAddrType = mk_bits(ctrl_addr_nbits) - CtrlSignalType = mk_bits(ctrl_signal_nbits) + opaque_nbits = 1 + OpqType = mk_bits(opaque_nbits) + CtrlActionType = mk_bits(clog2(ctrl_actions)) + CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) + CtrlOperationType = mk_bits(clog2(ctrl_operations)) + CtrlTileInType = mk_bits(clog2(ctrl_tile_inports + 1)) + CtrlTileOutType = mk_bits(clog2(ctrl_tile_outports + 1)) + num_routing_outports = ctrl_tile_outports + ctrl_fu_inports + CtrlRoutingOutType = mk_bits(clog2(num_routing_outports + 1)) + CtrlFuInType = mk_bits(clog2(ctrl_fu_inports + 1)) + CtrlFuOutType = mk_bits(clog2(ctrl_fu_outports + 1)) + CtrlPredicateType = mk_bits(1) + + new_name = f"{prefix}_{nrouters}_{opaque_nbits}_{ctrl_actions}_" \ + f"{ctrl_mem_size}_{ctrl_operations}_{ctrl_fu_inports}_"\ + f"{ctrl_fu_outports}_{ctrl_tile_inports}_{ctrl_tile_outports}" - new_name = f"{prefix}_{nrouters}_{opaque_nbits}_{ctrl_action_nbits}_" \ - f"{ctrl_addr_nbits}_{ctrl_signal_nbits}" def str_func(s): + out_str = '(ctrl_operation)' + str(s.ctrl_operation) + out_str += '|(ctrl_fu_in)' + for i in range(ctrl_fu_inports): + if i != 0: + out_str += '-' + out_str += str(int(s.ctrl_fu_in[i])) + + out_str += '|(ctrl_predicate)' + out_str += str(int(s.ctrl_predicate)) + + out_str += '|(ctrl_routing_xbar_out)' + for i in range(num_routing_outports): + if i != 0: + out_str += '-' + out_str += str(int(s.ctrl_routing_xbar_outport[i])) + + out_str += '|(ctrl_fu_xbar_out)' + for i in range(num_routing_outports): + if i != 0: + out_str += '-' + out_str += str(int(s.ctrl_fu_xbar_outport[i])) + + out_str += '|(ctrl_predicate_in)' + for i in range(ctrl_tile_inports): + if i != 0: + out_str += '-' + out_str += str(int(s.ctrl_routing_predicate_in[i])) + return f"{s.src}>{s.dst}:{s.opaque}:{s.ctrl_action}.{s.ctrl_addr}." 
\ - f"{s.ctrl_signal}" + f"{out_str}" - return mk_bitstruct(new_name, { - 'src': IdType, - 'dst': IdType, - 'opaque': OpqType, - 'ctrl_action': CtrlActionType, - 'ctrl_addr': CtrlAddrType, - 'ctrl_signal': CtrlSignalType, - }, + field_dict = {} + field_dict['src'] = IdType + field_dict['dst'] = IdType + field_dict['opaque'] = OpqType + field_dict['ctrl_action'] = CtrlActionType + field_dict['ctrl_addr'] = CtrlAddrType + field_dict['ctrl_operation'] = CtrlOperationType + # TODO: need fix to pair `predicate` with specific operation. + # The 'predicate' indicates whether the current operation is based on + # the partial predication or not. Note that 'predicate' is different + # from the following 'predicate_in', which contributes to the 'predicate' + # at the next cycle. + field_dict['ctrl_predicate'] = CtrlPredicateType + # The fu_in indicates the input register ID (i.e., operands) for the + # operation. + field_dict['ctrl_fu_in'] = [CtrlFuInType for _ in range(ctrl_fu_inports)] + + field_dict['ctrl_routing_xbar_outport'] = [CtrlTileInType for _ in range( + num_routing_outports)] + field_dict['ctrl_fu_xbar_outport'] = [CtrlFuOutType for _ in range( + num_routing_outports)] + # I assume one tile supports single predicate during the entire execution + # time, as it is hard to distinguish predication for different operations + # (we automatically update, i.e., 'or', the predicate stored in the + # predicate register). This should be guaranteed by the compiler. 
+ field_dict['ctrl_routing_predicate_in'] = [CtrlPredicateType for _ in range( + ctrl_tile_inports)] + + return mk_bitstruct(new_name, field_dict, namespace = {'__str__': str_func} ) diff --git a/mem/ctrl/CtrlMemDynamicRTL.py b/mem/ctrl/CtrlMemDynamicRTL.py index 9b7f02b..0aae72b 100644 --- a/mem/ctrl/CtrlMemDynamicRTL.py +++ b/mem/ctrl/CtrlMemDynamicRTL.py @@ -13,12 +13,14 @@ from pymtl3.stdlib.dstruct.queues import NormalQueue from pymtl3.stdlib.primitive import RegisterFile from ...lib.basic.en_rdy.ifcs import SendIfcRTL, RecvIfcRTL +from ...lib.cmd_type import * from ...lib.opt_type import * class CtrlMemDynamicRTL(Component): - def construct(s, CtrlPktType, CtrlActionType, CtrlSignalType, - ctrl_mem_size, ctrl_count_per_iter = 4, + def construct(s, CtrlPktType, CtrlSignalType, ctrl_mem_size, + num_fu_inports, num_fu_outports, num_tile_inports, + num_tile_outports, ctrl_count_per_iter = 4, total_ctrl_steps = 4): # The total_ctrl_steps indicates the number of steps the ctrl @@ -31,40 +33,51 @@ def construct(s, CtrlPktType, CtrlActionType, CtrlSignalType, CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) PCType = mk_bits(clog2(ctrl_count_per_iter + 1)) TimeType = mk_bits(clog2(total_ctrl_steps + 1)) + num_routing_outports = num_tile_outports + num_fu_inports # Interface - s.send_ctrl = SendIfcRTL(CtrlType) - # s.recv_waddr = RecvIfcRTL(AddrType) - # s.recv_ctrl = RecvIfcRTL(CtrlType) - - s.recv_pkt = RecvIfcRTL(CtrPktType) + s.send_ctrl = SendIfcRTL(CtrlSignalType) + s.recv_pkt = RecvIfcRTL(CtrlPktType) # Component - s.reg_file = RegisterFile(CtrlType, ctrl_mem_size, 1, 1) - s.recv_pkt_queue = NormalQueue(CtrPktType) + s.reg_file = RegisterFile(CtrlSignalType, ctrl_mem_size, 1, 1) + s.recv_pkt_queue = NormalQueue(CtrlPktType) s.times = Wire(TimeType) s.start_iterate_ctrl = Wire(b1) # Connections s.send_ctrl.msg //= s.reg_file.rdata[0] - s.recv_pkt_queue.recv //= s.recv_pkt.recv - # s.reg_file.waddr[0] //= s.recv_waddr.msg - # s.reg_file.wdata[0] //= s.recv_ctrl.msg - 
# s.reg_file.wen[0] //= lambda: s.recv_ctrl.en & s.recv_waddr.en + s.recv_pkt.rdy //= s.recv_pkt_queue.enq_rdy @update def update_msg(): - if s.recv_pkt_queue.send.msg.ctrl_action == CMD_CONFIG: - s.reg_file.waddr[0] //= s.recv_pkt_queue.send.msg.ctrl_addr - s.reg_file.wdata[0] //= s.recv_pkt_queue.send.msg.ctrl_data - s.reg_file.wen[0] //= s.recv_pkt_queue.send.en + + s.recv_pkt_queue.enq_en @= s.recv_pkt.en & s.recv_pkt_queue.enq_rdy + s.recv_pkt_queue.enq_msg @= CtrlPktType() + s.reg_file.wdata[0] @= CtrlSignalType() + if s.recv_pkt.en: + s.recv_pkt_queue.enq_msg @= s.recv_pkt.msg + + if s.recv_pkt_queue.deq_msg.ctrl_action == CMD_CONFIG: + s.reg_file.wen[0] @= 1 # s.recv_pkt_queue.deq_en + s.reg_file.waddr[0] @= s.recv_pkt_queue.deq_msg.ctrl_addr + # Fills the fields of the control signal. + s.reg_file.wdata[0].ctrl @= s.recv_pkt_queue.deq_msg.ctrl_operation + s.reg_file.wdata[0].predicate @= s.recv_pkt_queue.deq_msg.ctrl_predicate + for i in range(num_fu_inports): + s.reg_file.wdata[0].fu_in[i] @= s.recv_pkt_queue.deq_msg.ctrl_fu_in[i] + for i in range(num_routing_outports): + s.reg_file.wdata[0].routing_xbar_outport[i] @= s.recv_pkt_queue.deq_msg.ctrl_routing_xbar_outport[i] + s.reg_file.wdata[0].fu_xbar_outport[i] @= s.recv_pkt_queue.deq_msg.ctrl_fu_xbar_outport[i] + for i in range(num_tile_inports): + s.reg_file.wdata[0].routing_predicate_in[i] @= s.recv_pkt_queue.deq_msg.ctrl_routing_predicate_in[i] # @yo96? depending on data, causing combinational loop or not? 
- if s.recv_pkt_queue.send.msg.ctrl_action == CMD_CONFIG | \ - s.recv_pkt_queue.send.msg.ctrl_action == CMD_LAUNCH | \ - s.recv_pkt_queue.send.msg.ctrl_action == CMD_TERMINATE | \ - s.recv_pkt_queue.send.msg.ctrl_action == CMD_HALT: - s.recv_pkt_queue.send.rdy @= 1 + if (s.recv_pkt_queue.deq_msg.ctrl_action == CMD_CONFIG) | \ + (s.recv_pkt_queue.deq_msg.ctrl_action == CMD_LAUNCH) | \ + (s.recv_pkt_queue.deq_msg.ctrl_action == CMD_TERMINATE) | \ + (s.recv_pkt_queue.deq_msg.ctrl_action == CMD_PAUSE): + s.recv_pkt_queue.deq_en @= 1 # TODO: Extend for the other commands. Maybe another queue to # handle complicated actions. # else: @@ -80,32 +93,23 @@ def update_send_out_signal(): else: s.send_ctrl.en @= s.send_ctrl.rdy # @yo96? What would happen if we overwrite? ok? - if s.recv_pkt_queue.send.msg.ctrl_action == CMD_LAUNCH | \ - s.recv_pkt_queue.send.msg.ctrl_action == CMD_TERMINATE: + if s.recv_pkt_queue.deq_rdy & \ + ((s.recv_pkt_queue.deq_msg.ctrl_action == CMD_PAUSE) | \ + (s.recv_pkt_queue.deq_msg.ctrl_action == CMD_TERMINATE)): s.send_ctrl.en @= b1(0) - # @update - # def update_signal(): - # if ((total_ctrl_steps > 0) & \ - # (s.times == TimeType(total_ctrl_steps))) | \ - # (s.reg_file.rdata[0].ctrl == OPT_START): - # s.send_ctrl.en @= b1(0) - # else: - # s.send_ctrl.en @= s.send_ctrl.rdy # s.recv_raddr[i].rdy - # s.recv_waddr.rdy @= b1(1) - # s.recv_ctrl.rdy @= b1(1) - # s.recv_pkt.rdy @= recv_pkt_queue.recv.rdy - @update_ff def update_whether_we_can_iterate_ctrl(): - # if s.reg_file.rdata[0].ctrl != OPT_START: - # @yo96? data is still there, not released yet? - if s.recv_pkt_queue.send.msg.ctrl_action == CMD_LAUNCH: - s.start_iterate_ctrl <<= 1 - elif s.recv_pkt_queue.send.msg.ctrl_action == CMD_TERMINATE: - s.start_iterate_ctrl <<= 0 - else: - s.start_iterate_ctrl <<= 1 + if s.recv_pkt_queue.deq_rdy: + # @yo96? data is still there, not released yet? 
+ if s.recv_pkt_queue.deq_msg.ctrl_action == CMD_LAUNCH: + s.start_iterate_ctrl <<= 1 + elif s.recv_pkt_queue.deq_msg.ctrl_action == CMD_TERMINATE: + s.start_iterate_ctrl <<= 0 + elif s.recv_pkt_queue.deq_msg.ctrl_action == CMD_PAUSE: + s.start_iterate_ctrl <<= 0 + # else: + # s.start_iterate_ctrl <<= 1 @update_ff def update_raddr(): @@ -119,11 +123,11 @@ def update_raddr(): if s.send_ctrl.rdy: if zext(s.reg_file.raddr[0] + 1, PCType) == \ PCType(ctrl_count_per_iter): - s.reg_file.raddr[0] <<= AddrType(0) + s.reg_file.raddr[0] <<= CtrlAddrType(0) else: - s.reg_file.raddr[0] <<= s.reg_file.raddr[0] + AddrType(1) + s.reg_file.raddr[0] <<= s.reg_file.raddr[0] + CtrlAddrType(1) def line_trace(s): - out_str = "||".join([str(data) for data in s.reg_file.regs]) - return f'{s.recv_ctrl.msg} : [{out_str}] : {s.send_ctrl.msg}' + config_mem_str = "|".join([str(data) for data in s.reg_file.regs]) + return f'{s.recv_pkt.msg} || config_mem: [{config_mem_str}] || out: {s.send_ctrl.msg}' diff --git a/mem/ctrl/test/CtrlMemDynamicRTL_test.py b/mem/ctrl/test/CtrlMemDynamicRTL_test.py new file mode 100644 index 0000000..b299d7c --- /dev/null +++ b/mem/ctrl/test/CtrlMemDynamicRTL_test.py @@ -0,0 +1,136 @@ +""" +========================================================================== +CtrlMemDynamicRTL_test.py +========================================================================== +Test cases for control memory with command-based action handling. 
+ +Author : Cheng Tan + Date : Dec 21, 2024 +""" + +from pymtl3 import * +from ..CtrlMemDynamicRTL import CtrlMemDynamicRTL +from ....fu.single.AdderRTL import AdderRTL +from ....lib.basic.en_rdy.test_sinks import TestSinkRTL +from ....lib.basic.en_rdy.test_srcs import TestSrcRTL +from ....lib.messages import * +from ....lib.cmd_type import * +from ....lib.opt_type import * + +#------------------------------------------------------------------------- +# Test harness +#------------------------------------------------------------------------- + +class TestHarness( Component ): + + def construct( s, MemUnit, DataType, PredicateType, CtrlPktType, + CtrlSignalType, ctrl_mem_size, data_mem_size, + num_fu_inports, num_fu_outports, num_tile_inports, + num_tile_outports, src0_msgs, src1_msgs, ctrl_pkts, + sink_msgs): + + AddrType = mk_bits(clog2(ctrl_mem_size)) + + s.src_data0 = TestSrcRTL(DataType, src0_msgs) + s.src_data1 = TestSrcRTL(DataType, src1_msgs) + # s.src_waddr = TestSrcRTL(AddrType, ctrl_waddr ) + # s.src_wdata = TestSrcRTL(ConfigType, ctrl_msgs ) + s.src_pkt = TestSrcRTL(CtrlPktType, ctrl_pkts) + s.sink_out = TestSinkRTL(DataType, sink_msgs) + + s.alu = AdderRTL(DataType, PredicateType, CtrlSignalType, 2, 2, + data_mem_size ) + s.ctrl_mem = MemUnit(CtrlPktType, CtrlSignalType, ctrl_mem_size, + num_fu_inports, num_fu_outports, num_tile_inports, + num_tile_outports, len(ctrl_pkts), len(ctrl_pkts)) + + s.alu.recv_in_count[0] //= 1 + s.alu.recv_in_count[1] //= 1 + + connect(s.alu.recv_opt, s.ctrl_mem.send_ctrl) + + # connect(s.src_waddr.send, s.ctrl_mem.recv_waddr) + # connect(s.src_wdata.send, s.ctrl_mem.recv_ctrl) + connect(s.src_pkt.send, s.ctrl_mem.recv_pkt) + + connect(s.src_data0.send, s.alu.recv_in[0]) + connect(s.src_data1.send, s.alu.recv_in[1]) + connect(s.alu.send_out[0], s.sink_out.recv) + + def done(s): + return s.src_data0.done() and s.src_data1.done() and \ + s.src_pkt.done() and s.sink_out.done() + + def line_trace( s ): + return s.alu.line_trace() 
+ " || " +s.ctrl_mem.line_trace() + +def run_sim( test_harness, max_cycles=20 ): + test_harness.elaborate() + test_harness.apply( DefaultPassGroup() ) + test_harness.sim_reset() + + # Run simulation + + ncycles = 0 + print() + print( "{}:{}".format( ncycles, test_harness.line_trace() )) + while not test_harness.done() and ncycles < max_cycles: + test_harness.sim_tick() + ncycles += 1 + print( "{}:{}".format( ncycles, test_harness.line_trace() )) + + # Check timeout + + assert ncycles < max_cycles + + test_harness.sim_tick() + test_harness.sim_tick() + test_harness.sim_tick() + +def test_Ctrl(): + MemUnit = CtrlMemDynamicRTL + DataType = mk_data(16, 1) + PredicateType = mk_predicate(1, 1) + ctrl_mem_size = 16 + ctrl_addr_nbits = clog2(ctrl_mem_size) + data_mem_size = 8 + num_fu_inports = 2 + num_fu_outports = 2 + num_tile_inports = 4 + num_tile_outports = 4 + num_terminals = 4 + num_ctrl_actions = 6 + ctrl_action_nbits = clog2(num_ctrl_actions) + num_ctrl_operations = 64 + CtrlPktType = mk_ring_across_tiles_pkt(num_terminals, + num_ctrl_actions, + ctrl_mem_size, + num_ctrl_operations, + num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports) + CtrlSignalType = mk_separate_ctrl(num_ctrl_operations, + num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports) + FuInType = mk_bits(clog2(num_fu_inports + 1)) + pickRegister = [FuInType(x + 1) for x in range(num_fu_inports)] + AddrType = mk_bits(clog2(ctrl_mem_size)) + src_data0 = [DataType(1, 1), DataType(5, 1), DataType(7, 1), DataType(6, 1)] + src_data1 = [DataType(6, 1), DataType(1, 1), DataType(2, 1), DataType(3, 1)] + + src_ctrl_pkt = [CtrlPktType(0, 1, 0, CMD_CONFIG, 0, OPT_ADD, b1(0), pickRegister), + CtrlPktType(0, 1, 0, CMD_CONFIG, 1, OPT_SUB, b1(0), pickRegister), + CtrlPktType(0, 1, 0, CMD_CONFIG, 2, OPT_SUB, b1(0), pickRegister), + CtrlPktType(0, 1, 0, CMD_CONFIG, 3, OPT_ADD, b1(0), pickRegister), + CtrlPktType(0, 1, 0, CMD_LAUNCH, 0, OPT_ADD, b1(0), pickRegister)] + + 
sink_out = [DataType(7, 1), DataType(4, 1), DataType(5, 1), DataType(9, 1)] + th = TestHarness(MemUnit, DataType, PredicateType, CtrlPktType, CtrlSignalType, + ctrl_mem_size, data_mem_size, num_fu_inports, num_fu_outports, + num_tile_inports, num_tile_outports, src_data0, src_data1, + src_ctrl_pkt, sink_out) + run_sim(th) + From 037d4346bb3e27fa8234fe1088c54646e46e1c22 Mon Sep 17 00:00:00 2001 From: tancheng Date: Mon, 23 Dec 2024 02:39:30 +0000 Subject: [PATCH 03/19] [feature] Replace en/rdy with val/rdy for ctrl mem and provide test --- lib/messages.py | 2 + mem/ctrl/CtrlMemDynamicRTL.py | 65 +++++---- mem/ctrl/RingMultiCtrlMemDynamicRTL.py | 78 +++++++++++ mem/ctrl/test/CtrlMemDynamicRTL_test.py | 13 +- mem/ctrl/test/RingCtrlMemDynamicRTL_test.py | 147 ++++++++++++++++++++ 5 files changed, 270 insertions(+), 35 deletions(-) create mode 100644 mem/ctrl/RingMultiCtrlMemDynamicRTL.py create mode 100644 mem/ctrl/test/RingCtrlMemDynamicRTL_test.py diff --git a/lib/messages.py b/lib/messages.py index d946d89..0b58aaf 100644 --- a/lib/messages.py +++ b/lib/messages.py @@ -321,6 +321,7 @@ def mk_ring_across_tiles_pkt(nrouters = 4, CtrlFuInType = mk_bits(clog2(ctrl_fu_inports + 1)) CtrlFuOutType = mk_bits(clog2(ctrl_fu_outports + 1)) CtrlPredicateType = mk_bits(1) + VcIdType = mk_bits(1) new_name = f"{prefix}_{nrouters}_{opaque_nbits}_{ctrl_actions}_" \ f"{ctrl_mem_size}_{ctrl_operations}_{ctrl_fu_inports}_"\ @@ -363,6 +364,7 @@ def str_func(s): field_dict['src'] = IdType field_dict['dst'] = IdType field_dict['opaque'] = OpqType + field_dict['vc_id'] = VcIdType field_dict['ctrl_action'] = CtrlActionType field_dict['ctrl_addr'] = CtrlAddrType field_dict['ctrl_operation'] = CtrlOperationType diff --git a/mem/ctrl/CtrlMemDynamicRTL.py b/mem/ctrl/CtrlMemDynamicRTL.py index 0aae72b..1bde5ac 100644 --- a/mem/ctrl/CtrlMemDynamicRTL.py +++ b/mem/ctrl/CtrlMemDynamicRTL.py @@ -10,9 +10,11 @@ """ from pymtl3 import * -from pymtl3.stdlib.dstruct.queues import NormalQueue +# from 
pymtl3.stdlib.dstruct.queues import NormalQueue from pymtl3.stdlib.primitive import RegisterFile -from ...lib.basic.en_rdy.ifcs import SendIfcRTL, RecvIfcRTL +from ...lib.basic.en_rdy.ifcs import SendIfcRTL +from ...lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL +from ...lib.basic.val_rdy.queues import NormalQueueRTL from ...lib.cmd_type import * from ...lib.opt_type import * @@ -37,47 +39,52 @@ def construct(s, CtrlPktType, CtrlSignalType, ctrl_mem_size, # Interface s.send_ctrl = SendIfcRTL(CtrlSignalType) - s.recv_pkt = RecvIfcRTL(CtrlPktType) + s.recv_pkt = ValRdyRecvIfcRTL(CtrlPktType) # Component s.reg_file = RegisterFile(CtrlSignalType, ctrl_mem_size, 1, 1) - s.recv_pkt_queue = NormalQueue(CtrlPktType) + # FIXME: valrdy normal queue RTL? + s.recv_pkt_queue = NormalQueueRTL(CtrlPktType) s.times = Wire(TimeType) s.start_iterate_ctrl = Wire(b1) # Connections s.send_ctrl.msg //= s.reg_file.rdata[0] - s.recv_pkt.rdy //= s.recv_pkt_queue.enq_rdy + # s.recv_pkt.rdy //= s.recv_pkt_queue.enq_rdy + s.recv_pkt //= s.recv_pkt_queue.recv @update def update_msg(): - s.recv_pkt_queue.enq_en @= s.recv_pkt.en & s.recv_pkt_queue.enq_rdy - s.recv_pkt_queue.enq_msg @= CtrlPktType() + # s.recv_pkt_queue.enq_en @= s.recv_pkt.en & s.recv_pkt_queue.enq_rdy + # s.recv_pkt_queue.enq_msg @= CtrlPktType() + s.reg_file.wen[0] @= 0 s.reg_file.wdata[0] @= CtrlSignalType() - if s.recv_pkt.en: - s.recv_pkt_queue.enq_msg @= s.recv_pkt.msg + s.reg_file.waddr[0] @= s.recv_pkt_queue.send.msg.ctrl_addr - if s.recv_pkt_queue.deq_msg.ctrl_action == CMD_CONFIG: + # if s.recv_pkt.en: + # s.recv_pkt_queue.enq_msg @= s.recv_pkt.msg + + if s.recv_pkt_queue.send.val & (s.recv_pkt_queue.send.msg.ctrl_action == CMD_CONFIG): s.reg_file.wen[0] @= 1 # s.recv_pkt_queue.deq_en - s.reg_file.waddr[0] @= s.recv_pkt_queue.deq_msg.ctrl_addr + s.reg_file.waddr[0] @= s.recv_pkt_queue.send.msg.ctrl_addr # Fills the fields of the control signal. 
- s.reg_file.wdata[0].ctrl @= s.recv_pkt_queue.deq_msg.ctrl_operation - s.reg_file.wdata[0].predicate @= s.recv_pkt_queue.deq_msg.ctrl_predicate + s.reg_file.wdata[0].ctrl @= s.recv_pkt_queue.send.msg.ctrl_operation + s.reg_file.wdata[0].predicate @= s.recv_pkt_queue.send.msg.ctrl_predicate for i in range(num_fu_inports): - s.reg_file.wdata[0].fu_in[i] @= s.recv_pkt_queue.deq_msg.ctrl_fu_in[i] + s.reg_file.wdata[0].fu_in[i] @= s.recv_pkt_queue.send.msg.ctrl_fu_in[i] for i in range(num_routing_outports): - s.reg_file.wdata[0].routing_xbar_outport[i] @= s.recv_pkt_queue.deq_msg.ctrl_routing_xbar_outport[i] - s.reg_file.wdata[0].fu_xbar_outport[i] @= s.recv_pkt_queue.deq_msg.ctrl_fu_xbar_outport[i] + s.reg_file.wdata[0].routing_xbar_outport[i] @= s.recv_pkt_queue.send.msg.ctrl_routing_xbar_outport[i] + s.reg_file.wdata[0].fu_xbar_outport[i] @= s.recv_pkt_queue.send.msg.ctrl_fu_xbar_outport[i] for i in range(num_tile_inports): - s.reg_file.wdata[0].routing_predicate_in[i] @= s.recv_pkt_queue.deq_msg.ctrl_routing_predicate_in[i] + s.reg_file.wdata[0].routing_predicate_in[i] @= s.recv_pkt_queue.send.msg.ctrl_routing_predicate_in[i] # @yo96? depending on data, causing combinational loop or not? - if (s.recv_pkt_queue.deq_msg.ctrl_action == CMD_CONFIG) | \ - (s.recv_pkt_queue.deq_msg.ctrl_action == CMD_LAUNCH) | \ - (s.recv_pkt_queue.deq_msg.ctrl_action == CMD_TERMINATE) | \ - (s.recv_pkt_queue.deq_msg.ctrl_action == CMD_PAUSE): - s.recv_pkt_queue.deq_en @= 1 + if (s.recv_pkt_queue.send.msg.ctrl_action == CMD_CONFIG) | \ + (s.recv_pkt_queue.send.msg.ctrl_action == CMD_LAUNCH) | \ + (s.recv_pkt_queue.send.msg.ctrl_action == CMD_TERMINATE) | \ + (s.recv_pkt_queue.send.msg.ctrl_action == CMD_PAUSE): + s.recv_pkt_queue.send.rdy @= 1 # TODO: Extend for the other commands. Maybe another queue to # handle complicated actions. # else: @@ -93,20 +100,20 @@ def update_send_out_signal(): else: s.send_ctrl.en @= s.send_ctrl.rdy # @yo96? What would happen if we overwrite? ok? 
- if s.recv_pkt_queue.deq_rdy & \ - ((s.recv_pkt_queue.deq_msg.ctrl_action == CMD_PAUSE) | \ - (s.recv_pkt_queue.deq_msg.ctrl_action == CMD_TERMINATE)): + if s.recv_pkt_queue.send.val & \ + ((s.recv_pkt_queue.send.msg.ctrl_action == CMD_PAUSE) | \ + (s.recv_pkt_queue.send.msg.ctrl_action == CMD_TERMINATE)): s.send_ctrl.en @= b1(0) @update_ff def update_whether_we_can_iterate_ctrl(): - if s.recv_pkt_queue.deq_rdy: + if s.recv_pkt_queue.send.val: # @yo96? data is still there, not released yet? - if s.recv_pkt_queue.deq_msg.ctrl_action == CMD_LAUNCH: + if s.recv_pkt_queue.send.msg.ctrl_action == CMD_LAUNCH: s.start_iterate_ctrl <<= 1 - elif s.recv_pkt_queue.deq_msg.ctrl_action == CMD_TERMINATE: + elif s.recv_pkt_queue.send.msg.ctrl_action == CMD_TERMINATE: s.start_iterate_ctrl <<= 0 - elif s.recv_pkt_queue.deq_msg.ctrl_action == CMD_PAUSE: + elif s.recv_pkt_queue.send.msg.ctrl_action == CMD_PAUSE: s.start_iterate_ctrl <<= 0 # else: # s.start_iterate_ctrl <<= 1 diff --git a/mem/ctrl/RingMultiCtrlMemDynamicRTL.py b/mem/ctrl/RingMultiCtrlMemDynamicRTL.py new file mode 100644 index 0000000..0ae6a8c --- /dev/null +++ b/mem/ctrl/RingMultiCtrlMemDynamicRTL.py @@ -0,0 +1,78 @@ +""" +========================================================================== +RingMultiCtrlMemDynamicRTL.py +========================================================================== +Ring connecting multiple control memories. 
+ +Author : Cheng Tan + Date : Dec 22, 2024 +""" + +from pymtl3 import * +from pymtl3.stdlib.primitive import RegisterFile +from .CtrlMemDynamicRTL import CtrlMemDynamicRTL +from ...lib.basic.en_rdy.ifcs import SendIfcRTL +from ...lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL +from ...lib.opt_type import * +from ...noc.PyOCN.pymtl3_net.ringnet.RingNetworkRTL import RingNetworkRTL +from ...cgra.CGRAWithCrossbarDataMemRTL import CGRAWithCrossbarDataMemRTL +from ...noc.PyOCN.pymtl3_net.ocnlib.ifcs.positions import mk_ring_pos + +class RingMultiCtrlMemDynamicRTL(Component): + + def construct(s, CtrlPktType, CtrlSignalType, width, height, + ctrl_mem_size, num_fu_inports, num_fu_outports, + num_tile_inports, num_tile_outports, + ctrl_count_per_iter = 4, total_ctrl_steps = 4): + + # Constant + num_terminals = width * height + CtrlRingPos = mk_ring_pos(num_terminals) + s.num_terminals = width * height + # CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) + # ControllerIdType = mk_bits(clog2(num_terminals)) + + # Interface + # # Request from/to CPU. 
+ # s.recv_from_cpu = RecvIfcRTL(CGRADataType) + # s.send_to_cpu = SendIfcRTL(CGRADataType) + # s.recv_waddr = [[RecvIfcRTL(CtrlAddrType) for _ in range(s.num_tiles)] + # for _ in range(s.num_terminals)] + # s.recv_wopt = [[RecvIfcRTL(CtrlType) for _ in range(s.num_tiles)] + # for _ in range(s.num_terminals)] + s.send_ctrl = [SendIfcRTL(CtrlSignalType) for _ in range(s.num_terminals)] + s.recv_pkt_from_controller = ValRdyRecvIfcRTL(CtrlPktType) + + # Components + # s.cgra = [CGRAWithCrossbarDataMemRTL( + # CGRADataType, PredicateType, CtrlType, NocPktType, CmdType, + # ControllerIdType, terminal_id, width, height, ctrl_mem_size, + # data_mem_size_global, data_mem_size_per_bank, num_banks_per_cgra, + # num_ctrl, total_steps, FunctionUnit, FuList, controller2addr_map, + # preload_data = None, preload_const = None) + # for terminal_id in range(s.num_terminals)] + s.ctrl_memories = [ + CtrlMemDynamicRTL(CtrlPktType, CtrlSignalType, ctrl_mem_size, + num_fu_inports, num_fu_outports, num_tile_inports, + num_tile_outports, ctrl_count_per_iter, + total_ctrl_steps) for terminal_id in range(s.num_terminals)] + s.ctrl_ring = RingNetworkRTL(CtrlPktType, CtrlRingPos, num_terminals, 0) + + # Connections + for i in range(s.num_terminals): + s.ctrl_ring.send[i] //= s.ctrl_memories[i].recv_pkt + + s.ctrl_ring.recv[0] //= s.recv_pkt_from_controller + for i in range(1, s.num_terminals): + s.ctrl_ring.recv[i].val //= 0 + s.ctrl_ring.recv[i].msg //= CtrlPktType() + + for i in range(s.num_terminals): + s.ctrl_memories[i].send_ctrl //= s.send_ctrl[i] + + def line_trace(s): + res = "||\n".join([(("[ctrl_memory["+str(i)+"]: ") + x.line_trace()) + for (i,x) in enumerate(s.ctrl_memories)]) + res += " ## ctrl_ring: " + s.ctrl_ring.line_trace() + return res + diff --git a/mem/ctrl/test/CtrlMemDynamicRTL_test.py b/mem/ctrl/test/CtrlMemDynamicRTL_test.py index b299d7c..a158862 100644 --- a/mem/ctrl/test/CtrlMemDynamicRTL_test.py +++ b/mem/ctrl/test/CtrlMemDynamicRTL_test.py @@ -13,6 +13,7 @@ from 
....fu.single.AdderRTL import AdderRTL from ....lib.basic.en_rdy.test_sinks import TestSinkRTL from ....lib.basic.en_rdy.test_srcs import TestSrcRTL +from ....lib.basic.val_rdy.SourceRTL import SourceRTL as ValRdyTestSrcRTL from ....lib.messages import * from ....lib.cmd_type import * from ....lib.opt_type import * @@ -35,7 +36,7 @@ def construct( s, MemUnit, DataType, PredicateType, CtrlPktType, s.src_data1 = TestSrcRTL(DataType, src1_msgs) # s.src_waddr = TestSrcRTL(AddrType, ctrl_waddr ) # s.src_wdata = TestSrcRTL(ConfigType, ctrl_msgs ) - s.src_pkt = TestSrcRTL(CtrlPktType, ctrl_pkts) + s.src_pkt = ValRdyTestSrcRTL(CtrlPktType, ctrl_pkts) s.sink_out = TestSinkRTL(DataType, sink_msgs) s.alu = AdderRTL(DataType, PredicateType, CtrlSignalType, 2, 2, @@ -121,11 +122,11 @@ def test_Ctrl(): src_data0 = [DataType(1, 1), DataType(5, 1), DataType(7, 1), DataType(6, 1)] src_data1 = [DataType(6, 1), DataType(1, 1), DataType(2, 1), DataType(3, 1)] - src_ctrl_pkt = [CtrlPktType(0, 1, 0, CMD_CONFIG, 0, OPT_ADD, b1(0), pickRegister), - CtrlPktType(0, 1, 0, CMD_CONFIG, 1, OPT_SUB, b1(0), pickRegister), - CtrlPktType(0, 1, 0, CMD_CONFIG, 2, OPT_SUB, b1(0), pickRegister), - CtrlPktType(0, 1, 0, CMD_CONFIG, 3, OPT_ADD, b1(0), pickRegister), - CtrlPktType(0, 1, 0, CMD_LAUNCH, 0, OPT_ADD, b1(0), pickRegister)] + src_ctrl_pkt = [CtrlPktType(0, 1, 0, 0, CMD_CONFIG, 0, OPT_ADD, b1(0), pickRegister), + CtrlPktType(0, 1, 0, 0, CMD_CONFIG, 1, OPT_SUB, b1(0), pickRegister), + CtrlPktType(0, 1, 0, 0, CMD_CONFIG, 2, OPT_SUB, b1(0), pickRegister), + CtrlPktType(0, 1, 0, 0, CMD_CONFIG, 3, OPT_ADD, b1(0), pickRegister), + CtrlPktType(0, 1, 0, 0, CMD_LAUNCH, 0, OPT_ADD, b1(0), pickRegister)] sink_out = [DataType(7, 1), DataType(4, 1), DataType(5, 1), DataType(9, 1)] th = TestHarness(MemUnit, DataType, PredicateType, CtrlPktType, CtrlSignalType, diff --git a/mem/ctrl/test/RingCtrlMemDynamicRTL_test.py b/mem/ctrl/test/RingCtrlMemDynamicRTL_test.py new file mode 100644 index 0000000..c6655b0 --- 
/dev/null +++ b/mem/ctrl/test/RingCtrlMemDynamicRTL_test.py @@ -0,0 +1,147 @@ +""" +========================================================================== +CtrlMemDynamicRTL_test.py +========================================================================== +Test cases for control memory with command-based action handling. + +Author : Cheng Tan + Date : Dec 21, 2024 +""" + +from pymtl3 import * +from ..RingMultiCtrlMemDynamicRTL import RingMultiCtrlMemDynamicRTL +from ....fu.single.AdderRTL import AdderRTL +from ....lib.basic.en_rdy.test_sinks import TestSinkRTL +from ....lib.basic.en_rdy.test_srcs import TestSrcRTL +from ....lib.basic.val_rdy.SourceRTL import SourceRTL as ValRdyTestSrcRTL +from ....lib.messages import * +from ....lib.cmd_type import * +from ....lib.opt_type import * + +#------------------------------------------------------------------------- +# Test harness +#------------------------------------------------------------------------- + +class TestHarness( Component ): + + def construct( s, DUT, DataType, PredicateType, CtrlPktType, + CtrlSignalType, ctrl_mem_size, width, height, + data_mem_size, num_fu_inports, num_fu_outports, + num_tile_inports, num_tile_outports, ctrl_pkts, + sink_msgs): + + s.width = width + s.height = height + s.src_pkt = ValRdyTestSrcRTL(CtrlPktType, ctrl_pkts) + s.sink_out = [TestSinkRTL(CtrlSignalType, sink_msgs[i]) + for i in range(width * height)] + + s.dut = \ + DUT(CtrlPktType, CtrlSignalType, width, height, + ctrl_mem_size, num_fu_inports, num_fu_outports, + num_tile_inports, num_tile_outports, + len(ctrl_pkts), len(ctrl_pkts)) + + connect(s.src_pkt.send, s.dut.recv_pkt_from_controller) + for i in range(width * height): + connect(s.dut.send_ctrl[i], s.sink_out[i].recv) + + def done(s): + if not s.src_pkt.done(): + return False + for i in range(s.width * s.height): + if not s.sink_out[i].done(): + return False + return True + + def line_trace(s): + return s.dut.line_trace() + +def run_sim(test_harness, max_cycles = 
40): + test_harness.elaborate() + test_harness.apply(DefaultPassGroup()) + test_harness.sim_reset() + + # Run simulation + + ncycles = 0 + print() + print("{}:{}".format(ncycles, test_harness.line_trace())) + while not test_harness.done() and ncycles < max_cycles: + test_harness.sim_tick() + ncycles += 1 + print("{}:{}".format( ncycles, test_harness.line_trace())) + + # Check timeout + + assert ncycles < max_cycles + + test_harness.sim_tick() + test_harness.sim_tick() + test_harness.sim_tick() + +def test_Ctrl(): + MemUnit = RingMultiCtrlMemDynamicRTL + DataType = mk_data(16, 1) + PredicateType = mk_predicate(1, 1) + ctrl_mem_size = 16 + ctrl_addr_nbits = clog2(ctrl_mem_size) + data_mem_size = 8 + num_fu_inports = 2 + num_fu_outports = 2 + num_tile_inports = 4 + num_tile_outports = 4 + width = 2 + height = 2 + num_terminals = width * height + num_ctrl_actions = 6 + ctrl_action_nbits = clog2(num_ctrl_actions) + num_ctrl_operations = 64 + CtrlPktType = mk_ring_across_tiles_pkt(num_terminals, + num_ctrl_actions, + ctrl_mem_size, + num_ctrl_operations, + num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports) + CtrlSignalType = mk_separate_ctrl(num_ctrl_operations, + num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports) + FuInType = mk_bits(clog2(num_fu_inports + 1)) + pickRegister = [FuInType(x + 1) for x in range(num_fu_inports)] + + src_ctrl_pkt = [ # src dst vc_id opq cmd_type addr operation predicate + CtrlPktType(0, 0, 0, 0, CMD_CONFIG, 0, OPT_ADD, b1(0), pickRegister), + CtrlPktType(0, 1, 0, 0, CMD_CONFIG, 1, OPT_SUB, b1(0), pickRegister), + CtrlPktType(0, 2, 0, 0, CMD_CONFIG, 0, OPT_SUB, b1(0), pickRegister), + CtrlPktType(0, 3, 0, 0, CMD_CONFIG, 1, OPT_ADD, b1(0), pickRegister), + CtrlPktType(0, 3, 0, 0, CMD_CONFIG, 0, OPT_SUB, b1(0), pickRegister), + CtrlPktType(0, 0, 0, 0, CMD_CONFIG, 1, OPT_SUB, b1(0), pickRegister), + CtrlPktType(0, 0, 0, 0, CMD_LAUNCH, 0, OPT_SUB, b1(0), pickRegister), + CtrlPktType(0, 1, 0, 0, 
CMD_CONFIG, 0, OPT_ADD, b1(0), pickRegister), + CtrlPktType(0, 1, 0, 0, CMD_LAUNCH, 0, OPT_SUB, b1(0), pickRegister), + CtrlPktType(0, 2, 0, 0, CMD_CONFIG, 1, OPT_ADD, b1(0), pickRegister), + CtrlPktType(0, 2, 0, 0, CMD_LAUNCH, 0, OPT_ADD, b1(0), pickRegister), + CtrlPktType(0, 3, 0, 0, CMD_LAUNCH, 0, OPT_ADD, b1(0), pickRegister)] + + sink_out = [ + [CtrlSignalType(OPT_ADD, 0, pickRegister), + CtrlSignalType(OPT_SUB, 0, pickRegister)], + # Ctrl memory 1 first write into address 1, then address 0. + [CtrlSignalType(OPT_ADD, 0, pickRegister), + CtrlSignalType(OPT_SUB, 0, pickRegister)], + + [CtrlSignalType(OPT_SUB, 0, pickRegister), + CtrlSignalType(OPT_ADD, 0, pickRegister)], + + [CtrlSignalType(OPT_SUB, 0, pickRegister), + CtrlSignalType(OPT_ADD, 0, pickRegister)]] + th = TestHarness(MemUnit, DataType, PredicateType, CtrlPktType, CtrlSignalType, + ctrl_mem_size, width, height, data_mem_size, num_fu_inports, + num_fu_outports, num_tile_inports, num_tile_outports, + src_ctrl_pkt, sink_out) + run_sim(th) + From eee69b933d99702e612a3b308fec94f0878e7df8 Mon Sep 17 00:00:00 2001 From: tancheng Date: Mon, 23 Dec 2024 06:23:21 +0000 Subject: [PATCH 04/19] [feature] Update Tile test for preloading ctrl signals via control packets --- mem/ctrl/RingMultiCtrlMemDynamicRTL.py | 18 --- mem/ctrl/test/CtrlMemDynamicRTL_test.py | 10 +- tile/TileSeparateCrossbarRTL.py | 35 +++--- tile/test/TileSeparateCrossbarRTL_test.py | 143 ++++++++++++---------- 4 files changed, 106 insertions(+), 100 deletions(-) diff --git a/mem/ctrl/RingMultiCtrlMemDynamicRTL.py b/mem/ctrl/RingMultiCtrlMemDynamicRTL.py index 0ae6a8c..258ea8a 100644 --- a/mem/ctrl/RingMultiCtrlMemDynamicRTL.py +++ b/mem/ctrl/RingMultiCtrlMemDynamicRTL.py @@ -19,38 +19,20 @@ from ...noc.PyOCN.pymtl3_net.ocnlib.ifcs.positions import mk_ring_pos class RingMultiCtrlMemDynamicRTL(Component): - def construct(s, CtrlPktType, CtrlSignalType, width, height, ctrl_mem_size, num_fu_inports, num_fu_outports, num_tile_inports, 
num_tile_outports, ctrl_count_per_iter = 4, total_ctrl_steps = 4): - # Constant num_terminals = width * height CtrlRingPos = mk_ring_pos(num_terminals) s.num_terminals = width * height - # CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) - # ControllerIdType = mk_bits(clog2(num_terminals)) # Interface - # # Request from/to CPU. - # s.recv_from_cpu = RecvIfcRTL(CGRADataType) - # s.send_to_cpu = SendIfcRTL(CGRADataType) - # s.recv_waddr = [[RecvIfcRTL(CtrlAddrType) for _ in range(s.num_tiles)] - # for _ in range(s.num_terminals)] - # s.recv_wopt = [[RecvIfcRTL(CtrlType) for _ in range(s.num_tiles)] - # for _ in range(s.num_terminals)] s.send_ctrl = [SendIfcRTL(CtrlSignalType) for _ in range(s.num_terminals)] s.recv_pkt_from_controller = ValRdyRecvIfcRTL(CtrlPktType) # Components - # s.cgra = [CGRAWithCrossbarDataMemRTL( - # CGRADataType, PredicateType, CtrlType, NocPktType, CmdType, - # ControllerIdType, terminal_id, width, height, ctrl_mem_size, - # data_mem_size_global, data_mem_size_per_bank, num_banks_per_cgra, - # num_ctrl, total_steps, FunctionUnit, FuList, controller2addr_map, - # preload_data = None, preload_const = None) - # for terminal_id in range(s.num_terminals)] s.ctrl_memories = [ CtrlMemDynamicRTL(CtrlPktType, CtrlSignalType, ctrl_mem_size, num_fu_inports, num_fu_outports, num_tile_inports, diff --git a/mem/ctrl/test/CtrlMemDynamicRTL_test.py b/mem/ctrl/test/CtrlMemDynamicRTL_test.py index a158862..5fab794 100644 --- a/mem/ctrl/test/CtrlMemDynamicRTL_test.py +++ b/mem/ctrl/test/CtrlMemDynamicRTL_test.py @@ -62,23 +62,23 @@ def done(s): return s.src_data0.done() and s.src_data1.done() and \ s.src_pkt.done() and s.sink_out.done() - def line_trace( s ): + def line_trace(s): return s.alu.line_trace() + " || " +s.ctrl_mem.line_trace() -def run_sim( test_harness, max_cycles=20 ): +def run_sim(test_harness, max_cycles = 20): test_harness.elaborate() - test_harness.apply( DefaultPassGroup() ) + test_harness.apply(DefaultPassGroup()) test_harness.sim_reset() # Run 
simulation ncycles = 0 print() - print( "{}:{}".format( ncycles, test_harness.line_trace() )) + print("{}:{}".format(ncycles, test_harness.line_trace())) while not test_harness.done() and ncycles < max_cycles: test_harness.sim_tick() ncycles += 1 - print( "{}:{}".format( ncycles, test_harness.line_trace() )) + print( "{}:{}".format(ncycles, test_harness.line_trace())) # Check timeout diff --git a/tile/TileSeparateCrossbarRTL.py b/tile/TileSeparateCrossbarRTL.py index 4dff881..d083038 100644 --- a/tile/TileSeparateCrossbarRTL.py +++ b/tile/TileSeparateCrossbarRTL.py @@ -25,8 +25,9 @@ from ..fu.single.MemUnitRTL import MemUnitRTL from ..fu.single.MulRTL import MulRTL from ..lib.basic.en_rdy.ifcs import SendIfcRTL, RecvIfcRTL +from ..lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL from ..mem.const.ConstQueueRTL import ConstQueueRTL -from ..mem.ctrl.CtrlMemRTL import CtrlMemRTL +from ..mem.ctrl.CtrlMemDynamicRTL import CtrlMemDynamicRTL from ..noc.CrossbarSeparateRTL import CrossbarSeparateRTL from ..noc.ChannelNormalRTL import ChannelNormalRTL from ..noc.LinkOrRTL import LinkOrRTL @@ -35,7 +36,7 @@ class TileSeparateCrossbarRTL(Component): - def construct(s, DataType, PredicateType, CtrlType, + def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, ctrl_mem_size, data_mem_size, num_ctrl, total_steps, num_fu_inports, num_fu_outports, num_tile_inports, num_tile_outports, @@ -60,8 +61,9 @@ def construct(s, DataType, PredicateType, CtrlType, num_tile_outports)] # Ctrl. - s.recv_waddr = RecvIfcRTL(CtrlAddrType) - s.recv_wopt = RecvIfcRTL(CtrlType) + # s.recv_waddr = RecvIfcRTL(CtrlAddrType) + # s.recv_wopt = RecvIfcRTL(CtrlSignalType) + s.recv_ctrl_pkt = ValRdyRecvIfcRTL(CtrlPktType) # Data. s.to_mem_raddr = SendIfcRTL(DataAddrType) @@ -70,18 +72,20 @@ def construct(s, DataType, PredicateType, CtrlType, s.to_mem_wdata = SendIfcRTL(DataType) # Components. 
- s.element = FlexibleFuRTL(DataType, PredicateType, CtrlType, + s.element = FlexibleFuRTL(DataType, PredicateType, CtrlSignalType, num_fu_inports, num_fu_outports, data_mem_size, FuList) s.const_queue = ConstQueueRTL(DataType, const_list if const_list != None else [DataType(0)]) - s.routing_crossbar = CrossbarSeparateRTL(DataType, PredicateType, CtrlType, - num_routing_xbar_inports, - num_routing_xbar_outports) - s.fu_crossbar = CrossbarSeparateRTL(DataType, PredicateType, CtrlType, - num_fu_xbar_inports, - num_fu_xbar_outports) - s.ctrl_mem = CtrlMemRTL(CtrlType, ctrl_mem_size, num_ctrl, - total_steps) + s.routing_crossbar = CrossbarSeparateRTL(DataType, PredicateType, CtrlSignalType, + num_routing_xbar_inports, + num_routing_xbar_outports) + s.fu_crossbar = CrossbarSeparateRTL(DataType, PredicateType, CtrlSignalType, + num_fu_xbar_inports, + num_fu_xbar_outports) + s.ctrl_mem = CtrlMemDynamicRTL(CtrlPktType, CtrlSignalType, ctrl_mem_size, + num_fu_inports, num_fu_outports, + num_tile_inports, num_tile_outports, + num_ctrl, total_steps) # The `tile_out_channel` indicates the outport channels that are # connected to the next tiles. s.tile_out_channel = [ChannelNormalRTL(DataType) for _ in range( @@ -105,8 +109,9 @@ def construct(s, DataType, PredicateType, CtrlType, # Connections. # Ctrl. - s.ctrl_mem.recv_waddr //= s.recv_waddr - s.ctrl_mem.recv_ctrl //= s.recv_wopt + # s.ctrl_mem.recv_waddr //= s.recv_waddr + # s.ctrl_mem.recv_ctrl //= s.recv_wopt + s.ctrl_mem.recv_pkt //= s.recv_ctrl_pkt # Constant queue. 
s.element.recv_const //= s.const_queue.send_const diff --git a/tile/test/TileSeparateCrossbarRTL_test.py b/tile/test/TileSeparateCrossbarRTL_test.py index 75dfeb2..6ee012b 100644 --- a/tile/test/TileSeparateCrossbarRTL_test.py +++ b/tile/test/TileSeparateCrossbarRTL_test.py @@ -24,11 +24,12 @@ from ...fu.flexible.FlexibleFuRTL import FlexibleFuRTL from ...lib.basic.en_rdy.test_sinks import TestSinkRTL from ...lib.basic.en_rdy.test_srcs import TestSrcRTL +from ...lib.basic.val_rdy.SourceRTL import SourceRTL as ValRdyTestSrcRTL from ...lib.messages import * +from ...lib.cmd_type import * from ...lib.opt_type import * from ...mem.ctrl.CtrlMemRTL import CtrlMemRTL - #------------------------------------------------------------------------- # Test harness #------------------------------------------------------------------------- @@ -36,32 +37,29 @@ class TestHarness(Component): def construct(s, DUT, FunctionUnit, FuList, DataType, PredicateType, - CtrlType, ctrl_mem_size, data_mem_size, - num_fu_inports, num_fu_outports, - src_data, src_opt, opt_waddr, sink_out): + CtrlPktType, CtrlSignalType, ctrl_mem_size, data_mem_size, + num_fu_inports, num_fu_outports, num_tile_inports, + num_tile_outports, src_data, src_ctrl_pkt, sink_out): - AddrType = mk_bits(clog2(ctrl_mem_size)) + s.num_tile_inports = num_tile_inports + s.num_tile_outports = num_tile_outports - # s.src_predicate = TestSrcRTL( b1, src_predicate ) - s.src_opt = TestSrcRTL(CtrlType, src_opt) - s.opt_waddr = TestSrcRTL(AddrType, opt_waddr) + s.src_ctrl_pkt = ValRdyTestSrcRTL(CtrlPktType, src_ctrl_pkt) s.src_data = [TestSrcRTL(DataType, src_data[i]) - for i in range(4)]#num_tile_inports)] + for i in range(num_tile_inports)] s.sink_out = [TestSinkRTL(DataType, sink_out[i]) - for i in range(4)]#num_tile_outports)] + for i in range(num_tile_outports)] - s.dut = DUT(DataType, PredicateType, CtrlType, - ctrl_mem_size, data_mem_size, len(src_opt), - len(src_opt), num_fu_inports, num_fu_outports, - 4, 4, FunctionUnit, 
FuList) + s.dut = DUT(DataType, PredicateType, CtrlPktType, CtrlSignalType, + ctrl_mem_size, data_mem_size, 3, 3, # 3 opts + num_fu_inports, num_fu_outports, num_tile_inports, + num_tile_outports, FunctionUnit, FuList) - # connect(s.src_predicate.send, s.dut.reg_predicate) - connect(s.src_opt.send, s.dut.recv_wopt) - connect(s.opt_waddr.send, s.dut.recv_waddr) + connect(s.src_ctrl_pkt.send, s.dut.recv_ctrl_pkt) - for i in range(4):# num_tile_inports): + for i in range(num_tile_inports): connect(s.src_data[i].send, s.dut.recv_data[i]) - for i in range(4):#num_tile_outports ): + for i in range(num_tile_outports): connect(s.dut.send_data[i], s.sink_out[i].recv) if MemUnitRTL in FuList: @@ -72,12 +70,15 @@ def construct(s, DUT, FunctionUnit, FuList, DataType, PredicateType, s.dut.to_mem_wdata.rdy //= 0 def done(s): - done = True - for i in range(4): # s.num_tile_outports ): - if not s.sink_out[i].done(): # and not s.src_data[i].done(): - done = False - break - return done + for i in range(s.num_tile_inports): + if not s.src_data[i].done(): + return False + + for i in range(s.num_tile_outports): + if not s.sink_out[i].done(): + return False + + return True def line_trace(s): return s.dut.line_trace() @@ -90,11 +91,10 @@ def test_tile_alu(cmdline_opts): num_routing_outports = num_fu_inports + num_tile_outports ctrl_mem_size = 3 data_mem_size = 8 - # number of inputs of FU is fixed inside the tile - # num_fu_in = 4 - # num_fu_out = 2 + num_terminals = 4 + num_ctrl_actions = 6 + num_ctrl_operations = 64 TileInType = mk_bits(clog2(num_tile_inports + 1)) - AddrType = mk_bits(clog2(ctrl_mem_size)) FuInType = mk_bits(clog2(num_fu_inports + 1)) FuOutType = mk_bits(clog2(num_fu_outports + 1)) pickRegister0 = [FuInType(0) for x in range(num_fu_inports)] @@ -104,35 +104,53 @@ def test_tile_alu(cmdline_opts): FuList = [AdderRTL, MulRTL, MemUnitRTL] DataType = mk_data(16, 1) PredicateType = mk_predicate(1, 1) - CtrlType = mk_separate_ctrl(num_fu_inports, num_fu_outports, - 
num_tile_inports, num_tile_outports) - opt_waddr = [AddrType(0), AddrType(1), AddrType(2)] - src_opt = [CtrlType(OPT_NAH, b1(0), pickRegister0, - # routing_xbar_output - [TileInType(0), TileInType(0), TileInType(0), TileInType(0), - TileInType(4), TileInType(3), TileInType(0), TileInType(0)], - # fu_xbar_output - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)]), - CtrlType(OPT_ADD, b1(0), pickRegister1, - # routing_xbar_output - [TileInType(0), TileInType(0), TileInType(0), TileInType(0), - TileInType(4), TileInType(1), TileInType(0), TileInType(0)], - # fu_xbar_output - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(1), - FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)]), - CtrlType(OPT_SUB, b1(0), pickRegister1, - # routing_xbar_output - [TileInType(0), TileInType(0), TileInType(0), TileInType(0), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - # fu_xbar_output - [FuOutType(1), FuOutType(0), FuOutType(0), FuOutType(1), - FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)])] - src_data = [[DataType(3, 1)], # DataType( 3, 1)], - [], # DataType(3, 1), DataType( 4, 1)], - [DataType(4, 1)], # DataType( 5, 1)], + CtrlPktType = \ + mk_ring_across_tiles_pkt(num_terminals, + num_ctrl_actions, + ctrl_mem_size, + num_ctrl_operations, + num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports) + CtrlSignalType = \ + mk_separate_ctrl(num_ctrl_operations, num_fu_inports, + num_fu_outports, num_tile_inports, + num_tile_outports) + src_ctrl_pkt = [ + # src dst vc_id opq cmd_type addr operation predicate + CtrlPktType(0, 0, 0, 0, CMD_CONFIG, 0, OPT_NAH, b1(0), pickRegister0, + # routing_xbar_output + [TileInType(0), TileInType(0), TileInType(0), TileInType(0), + TileInType(4), TileInType(3), TileInType(0), TileInType(0)], + # fu_xbar_output + [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)]), + 
CtrlPktType(0, 0, 0, 0, CMD_CONFIG, 1, OPT_ADD, b1(0), pickRegister1, + # routing_xbar_output + [TileInType(0), TileInType(0), TileInType(0), TileInType(0), + TileInType(4), TileInType(1), TileInType(0), TileInType(0)], + # fu_xbar_output + [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(1), + FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)]), + CtrlPktType(0, 0, 0, 0, CMD_CONFIG, 2, OPT_SUB, b1(0), pickRegister1, + # routing_xbar_output + [TileInType(0), TileInType(0), TileInType(0), TileInType(0), + TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + # fu_xbar_output + [FuOutType(1), FuOutType(0), FuOutType(0), FuOutType(1), + FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)]), + CtrlPktType(0, 0, 0, 0, CMD_LAUNCH, 0, OPT_ADD, b1(0), pickRegister1, + # routing_xbar_output + [TileInType(0), TileInType(0), TileInType(0), TileInType(0), + TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + # fu_xbar_output + [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)])] + src_data = [[DataType(3, 1)], + [], + [DataType(4, 1)], [DataType(5, 1), DataType(7, 1)]] - # src_predicate = [b1(0), b1(0), b1(0) ] src_const = [DataType(5, 1), DataType(0, 0), DataType(7, 1)] sink_out = [ # 7 - 3 = 4. @@ -140,16 +158,17 @@ def test_tile_alu(cmdline_opts): [], [], # 5 + 4 = 9; 7 - 3 = 4. 
- [DataType(9, 1), DataType( 4, 1)]] + [DataType(9, 1), DataType(4, 1)]] th = TestHarness(DUT, FunctionUnit, FuList, DataType, PredicateType, - CtrlType, ctrl_mem_size, data_mem_size, - num_fu_inports, num_fu_outports, - src_data, src_opt, opt_waddr, sink_out) + CtrlPktType, CtrlSignalType, ctrl_mem_size, + data_mem_size, num_fu_inports, num_fu_outports, + num_tile_inports, num_tile_outports, src_data, + src_ctrl_pkt, sink_out) th.elaborate() th.dut.set_metadata(VerilogVerilatorImportPass.vl_Wno_list, ['UNSIGNED', 'UNOPTFLAT', 'WIDTH', 'WIDTHCONCAT', 'ALWCOMBORDER']) - th = config_model_with_cmdline_opts(th, cmdline_opts, duts=['dut']) + th = config_model_with_cmdline_opts(th, cmdline_opts, duts = ['dut']) run_sim(th) From 9e970de00cb1cf2dbd786ec0c1fa56712765a14d Mon Sep 17 00:00:00 2001 From: tancheng Date: Mon, 23 Dec 2024 08:29:08 +0000 Subject: [PATCH 05/19] [feature] Initiate test for CGRA with crossbar-based data mem and ring-based ctrl mem --- cgra/CgraCrossbarDataMemRingCtrlMemRTL.py | 166 ++++++++++++++ .../CgraCrossbarDataMemRingCtrlMemRTL_test.py | 209 ++++++++++++++++++ controller/ControllerRTL.py | 12 +- controller/test/ControllerRTL_test.py | 32 ++- tile/TileSeparateCrossbarRTL.py | 7 +- 5 files changed, 416 insertions(+), 10 deletions(-) create mode 100644 cgra/CgraCrossbarDataMemRingCtrlMemRTL.py create mode 100644 cgra/test/CgraCrossbarDataMemRingCtrlMemRTL_test.py diff --git a/cgra/CgraCrossbarDataMemRingCtrlMemRTL.py b/cgra/CgraCrossbarDataMemRingCtrlMemRTL.py new file mode 100644 index 0000000..2b9578b --- /dev/null +++ b/cgra/CgraCrossbarDataMemRingCtrlMemRTL.py @@ -0,0 +1,166 @@ +""" +========================================================================= +CgraCrossbarDataMemRingCtrlMemRTL.py +========================================================================= + +Author : Cheng Tan + Date : Dec 22, 2024 +""" + +from pymtl3 import * +from ..controller.ControllerRTL import ControllerRTL +from ..fu.flexible.FlexibleFuRTL import 
FlexibleFuRTL +from ..fu.single.MemUnitRTL import MemUnitRTL +from ..fu.single.AdderRTL import AdderRTL +from ..lib.util.common import * +from ..lib.basic.en_rdy.ifcs import SendIfcRTL, RecvIfcRTL +from ..lib.basic.val_rdy.ifcs import ValRdySendIfcRTL +from ..lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL +from ..lib.opt_type import * +from ..mem.data.DataMemWithCrossbarRTL import DataMemWithCrossbarRTL +from ..noc.ChannelNormalRTL import ChannelNormalRTL +from ..noc.CrossbarSeparateRTL import CrossbarSeparateRTL +from ..noc.PyOCN.pymtl3_net.ocnlib.ifcs.positions import mk_ring_pos +from ..noc.PyOCN.pymtl3_net.ringnet.RingNetworkRTL import RingNetworkRTL +from ..tile.TileSeparateCrossbarRTL import TileSeparateCrossbarRTL + +class CgraCrossbarDataMemRingCtrlMemRTL(Component): + def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, + NocPktType, CmdType, ControllerIdType, controller_id, + width, height, ctrl_mem_size, data_mem_size_global, + data_mem_size_per_bank, num_banks_per_cgra, num_ctrl, + total_steps, FunctionUnit, FuList, controller2addr_map, + preload_data = None, preload_const = None): + + s.num_tiles = width * height + CtrlRingPos = mk_ring_pos(s.num_tiles) + s.num_mesh_ports = 4 + CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) + DataAddrType = mk_bits(clog2(data_mem_size_global)) + assert(data_mem_size_per_bank * num_banks_per_cgra <= \ + data_mem_size_global) + + # Interfaces + # s.recv_waddr = [RecvIfcRTL(CtrlAddrType) for _ in range(s.num_tiles)] + # s.recv_wopt = [RecvIfcRTL(CtrlSignalType) for _ in range(s.num_tiles)] + s.recv_from_cpu_ctrl_pkt = ValRdyRecvIfcRTL(CtrlPktType) + + # Explicitly provides the ValRdyRecvIfcRTL in the library, as the + # translation pass sometimes not able to distinguish the + # EnRdyRecvIfcRTL from it. 
+ s.recv_from_noc = ValRdyRecvIfcRTL(NocPktType) + s.send_to_noc = ValRdySendIfcRTL(NocPktType) + + # s.recv_towards_controller = RecvIfcRTL(DataType) + # s.send_from_controller = SendIfcRTL(DataType) + + # Components + if preload_const == None: + preload_const = [[DataType(0, 0)] for _ in range(width*height)] + s.tile = [TileSeparateCrossbarRTL( + DataType, PredicateType, CtrlPktType, CtrlSignalType, ctrl_mem_size, + data_mem_size_global, num_ctrl, total_steps, 4, 2, + s.num_mesh_ports, s.num_mesh_ports, + const_list = preload_const[i]) for i in range( s.num_tiles)] + s.data_mem = DataMemWithCrossbarRTL(NocPktType, DataType, + data_mem_size_global, + data_mem_size_per_bank, + num_banks_per_cgra, + height, height, + preload_data) + s.controller = ControllerRTL(ControllerIdType, CmdType, CtrlPktType, + NocPktType, DataType, DataAddrType, + controller_id, controller2addr_map) + s.ctrl_ring = RingNetworkRTL(CtrlPktType, CtrlRingPos, s.num_tiles, 0) + + # Connections + # Connects data memory with controller. + # s.data_mem.recv_from_noc //= s.controller.send_to_master + # s.data_mem.send_to_noc //= s.controller.recv_from_master + + # The last `recv_raddr` is reserved to connect the controller. + s.data_mem.recv_raddr[height] //= s.controller.send_to_master_load_request_addr + s.data_mem.recv_waddr[height] //= s.controller.send_to_master_store_request_addr + s.data_mem.recv_wdata[height] //= s.controller.send_to_master_store_request_data + # Reserved ... + s.data_mem.recv_from_noc_rdata //= s.controller.send_to_master_load_response_data + # Reserved ... 
+ s.data_mem.send_to_noc_load_request_pkt //= s.controller.recv_from_master_load_request_pkt + s.data_mem.send_to_noc_load_response_pkt //= s.controller.recv_from_master_load_response_pkt + s.data_mem.send_to_noc_store_pkt //= s.controller.recv_from_master_store_request_pkt + + s.recv_from_noc //= s.controller.recv_from_noc + s.send_to_noc //= s.controller.send_to_noc + + # Connects the ctrl interface between CPU and controller. + s.recv_from_cpu_ctrl_pkt //= s.controller.recv_from_cpu_ctrl_pkt + + # s.recv_towards_controller //= s.controller.recv_from_master + # s.send_from_controller //= s.controller.send_to_master + + # Connects ring with each control memory. + for i in range(s.num_tiles): + s.ctrl_ring.send[i] //= s.tile[i].recv_ctrl_pkt + + s.ctrl_ring.recv[0] //= s.controller.send_to_ctrl_ring_ctrl_pkt + for i in range(1, s.num_tiles): + s.ctrl_ring.recv[i].val //= 0 + s.ctrl_ring.recv[i].msg //= CtrlPktType() + + for i in range(s.num_tiles): + # s.recv_waddr[i] //= s.tile[i].recv_waddr + # s.recv_wopt[i] //= s.tile[i].recv_wopt + + if i // width > 0: + s.tile[i].send_data[PORT_SOUTH] //= s.tile[i-width].recv_data[PORT_NORTH] + + if i // width < height - 1: + s.tile[i].send_data[PORT_NORTH] //= s.tile[i+width].recv_data[PORT_SOUTH] + + if i % width > 0: + s.tile[i].send_data[PORT_WEST] //= s.tile[i-1].recv_data[PORT_EAST] + + if i % width < width - 1: + s.tile[i].send_data[PORT_EAST] //= s.tile[i+1].recv_data[PORT_WEST] + + if i // width == 0: + s.tile[i].send_data[PORT_SOUTH].rdy //= 0 + s.tile[i].recv_data[PORT_SOUTH].en //= 0 + s.tile[i].recv_data[PORT_SOUTH].msg //= DataType(0, 0) + + if i // width == height - 1: + s.tile[i].send_data[PORT_NORTH].rdy //= 0 + s.tile[i].recv_data[PORT_NORTH].en //= 0 + s.tile[i].recv_data[PORT_NORTH].msg //= DataType(0, 0) + + if i % width == 0: + s.tile[i].send_data[PORT_WEST].rdy //= 0 + s.tile[i].recv_data[PORT_WEST].en //= 0 + s.tile[i].recv_data[PORT_WEST].msg //= DataType(0, 0) + + if i % width == width - 1: + 
s.tile[i].send_data[PORT_EAST].rdy //= 0 + s.tile[i].recv_data[PORT_EAST].en //= 0 + s.tile[i].recv_data[PORT_EAST].msg //= DataType(0, 0) + + if i % width == 0: + s.tile[i].to_mem_raddr //= s.data_mem.recv_raddr[i//width] + s.tile[i].from_mem_rdata //= s.data_mem.send_rdata[i//width] + s.tile[i].to_mem_waddr //= s.data_mem.recv_waddr[i//width] + s.tile[i].to_mem_wdata //= s.data_mem.recv_wdata[i//width] + else: + s.tile[i].to_mem_raddr.rdy //= 0 + s.tile[i].from_mem_rdata.en //= 0 + s.tile[i].from_mem_rdata.msg //= DataType(0, 0) + s.tile[i].to_mem_waddr.rdy //= 0 + s.tile[i].to_mem_wdata.rdy //= 0 + + # Line trace + def line_trace( s ): + # str = "||".join([ x.element.line_trace() for x in s.tile ]) + # str += " :: [" + s.data_mem.line_trace() + "]" + res = "||\n".join([ (("[tile"+str(i)+"]: ") + x.line_trace() + x.ctrl_mem.line_trace()) + for (i,x) in enumerate(s.tile) ]) + res += "\n :: [" + s.data_mem.line_trace() + "] \n" + return res + diff --git a/cgra/test/CgraCrossbarDataMemRingCtrlMemRTL_test.py b/cgra/test/CgraCrossbarDataMemRingCtrlMemRTL_test.py new file mode 100644 index 0000000..e604f9f --- /dev/null +++ b/cgra/test/CgraCrossbarDataMemRingCtrlMemRTL_test.py @@ -0,0 +1,209 @@ +""" +========================================================================== +CgraCrossbarDataMemRingCtrlMemRTL_test.py +========================================================================== +Test cases for CGRA with crossbar-based data memory and ring-based control +memory of each tile. 
+ +Author : Cheng Tan + Date : Dec 22, 2024 +""" + + +from pymtl3 import * +from pymtl3.stdlib.test_utils import (run_sim, + config_model_with_cmdline_opts) +from pymtl3.passes.backends.verilog import (VerilogTranslationPass, + VerilogVerilatorImportPass) +from ..CgraCrossbarDataMemRingCtrlMemRTL import CgraCrossbarDataMemRingCtrlMemRTL +from ...fu.flexible.FlexibleFuRTL import FlexibleFuRTL +from ...fu.single.AdderRTL import AdderRTL +from ...fu.single.MemUnitRTL import MemUnitRTL +from ...fu.single.ShifterRTL import ShifterRTL +from ...lib.messages import * +from ...lib.cmd_type import * +from ...lib.opt_type import * +from ...lib.basic.en_rdy.test_srcs import TestSrcRTL +from ...lib.basic.val_rdy.SourceRTL import SourceRTL as ValRdyTestSrcRTL + +#------------------------------------------------------------------------- +# Test harness +#------------------------------------------------------------------------- + +class TestHarness(Component): + + def construct(s, DUT, FunctionUnit, FuList, DataType, + PredicateType, CtrlPktType, CtrlSignalType, NocPktType, + CmdType, ControllerIdType, controller_id, width, height, + ctrl_mem_size, data_mem_size_global, + data_mem_size_per_bank, num_banks_per_cgra, + src_ctrl_pkt, ctrl_steps, controller2addr_map): + + s.num_tiles = width * height + CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) + DataAddrType = mk_bits(clog2(data_mem_size_global)) + + s.src_ctrl_pkt = ValRdyTestSrcRTL(CtrlPktType, src_ctrl_pkt) + # s.ctrl_waddr = [TestSrcRTL(CtrlAddrType, ctrl_waddr[i]) + # for i in range(s.num_tiles)] + + s.dut = DUT(DataType, PredicateType, CtrlPktType, CtrlSignalType, + NocPktType, CmdType, ControllerIdType, controller_id, + width, height, ctrl_mem_size, data_mem_size_global, + data_mem_size_per_bank, num_banks_per_cgra, + ctrl_steps, ctrl_steps, FunctionUnit, FuList, + controller2addr_map) + + # Connections + s.dut.send_to_noc.rdy //= 0 + s.dut.recv_from_noc.val //= 0 + s.dut.recv_from_noc.msg //= NocPktType(0, 0, 0, 0, 0, 0) + 
+ s.src_ctrl_pkt.send //= s.dut.recv_from_cpu_ctrl_pkt + + def done(s): + return s.src_ctrl_pkt.done() + + def line_trace(s): + return s.dut.line_trace() + +def test_homo_2x2(cmdline_opts): + num_tile_inports = 4 + num_tile_outports = 4 + num_fu_inports = 4 + num_fu_outports = 2 + num_routing_outports = num_tile_outports + num_fu_inports + ctrl_mem_size = 6 + data_mem_size_global = 512 + data_mem_size_per_bank = 32 + num_banks_per_cgra = 2 + width = 2 + height = 2 + num_terminals = 4 + num_ctrl_actions = 6 + num_ctrl_operations = 64 + TileInType = mk_bits(clog2(num_tile_inports + 1)) + FuInType = mk_bits(clog2(num_fu_inports + 1)) + FuOutType = mk_bits(clog2(num_fu_outports + 1)) + addr_nbits = clog2(data_mem_size_global) + AddrType = mk_bits(addr_nbits) + CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) + num_tiles = width * height + DUT = CgraCrossbarDataMemRingCtrlMemRTL + FunctionUnit = FlexibleFuRTL + FuList = [MemUnitRTL, AdderRTL] + DataType = mk_data(32, 1) + PredicateType = mk_predicate(1, 1) + + CmdType = mk_bits(4) + ControllerIdType = mk_bits(clog2(num_terminals)) + controller_id = 1 + controller2addr_map = { + 0: [0, 3], + 1: [4, 7], + 2: [8, 11], + 3: [12, 15], + } + + CtrlPktType = \ + mk_ring_across_tiles_pkt(width * height, + num_ctrl_actions, + ctrl_mem_size, + num_ctrl_operations, + num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports) + + CtrlSignalType = \ + mk_separate_ctrl(num_ctrl_operations, + num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports) + + NocPktType = mk_ring_multi_cgra_pkt(nrouters = num_terminals, + addr_nbits = addr_nbits, + data_nbits = 32, + predicate_nbits = 1) + pickRegister = [FuInType(x + 1) for x in range(num_fu_inports)] + src_opt_per_tile = [[ + # src dst vc_id opq cmd_type addr operation predicate + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 0, OPT_INC, b1(0), + pickRegister, + [TileInType(4), TileInType(3), TileInType(2), TileInType(1), + # TODO: make below as TileInType(5) to 
double check. + TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + + [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 1, OPT_INC, b1(0), + pickRegister, + [TileInType(4), TileInType(3), TileInType(2), TileInType(1), + TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + + [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), + + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 2, OPT_ADD, b1(0), + pickRegister, + [TileInType(4), TileInType(3), TileInType(2), TileInType(1), + TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + + [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), + + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 3, OPT_STR, b1(0), + pickRegister, + [TileInType(4), TileInType(3), TileInType(2), TileInType(1), + TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + + [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), + + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 4, OPT_ADD, b1(0), + pickRegister, + [TileInType(4), TileInType(3), TileInType(2), TileInType(1), + TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + + [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), + + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 5, OPT_ADD, b1(0), + pickRegister, + [TileInType(4), TileInType(3), TileInType(2), TileInType(1), + TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + + [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), + + # This last one is for launching kernel. 
+ CtrlPktType(0, i, 0, 0, CMD_LAUNCH, 0, OPT_ADD, b1(0), + pickRegister, + [TileInType(4), TileInType(3), TileInType(2), TileInType(1), + TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + + [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]) + ] for i in range(num_tiles)] + + src_ctrl_pkt = [] + for opt_per_tile in src_opt_per_tile: + src_ctrl_pkt.extend(opt_per_tile) + + # ctrl_waddr = [[CtrlAddrType(0), CtrlAddrType(1), CtrlAddrType(2), CtrlAddrType(3), + # CtrlAddrType(4), CtrlAddrType(5)] for _ in range(num_tiles)] + + th = TestHarness(DUT, FunctionUnit, FuList, DataType, PredicateType, + CtrlPktType, CtrlSignalType, NocPktType, CmdType, + ControllerIdType, controller_id, width, height, + ctrl_mem_size, data_mem_size_global, + data_mem_size_per_bank, num_banks_per_cgra, + src_ctrl_pkt, ctrl_mem_size, controller2addr_map) + th.elaborate() + th.dut.set_metadata(VerilogVerilatorImportPass.vl_Wno_list, + ['UNSIGNED', 'UNOPTFLAT', 'WIDTH', 'WIDTHCONCAT', + 'ALWCOMBORDER']) + th = config_model_with_cmdline_opts(th, cmdline_opts, duts=['dut']) + run_sim(th) + diff --git a/controller/ControllerRTL.py b/controller/ControllerRTL.py index 27edf76..032b8ab 100644 --- a/controller/ControllerRTL.py +++ b/controller/ControllerRTL.py @@ -13,6 +13,7 @@ from ..lib.basic.en_rdy.ifcs import SendIfcRTL, RecvIfcRTL from ..lib.basic.val_rdy.ifcs import SendIfcRTL as ValRdySendIfcRTL from ..lib.basic.val_rdy.ifcs import RecvIfcRTL as ValRdyRecvIfcRTL +from ..lib.basic.val_rdy.queues import NormalQueueRTL from ..noc.ChannelNormalRTL import ChannelNormalRTL from ..noc.PyOCN.pymtl3_net.xbar.XbarBypassQueueRTL import XbarBypassQueueRTL from ..lib.cmd_type import * @@ -20,7 +21,7 @@ class ControllerRTL(Component): - def construct(s, ControllerIdType, CmdType, NocPktType, + def construct(s, ControllerIdType, CmdType, CtrlPktType, NocPktType, CGRADataType, CGRAAddrType, controller_id, controller2addr_map): @@ -29,6 
+30,9 @@ def construct(s, ControllerIdType, CmdType, NocPktType, s.recv_from_noc = ValRdyRecvIfcRTL(NocPktType) s.send_to_noc = ValRdySendIfcRTL(NocPktType) + s.recv_from_cpu_ctrl_pkt = ValRdyRecvIfcRTL(CtrlPktType) + s.send_to_ctrl_ring_ctrl_pkt = ValRdySendIfcRTL(CtrlPktType) + # Request from/to master. s.recv_from_master_load_request_pkt = RecvIfcRTL(NocPktType) s.recv_from_master_load_response_pkt = RecvIfcRTL(NocPktType) @@ -60,6 +64,8 @@ def construct(s, ControllerIdType, CmdType, NocPktType, # termination). s.crossbar = XbarBypassQueueRTL(NocPktType, 3, 1) + s.recv_ctrl_pkt_queue = NormalQueueRTL(CtrlPktType) + # # TODO: below ifcs should be connected through another NoC within # # one CGRA, instead of per-tile and performing like a bus. # # Configuration signals to be written into and read from per-tile @@ -104,6 +110,10 @@ def construct(s, ControllerIdType, CmdType, NocPktType, s.send_to_master_store_request_addr_queue.send //= s.send_to_master_store_request_addr s.send_to_master_store_request_data_queue.send //= s.send_to_master_store_request_data + # For control signals delivery from CPU to tiles. 
+ s.recv_from_cpu_ctrl_pkt //= s.recv_ctrl_pkt_queue.recv + s.recv_ctrl_pkt_queue.send //= s.send_to_ctrl_ring_ctrl_pkt + @update def update_received_msg(): kLoadRequestInportIdx = 0 diff --git a/controller/test/ControllerRTL_test.py b/controller/test/ControllerRTL_test.py index 18ebaef..57b78e2 100644 --- a/controller/test/ControllerRTL_test.py +++ b/controller/test/ControllerRTL_test.py @@ -29,8 +29,8 @@ class TestHarness(Component): - def construct(s, ControllerIdType, CmdType, MsgType, AddrType, PktType, - controller_id, + def construct(s, ControllerIdType, CtrlPktType, CmdType, MsgType, + AddrType, PktType, controller_id, from_master_load_request_pkt_msgs, from_master_load_response_pkt_msgs, from_master_store_request_pkt_msgs, @@ -56,8 +56,9 @@ def construct(s, ControllerIdType, CmdType, MsgType, AddrType, PktType, s.src_from_noc_val_rdy = TestValRdySrcRTL(PktType, from_noc_pkts) s.sink_to_noc_val_rdy = TestNetSinkRTL(PktType, expected_to_noc_pkts, cmp_fn = cmp_func) - s.dut = ControllerRTL(ControllerIdType, CmdType, PktType, MsgType, - AddrType, controller_id, controller2addr_map) + s.dut = ControllerRTL(ControllerIdType, CmdType, CtrlPktType, + PktType, MsgType, AddrType, controller_id, + controller2addr_map) # Connections s.src_from_master_load_request_pkt_en_rdy.send //= s.dut.recv_from_master_load_request_pkt @@ -72,6 +73,10 @@ def construct(s, ControllerIdType, CmdType, MsgType, AddrType, PktType, s.src_from_noc_val_rdy.send //= s.dut.recv_from_noc s.dut.send_to_noc //= s.sink_to_noc_val_rdy.recv + s.dut.recv_from_cpu_ctrl_pkt.val //= 0 + s.dut.recv_from_cpu_ctrl_pkt.msg //= CtrlPktType() + s.dut.send_to_ctrl_ring_ctrl_pkt.rdy //= 0 + def done(s): return s.src_from_master_load_request_pkt_en_rdy.done() and \ s.src_from_master_load_response_pkt_en_rdy.done() and \ @@ -137,6 +142,13 @@ def mk_src_pkts( nterminals, lst ): nterminals = 4 CmdType = mk_bits(4) ControllerIdType = mk_bits(clog2(nterminals)) +num_ctrl_actions = 8 +ctrl_mem_size = 16 
+num_ctrl_operations = 64 +num_fu_inports = 2 +num_fu_outports = 2 +num_tile_inports = 4 +num_tile_outports = 4 data_mem_size_global = 16 addr_nbits = clog2(data_mem_size_global) AddrType = mk_bits(addr_nbits) @@ -150,6 +162,15 @@ def mk_src_pkts( nterminals, lst ): 3: [12, 15], } +CtrlPktType = mk_ring_across_tiles_pkt(nterminals, + num_ctrl_actions, + ctrl_mem_size, + num_ctrl_operations, + num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports) + Pkt = mk_ring_multi_cgra_pkt(nterminals, addr_nbits = addr_nbits, data_nbits = data_nbits, @@ -212,7 +233,8 @@ def mk_src_pkts( nterminals, lst ): def test_simple(): print("controller2addr_map: ", controller2addr_map) - th = TestHarness(ControllerIdType, CmdType, DataType, + th = TestHarness(ControllerIdType, CtrlPktType, + CmdType, DataType, AddrType, Pkt, controller_id, from_master_load_request_pkts, from_master_load_response_pkts, diff --git a/tile/TileSeparateCrossbarRTL.py b/tile/TileSeparateCrossbarRTL.py index d083038..0a6250d 100644 --- a/tile/TileSeparateCrossbarRTL.py +++ b/tile/TileSeparateCrossbarRTL.py @@ -37,10 +37,9 @@ class TileSeparateCrossbarRTL(Component): def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, - ctrl_mem_size, data_mem_size, num_ctrl, - total_steps, num_fu_inports, num_fu_outports, - num_tile_inports, num_tile_outports, - Fu = FlexibleFuRTL, + ctrl_mem_size, data_mem_size, num_ctrl, total_steps, + num_fu_inports, num_fu_outports, num_tile_inports, + num_tile_outports, Fu = FlexibleFuRTL, FuList = [PhiRTL, AdderRTL, CompRTL, MulRTL, BranchRTL, MemUnitRTL], const_list = None): From 9e88e5b9746f286c11a2f08f99cf59a5f143df4e Mon Sep 17 00:00:00 2001 From: tancheng Date: Mon, 23 Dec 2024 23:05:35 +0000 Subject: [PATCH 06/19] [feature] Enable ring-based multi-cgra with ring-based ctrl memory --- mem/ctrl/CtrlMemDynamicRTL.py | 9 - scale_out/RingMultiCgraRingCtrlMemRTL.py | 74 ++++++ scale_out/test/RingMultiCGRARTL_test.py | 2 +- 
.../test/RingMultiCgraRingCtrlMemRTL_test.py | 216 ++++++++++++++++++ 4 files changed, 291 insertions(+), 10 deletions(-) create mode 100644 scale_out/RingMultiCgraRingCtrlMemRTL.py create mode 100644 scale_out/test/RingMultiCgraRingCtrlMemRTL_test.py diff --git a/mem/ctrl/CtrlMemDynamicRTL.py b/mem/ctrl/CtrlMemDynamicRTL.py index 1bde5ac..bce3138 100644 --- a/mem/ctrl/CtrlMemDynamicRTL.py +++ b/mem/ctrl/CtrlMemDynamicRTL.py @@ -56,15 +56,10 @@ def construct(s, CtrlPktType, CtrlSignalType, ctrl_mem_size, @update def update_msg(): - # s.recv_pkt_queue.enq_en @= s.recv_pkt.en & s.recv_pkt_queue.enq_rdy - # s.recv_pkt_queue.enq_msg @= CtrlPktType() s.reg_file.wen[0] @= 0 s.reg_file.wdata[0] @= CtrlSignalType() s.reg_file.waddr[0] @= s.recv_pkt_queue.send.msg.ctrl_addr - # if s.recv_pkt.en: - # s.recv_pkt_queue.enq_msg @= s.recv_pkt.msg - if s.recv_pkt_queue.send.val & (s.recv_pkt_queue.send.msg.ctrl_action == CMD_CONFIG): s.reg_file.wen[0] @= 1 # s.recv_pkt_queue.deq_en s.reg_file.waddr[0] @= s.recv_pkt_queue.send.msg.ctrl_addr @@ -79,7 +74,6 @@ def update_msg(): for i in range(num_tile_inports): s.reg_file.wdata[0].routing_predicate_in[i] @= s.recv_pkt_queue.send.msg.ctrl_routing_predicate_in[i] - # @yo96? depending on data, causing combinational loop or not? if (s.recv_pkt_queue.send.msg.ctrl_action == CMD_CONFIG) | \ (s.recv_pkt_queue.send.msg.ctrl_action == CMD_LAUNCH) | \ (s.recv_pkt_queue.send.msg.ctrl_action == CMD_TERMINATE) | \ @@ -99,7 +93,6 @@ def update_send_out_signal(): s.send_ctrl.en @= b1(0) else: s.send_ctrl.en @= s.send_ctrl.rdy - # @yo96? What would happen if we overwrite? ok? if s.recv_pkt_queue.send.val & \ ((s.recv_pkt_queue.send.msg.ctrl_action == CMD_PAUSE) | \ (s.recv_pkt_queue.send.msg.ctrl_action == CMD_TERMINATE)): @@ -120,9 +113,7 @@ def update_whether_we_can_iterate_ctrl(): @update_ff def update_raddr(): - # if s.reg_file.rdata[0].ctrl != OPT_START: if s.start_iterate_ctrl == b1(1): - # @yo96? 
There is no else, what would happen on the s.times and raddr[0]? if (total_ctrl_steps == 0) | \ (s.times < TimeType(total_ctrl_steps)): s.times <<= s.times + TimeType(1) diff --git a/scale_out/RingMultiCgraRingCtrlMemRTL.py b/scale_out/RingMultiCgraRingCtrlMemRTL.py new file mode 100644 index 0000000..d84b1bb --- /dev/null +++ b/scale_out/RingMultiCgraRingCtrlMemRTL.py @@ -0,0 +1,74 @@ +""" +========================================================================== +RingMultiCgraRingCtrlMemRTL.py +========================================================================== +Ring connecting multiple CGRAs, each CGRA contains one controller. + +Author : Cheng Tan + Date : Dec 23, 2024 +""" + +from pymtl3 import * +from pymtl3.stdlib.primitive import RegisterFile +from ..cgra.CgraCrossbarDataMemRingCtrlMemRTL import CgraCrossbarDataMemRingCtrlMemRTL +from ..lib.basic.en_rdy.ifcs import SendIfcRTL, RecvIfcRTL +from ..lib.opt_type import * +from ..lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL +from ..noc.PyOCN.pymtl3_net.ocnlib.ifcs.positions import mk_ring_pos +from ..noc.PyOCN.pymtl3_net.ringnet.RingNetworkRTL import RingNetworkRTL + +class RingMultiCgraRingCtrlMemRTL(Component): + def construct(s, CGRADataType, PredicateType, CtrlPktType, + CtrlSignalType, NocPktType, CmdType, num_terminals, + width, height, ctrl_mem_size, data_mem_size_global, + data_mem_size_per_bank, num_banks_per_cgra, + num_ctrl, total_steps, FunctionUnit, FuList, + controller2addr_map, preload_data = None, + preload_const = None): + + # Constant + RingPos = mk_ring_pos(num_terminals) + s.num_terminals = num_terminals + s.num_tiles = width * height + CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) + ControllerIdType = mk_bits(clog2(num_terminals)) + + # Interface + # # Request from/to CPU. 
+ # s.recv_from_cpu = RecvIfcRTL(CGRADataType) + # s.send_to_cpu = SendIfcRTL(CGRADataType) + # s.recv_waddr = [[RecvIfcRTL(CtrlAddrType) for _ in range(s.num_tiles)] + # for _ in range(s.num_terminals)] + # s.recv_wopt = [[RecvIfcRTL(CtrlType) for _ in range(s.num_tiles)] + # for _ in range(s.num_terminals)] + s.recv_from_cpu_ctrl_pkt = ValRdyRecvIfcRTL(CtrlPktType) + + # Components + s.cgra = [CgraCrossbarDataMemRingCtrlMemRTL( + CGRADataType, PredicateType, CtrlPktType, CtrlSignalType, + NocPktType, CmdType, ControllerIdType, terminal_id, + width, height, ctrl_mem_size, data_mem_size_global, + data_mem_size_per_bank, num_banks_per_cgra, num_ctrl, + total_steps, FunctionUnit, FuList, controller2addr_map, + preload_data = None, preload_const = None) + for terminal_id in range(s.num_terminals)] + s.ring = RingNetworkRTL(NocPktType, RingPos, num_terminals, 0) + + # Connections + s.recv_from_cpu_ctrl_pkt //= s.cgra[0].recv_from_cpu_ctrl_pkt + for i in range(s.num_terminals): + s.ring.send[i] //= s.cgra[i].recv_from_noc + s.ring.recv[i] //= s.cgra[i].send_to_noc + + for i in range(1, s.num_terminals): + s.cgra[i].recv_from_cpu_ctrl_pkt.val //= 0 + s.cgra[i].recv_from_cpu_ctrl_pkt.msg //= CtrlPktType() + # s.recv_waddr[i][j] //= s.cgra[i].recv_waddr[j] + # s.recv_wopt[i][j] //= s.cgra[i].recv_wopt[j] + + def line_trace(s): + res = "||\n".join([(("[cgra["+str(i)+"]: ") + x.line_trace()) + for (i,x) in enumerate(s.cgra)]) + res += " ## ring: " + s.ring.line_trace() + return res + diff --git a/scale_out/test/RingMultiCGRARTL_test.py b/scale_out/test/RingMultiCGRARTL_test.py index a73a2b9..13ecc37 100644 --- a/scale_out/test/RingMultiCGRARTL_test.py +++ b/scale_out/test/RingMultiCGRARTL_test.py @@ -1,6 +1,6 @@ """ ========================================================================== -CGRAWithControllerRTL_test.py +RingMultiCGRARTL_test.py ========================================================================== Test cases for CGRA with controller. 
diff --git a/scale_out/test/RingMultiCgraRingCtrlMemRTL_test.py b/scale_out/test/RingMultiCgraRingCtrlMemRTL_test.py new file mode 100644 index 0000000..5f30261 --- /dev/null +++ b/scale_out/test/RingMultiCgraRingCtrlMemRTL_test.py @@ -0,0 +1,216 @@ +""" +========================================================================== +RingMultiCgraRingCtrlMemRTL_test.py +========================================================================== +Test cases for CGRA with controller. + +Author : Cheng Tan + Date : Dec 23, 2024 +""" + + +from pymtl3 import * +from pymtl3.stdlib.test_utils import (run_sim, + config_model_with_cmdline_opts) +from pymtl3.passes.backends.verilog import (VerilogTranslationPass, + VerilogVerilatorImportPass) +from ..RingMultiCgraRingCtrlMemRTL import RingMultiCgraRingCtrlMemRTL +from ...fu.flexible.FlexibleFuRTL import FlexibleFuRTL +from ...fu.single.AdderRTL import AdderRTL +from ...fu.single.MemUnitRTL import MemUnitRTL +from ...fu.single.ShifterRTL import ShifterRTL +from ...lib.messages import * +from ...lib.opt_type import * +from ...lib.cmd_type import * +from ...lib.basic.en_rdy.test_srcs import TestSrcRTL +from ...lib.basic.val_rdy.SourceRTL import SourceRTL as ValRdyTestSrcRTL + +#------------------------------------------------------------------------- +# Test harness +#------------------------------------------------------------------------- + +class TestHarness(Component): + def construct(s, DUT, FunctionUnit, FuList, DataType, PredicateType, + CtrlPktType, CtrlSignalType, NocPktType, CmdType, + num_terminals, width, height, ctrl_mem_size, + data_mem_size_global, data_mem_size_per_bank, + num_banks_per_cgra, src_ctrl_pkt, ctrl_steps, + controller2addr_map): + + s.num_terminals = num_terminals + s.num_tiles = width * height + # CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) + + # s.src_opt = [[TestSrcRTL(CtrlType, src_opt[j]) + # for j in range(s.num_tiles)] + # for i in range(s.num_terminals)] + s.src_ctrl_pkt = 
ValRdyTestSrcRTL(CtrlPktType, src_ctrl_pkt) + # s.ctrl_waddr = [[TestSrcRTL(CtrlAddrType, ctrl_waddr[j]) + # for j in range(s.num_tiles)] + # for i in range(s.num_terminals)] + + s.dut = DUT(DataType, PredicateType, CtrlPktType, CtrlSignalType, + NocPktType, CmdType, num_terminals, width, height, + ctrl_mem_size, data_mem_size_global, data_mem_size_per_bank, + num_banks_per_cgra, ctrl_steps, ctrl_steps, + FunctionUnit, FuList, controller2addr_map) + + # Connections + # s.dut.data_mem.recv_from_noc.rdy //= 0 + # s.dut.data_mem.send_to_noc.msg //= DataType(0, 0) + # s.dut.data_mem.send_to_noc.en //= 0 + # s.src_val_rdy.send //= s.dut.recv_from_other + # s.dut.send_to_other //= s.sink_val_rdy.recv + + # s.dut.recv_towards_controller.en //= 0 + # s.dut.recv_towards_controller.msg //= DataType(0, 0) + # s.dut.send_from_controller.rdy //= 0 + + # for i in range(num_terminals): + # for j in range(s.num_tiles): + # connect(s.src_opt[i][j].send, s.dut.recv_wopt[i][j]) + # connect(s.ctrl_waddr[i][j].send, s.dut.recv_waddr[i][j]) + s.src_ctrl_pkt.send //= s.dut.recv_from_cpu_ctrl_pkt + + def done(s): + return s.src_ctrl_pkt.done() + # for i in range(s.num_terminals): + # for j in range(s.num_tiles): + # if not s.src_opt[i][j].done(): + # return False + # return True + + def line_trace(s): + return s.dut.line_trace() + +def test_homo_2x2(cmdline_opts): + num_tile_inports = 4 + num_tile_outports = 4 + num_fu_inports = 4 + num_fu_outports = 2 + num_routing_outports = num_tile_outports + num_fu_inports + ctrl_mem_size = 6 + data_mem_size_global = 32 + data_mem_size_per_bank = 4 + num_banks_per_cgra = 2 + num_terminals = 4 + width = 2 + height = 2 + num_ctrl_actions = 6 + num_ctrl_operations = 64 + TileInType = mk_bits(clog2(num_tile_inports + 1)) + FuInType = mk_bits(clog2(num_fu_inports + 1)) + FuOutType = mk_bits(clog2(num_fu_outports + 1)) + ctrl_addr_nbits = clog2(ctrl_mem_size) + # CtrlAddrType = mk_bits(ctrl_addr_nbits) + data_addr_nbits = clog2(data_mem_size_global) + 
DataAddrType = mk_bits(clog2(data_mem_size_global)) + num_tiles = width * height + DUT = RingMultiCgraRingCtrlMemRTL + FunctionUnit = FlexibleFuRTL + FuList = [MemUnitRTL, AdderRTL] + DataType = mk_data(32, 1) + PredicateType = mk_predicate(1, 1) + CmdType = mk_bits(4) + controller2addr_map = { + 0: [0, 7], + 1: [8, 15], + 2: [16, 23], + 3: [24, 31], + } + CtrlPktType = \ + mk_ring_across_tiles_pkt(width * height, + num_ctrl_actions, + ctrl_mem_size, + num_ctrl_operations, + num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports) + CtrlSignalType = \ + mk_separate_ctrl(num_ctrl_operations, + num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports) + NocPktType = mk_ring_multi_cgra_pkt(nrouters = num_terminals, + addr_nbits = data_addr_nbits, + data_nbits = 32, + predicate_nbits = 1) + pickRegister = [FuInType(x + 1) for x in range(num_fu_inports)] + src_opt_per_tile = [[ + # src dst vc_id opq cmd_type addr operation predicate + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 0, OPT_INC, b1(0), + pickRegister, + [TileInType(4), TileInType(3), TileInType(2), TileInType(1), + # TODO: make below as TileInType(5) to double check. 
+ TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + + [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 1, OPT_INC, b1(0), + pickRegister, + [TileInType(4), TileInType(3), TileInType(2), TileInType(1), + TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + + [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), + + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 2, OPT_ADD, b1(0), + pickRegister, + [TileInType(4), TileInType(3), TileInType(2), TileInType(1), + TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + + [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), + + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 3, OPT_STR, b1(0), + pickRegister, + [TileInType(4), TileInType(3), TileInType(2), TileInType(1), + TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + + [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), + + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 4, OPT_ADD, b1(0), + pickRegister, + [TileInType(4), TileInType(3), TileInType(2), TileInType(1), + TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + + [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), + + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 5, OPT_ADD, b1(0), + pickRegister, + [TileInType(4), TileInType(3), TileInType(2), TileInType(1), + TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + + [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), + + # This last one is for launching kernel. 
+ CtrlPktType(0, i, 0, 0, CMD_LAUNCH, 0, OPT_ADD, b1(0), + pickRegister, + [TileInType(4), TileInType(3), TileInType(2), TileInType(1), + TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + + [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]) + ] for i in range(num_tiles)] + + src_ctrl_pkt = [] + for opt_per_tile in src_opt_per_tile: + src_ctrl_pkt.extend(opt_per_tile) + + th = TestHarness(DUT, FunctionUnit, FuList, DataType, PredicateType, CtrlPktType, + CtrlSignalType, NocPktType, CmdType, num_terminals, width, height, + ctrl_mem_size, data_mem_size_global, data_mem_size_per_bank, + num_banks_per_cgra, src_ctrl_pkt, ctrl_mem_size, + controller2addr_map) + th.elaborate() + th.dut.set_metadata(VerilogVerilatorImportPass.vl_Wno_list, + ['UNSIGNED', 'UNOPTFLAT', 'WIDTH', 'WIDTHCONCAT', + 'ALWCOMBORDER']) + th = config_model_with_cmdline_opts(th, cmdline_opts, duts = ['dut']) + run_sim(th) + From 83678fc058c6a002ea64352cf90384cb2d6fdab6 Mon Sep 17 00:00:00 2001 From: tancheng Date: Tue, 24 Dec 2024 03:01:10 +0000 Subject: [PATCH 07/19] [cleanup] Remove unnecessary tests --- cgra/CGRASeparateCrossbarRTL.py | 104 -------- cgra/CGRAWithCrossbarDataMemRTL.py | 152 ------------ cgra/test/CGRASeparateCrossbarRTL_test.py | 238 ------------------- cgra/test/CGRAWithCrossbarDataMemRTL_test.py | 183 -------------- controller/ControllerRTL.py | 6 + 5 files changed, 6 insertions(+), 677 deletions(-) delete mode 100644 cgra/CGRASeparateCrossbarRTL.py delete mode 100644 cgra/CGRAWithCrossbarDataMemRTL.py delete mode 100644 cgra/test/CGRASeparateCrossbarRTL_test.py delete mode 100644 cgra/test/CGRAWithCrossbarDataMemRTL_test.py diff --git a/cgra/CGRASeparateCrossbarRTL.py b/cgra/CGRASeparateCrossbarRTL.py deleted file mode 100644 index 8ea53bf..0000000 --- a/cgra/CGRASeparateCrossbarRTL.py +++ /dev/null @@ -1,104 +0,0 @@ -""" -========================================================================= 
-CGRASeparateCrossbarRTL.py -========================================================================= - -Author : Cheng Tan - Date : Nov 29, 2024 -""" - -from pymtl3 import * -from ..fu.flexible.FlexibleFuRTL import FlexibleFuRTL -from ..fu.single.MemUnitRTL import MemUnitRTL -from ..fu.single.AdderRTL import AdderRTL -from ..lib.util.common import * -from ..lib.basic.en_rdy.ifcs import SendIfcRTL, RecvIfcRTL -from ..lib.opt_type import * -from ..mem.data.DataMemCL import DataMemCL -from ..mem.data.DataMemRTL import DataMemRTL -from ..noc.ChannelNormalRTL import ChannelNormalRTL -from ..noc.CrossbarSeparateRTL import CrossbarSeparateRTL -from ..tile.TileSeparateCrossbarRTL import TileSeparateCrossbarRTL - - -class CGRASeparateCrossbarRTL(Component): - def construct(s, DataType, PredicateType, CtrlType, width, height, - ctrl_mem_size, data_mem_size, num_ctrl, total_steps, - FunctionUnit, FuList, preload_data = None, - preload_const = None): - - s.num_tiles = width * height - s.num_mesh_ports = 4 - AddrType = mk_bits(clog2(ctrl_mem_size)) - - # Interfaces - s.recv_waddr = [RecvIfcRTL(AddrType) for _ in range(s.num_tiles)] - s.recv_wopt = [RecvIfcRTL(CtrlType) for _ in range(s.num_tiles)] - - # Components - if preload_const == None: - preload_const = [[DataType(0, 0)] for _ in range(width*height)] - s.tile = [TileSeparateCrossbarRTL(DataType, PredicateType, CtrlType, - ctrl_mem_size, data_mem_size, num_ctrl, - total_steps, 4, 2, s.num_mesh_ports, - s.num_mesh_ports, const_list = preload_const[i]) - for i in range( s.num_tiles)] - s.data_mem = DataMemRTL(DataType, data_mem_size, height, height, preload_data) - - # Connections - for i in range(s.num_tiles): - s.recv_waddr[i] //= s.tile[i].recv_waddr - s.recv_wopt[i] //= s.tile[i].recv_wopt - - if i // width > 0: - s.tile[i].send_data[PORT_SOUTH] //= s.tile[i-width].recv_data[PORT_NORTH] - - if i // width < height - 1: - s.tile[i].send_data[PORT_NORTH] //= s.tile[i+width].recv_data[PORT_SOUTH] - - if i % width > 0: - 
s.tile[i].send_data[PORT_WEST] //= s.tile[i-1].recv_data[PORT_EAST] - - if i % width < width - 1: - s.tile[i].send_data[PORT_EAST] //= s.tile[i+1].recv_data[PORT_WEST] - - if i // width == 0: - s.tile[i].send_data[PORT_SOUTH].rdy //= 0 - s.tile[i].recv_data[PORT_SOUTH].en //= 0 - s.tile[i].recv_data[PORT_SOUTH].msg //= DataType(0, 0) - - if i // width == height - 1: - s.tile[i].send_data[PORT_NORTH].rdy //= 0 - s.tile[i].recv_data[PORT_NORTH].en //= 0 - s.tile[i].recv_data[PORT_NORTH].msg //= DataType(0, 0) - - if i % width == 0: - s.tile[i].send_data[PORT_WEST].rdy //= 0 - s.tile[i].recv_data[PORT_WEST].en //= 0 - s.tile[i].recv_data[PORT_WEST].msg //= DataType(0, 0) - - if i % width == width - 1: - s.tile[i].send_data[PORT_EAST].rdy //= 0 - s.tile[i].recv_data[PORT_EAST].en //= 0 - s.tile[i].recv_data[PORT_EAST].msg //= DataType(0, 0) - - if i % width == 0: - s.tile[i].to_mem_raddr //= s.data_mem.recv_raddr[i//width] - s.tile[i].from_mem_rdata //= s.data_mem.send_rdata[i//width] - s.tile[i].to_mem_waddr //= s.data_mem.recv_waddr[i//width] - s.tile[i].to_mem_wdata //= s.data_mem.recv_wdata[i//width] - else: - s.tile[i].to_mem_raddr.rdy //= 0 - s.tile[i].from_mem_rdata.en //= 0 - s.tile[i].from_mem_rdata.msg //= DataType(0, 0) - s.tile[i].to_mem_waddr.rdy //= 0 - s.tile[i].to_mem_wdata.rdy //= 0 - - # Line trace - def line_trace( s ): - # str = "||".join([ x.element.line_trace() for x in s.tile ]) - # str += " :: [" + s.data_mem.line_trace() + "]" - res = "||\n".join([ (("[tile"+str(i)+"]: ") + x.line_trace() + x.ctrl_mem.line_trace()) - for (i,x) in enumerate(s.tile) ]) - res += "\n :: [" + s.data_mem.line_trace() + "] \n" - return res diff --git a/cgra/CGRAWithCrossbarDataMemRTL.py b/cgra/CGRAWithCrossbarDataMemRTL.py deleted file mode 100644 index 646fd88..0000000 --- a/cgra/CGRAWithCrossbarDataMemRTL.py +++ /dev/null @@ -1,152 +0,0 @@ -""" -========================================================================= -CGRAWithCrossbarDataMemRTL.py 
-========================================================================= - -Author : Cheng Tan - Date : Dec 13, 2024 -""" - -from pymtl3 import * -from ..fu.flexible.FlexibleFuRTL import FlexibleFuRTL -from ..fu.single.MemUnitRTL import MemUnitRTL -from ..fu.single.AdderRTL import AdderRTL -from ..lib.util.common import * -from ..lib.basic.en_rdy.ifcs import SendIfcRTL, RecvIfcRTL -from ..lib.basic.val_rdy.ifcs import ValRdySendIfcRTL -from ..lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL -from ..lib.opt_type import * -from ..mem.data.DataMemWithCrossbarRTL import DataMemWithCrossbarRTL -from ..noc.ChannelNormalRTL import ChannelNormalRTL -from ..noc.CrossbarSeparateRTL import CrossbarSeparateRTL -from ..tile.TileSeparateCrossbarRTL import TileSeparateCrossbarRTL -from ..controller.ControllerRTL import ControllerRTL - - -class CGRAWithCrossbarDataMemRTL(Component): - - def construct(s, DataType, PredicateType, CtrlType, NocPktType, - CmdType, ControllerIdType, controller_id, - width, height, ctrl_mem_size, data_mem_size_global, - data_mem_size_per_bank, num_banks_per_cgra, num_ctrl, - total_steps, FunctionUnit, FuList, controller2addr_map, - preload_data = None, preload_const = None): - - s.num_tiles = width * height - s.num_mesh_ports = 4 - CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) - DataAddrType = mk_bits(clog2(data_mem_size_global)) - assert(data_mem_size_per_bank * num_banks_per_cgra <= \ - data_mem_size_global) - - # Interfaces - s.recv_waddr = [RecvIfcRTL(CtrlAddrType) for _ in range(s.num_tiles)] - s.recv_wopt = [RecvIfcRTL(CtrlType) for _ in range(s.num_tiles)] - - # Explicitly provides the ValRdyRecvIfcRTL in the library, as the - # translation pass sometimes not able to distinguish the - # EnRdyRecvIfcRTL from it. 
- s.recv_from_noc = ValRdyRecvIfcRTL(NocPktType) - s.send_to_noc = ValRdySendIfcRTL(NocPktType) - - # s.recv_towards_controller = RecvIfcRTL(DataType) - # s.send_from_controller = SendIfcRTL(DataType) - - - # Components - if preload_const == None: - preload_const = [[DataType(0, 0)] for _ in range(width*height)] - s.tile = [TileSeparateCrossbarRTL(DataType, PredicateType, CtrlType, - ctrl_mem_size, data_mem_size_global, - num_ctrl, total_steps, 4, 2, s.num_mesh_ports, - s.num_mesh_ports, const_list = preload_const[i]) - for i in range( s.num_tiles)] - s.data_mem = DataMemWithCrossbarRTL(NocPktType, DataType, - data_mem_size_global, - data_mem_size_per_bank, - num_banks_per_cgra, height, height, - preload_data) - s.controller = ControllerRTL(ControllerIdType, CmdType, NocPktType, - DataType, DataAddrType, controller_id, - controller2addr_map) - - # Connections - - # Connects data memory with controller. - # s.data_mem.recv_from_noc //= s.controller.send_to_master - # s.data_mem.send_to_noc //= s.controller.recv_from_master - - # The last `recv_raddr` is reserved to connect the controller. - s.data_mem.recv_raddr[height] //= s.controller.send_to_master_load_request_addr - s.data_mem.recv_waddr[height] //= s.controller.send_to_master_store_request_addr - s.data_mem.recv_wdata[height] //= s.controller.send_to_master_store_request_data - # Reserved ... - s.data_mem.recv_from_noc_rdata //= s.controller.send_to_master_load_response_data - # Reserved ... 
- s.data_mem.send_to_noc_load_request_pkt //= s.controller.recv_from_master_load_request_pkt - s.data_mem.send_to_noc_load_response_pkt //= s.controller.recv_from_master_load_response_pkt - s.data_mem.send_to_noc_store_pkt //= s.controller.recv_from_master_store_request_pkt - - s.recv_from_noc //= s.controller.recv_from_noc - s.send_to_noc //= s.controller.send_to_noc - - # s.recv_towards_controller //= s.controller.recv_from_master - # s.send_from_controller //= s.controller.send_to_master - - for i in range(s.num_tiles): - s.recv_waddr[i] //= s.tile[i].recv_waddr - s.recv_wopt[i] //= s.tile[i].recv_wopt - - if i // width > 0: - s.tile[i].send_data[PORT_SOUTH] //= s.tile[i-width].recv_data[PORT_NORTH] - - if i // width < height - 1: - s.tile[i].send_data[PORT_NORTH] //= s.tile[i+width].recv_data[PORT_SOUTH] - - if i % width > 0: - s.tile[i].send_data[PORT_WEST] //= s.tile[i-1].recv_data[PORT_EAST] - - if i % width < width - 1: - s.tile[i].send_data[PORT_EAST] //= s.tile[i+1].recv_data[PORT_WEST] - - if i // width == 0: - s.tile[i].send_data[PORT_SOUTH].rdy //= 0 - s.tile[i].recv_data[PORT_SOUTH].en //= 0 - s.tile[i].recv_data[PORT_SOUTH].msg //= DataType(0, 0) - - if i // width == height - 1: - s.tile[i].send_data[PORT_NORTH].rdy //= 0 - s.tile[i].recv_data[PORT_NORTH].en //= 0 - s.tile[i].recv_data[PORT_NORTH].msg //= DataType(0, 0) - - if i % width == 0: - s.tile[i].send_data[PORT_WEST].rdy //= 0 - s.tile[i].recv_data[PORT_WEST].en //= 0 - s.tile[i].recv_data[PORT_WEST].msg //= DataType(0, 0) - - if i % width == width - 1: - s.tile[i].send_data[PORT_EAST].rdy //= 0 - s.tile[i].recv_data[PORT_EAST].en //= 0 - s.tile[i].recv_data[PORT_EAST].msg //= DataType(0, 0) - - if i % width == 0: - s.tile[i].to_mem_raddr //= s.data_mem.recv_raddr[i//width] - s.tile[i].from_mem_rdata //= s.data_mem.send_rdata[i//width] - s.tile[i].to_mem_waddr //= s.data_mem.recv_waddr[i//width] - s.tile[i].to_mem_wdata //= s.data_mem.recv_wdata[i//width] - else: - s.tile[i].to_mem_raddr.rdy 
//= 0 - s.tile[i].from_mem_rdata.en //= 0 - s.tile[i].from_mem_rdata.msg //= DataType(0, 0) - s.tile[i].to_mem_waddr.rdy //= 0 - s.tile[i].to_mem_wdata.rdy //= 0 - - - # Line trace - def line_trace( s ): - # str = "||".join([ x.element.line_trace() for x in s.tile ]) - # str += " :: [" + s.data_mem.line_trace() + "]" - res = "||\n".join([ (("[tile"+str(i)+"]: ") + x.line_trace() + x.ctrl_mem.line_trace()) - for (i,x) in enumerate(s.tile) ]) - res += "\n :: [" + s.data_mem.line_trace() + "] \n" - return res diff --git a/cgra/test/CGRASeparateCrossbarRTL_test.py b/cgra/test/CGRASeparateCrossbarRTL_test.py deleted file mode 100644 index a80e20b..0000000 --- a/cgra/test/CGRASeparateCrossbarRTL_test.py +++ /dev/null @@ -1,238 +0,0 @@ -""" -========================================================================== -CGRASeparateCrossbarRTL_test.py -========================================================================== -Test cases for CGRAs with different configurations. - -Author : Cheng Tan - Date : Nov 29, 2024 -""" - - -from pymtl3 import * -from pymtl3.stdlib.test_utils import (run_sim, - config_model_with_cmdline_opts) -from pymtl3.passes.backends.verilog import (VerilogTranslationPass, - VerilogVerilatorImportPass) -from ..CGRASeparateCrossbarRTL import CGRASeparateCrossbarRTL -from ...fu.flexible.FlexibleFuRTL import FlexibleFuRTL -from ...fu.single.AdderRTL import AdderRTL -from ...fu.single.MemUnitRTL import MemUnitRTL -from ...fu.single.ShifterRTL import ShifterRTL -from ...lib.messages import * -from ...lib.opt_type import * -from ...lib.basic.en_rdy.test_srcs import TestSrcRTL - - -#------------------------------------------------------------------------- -# Test harness -#------------------------------------------------------------------------- - -class TestHarness(Component): - - def construct(s, DUT, FunctionUnit, FuList, DataType, PredicateType, - CtrlType, width, height, ctrl_mem_size, data_mem_size, - src_opt, ctrl_waddr): - - s.num_tiles = width * 
height - AddrType = mk_bits(clog2(ctrl_mem_size)) - - s.src_opt = [TestSrcRTL(CtrlType, src_opt[i]) - for i in range(s.num_tiles)] - s.ctrl_waddr = [TestSrcRTL(AddrType, ctrl_waddr[i]) - for i in range(s.num_tiles)] - - s.dut = DUT(DataType, PredicateType, CtrlType, width, height, - ctrl_mem_size, data_mem_size, len(src_opt[0]), - len(src_opt[0]), FunctionUnit, FuList) - - for i in range(s.num_tiles): - connect(s.src_opt[i].send, s.dut.recv_wopt[i]) - connect(s.ctrl_waddr[i].send, s.dut.recv_waddr[i]) - - def done(s): - done = True - for i in range(s.num_tiles): - if not s.src_opt[i].done(): - done = False - break - return done - - def line_trace(s): - return s.dut.line_trace() - -def test_homo_2x2(cmdline_opts): - num_tile_inports = 4 - num_tile_outports = 4 - num_fu_inports = 4 - num_fu_outports = 2 - num_routing_outports = num_tile_outports + num_fu_inports - ctrl_mem_size = 6 - data_mem_size = 8 - width = 2 - height = 2 - TileInType = mk_bits(clog2(num_tile_inports + 1)) - FuInType = mk_bits(clog2(num_fu_inports + 1)) - FuOutType = mk_bits(clog2(num_fu_outports + 1)) - AddrType = mk_bits(clog2(ctrl_mem_size)) - num_tiles = width * height - DUT = CGRASeparateCrossbarRTL - FunctionUnit = FlexibleFuRTL - FuList = [MemUnitRTL, AdderRTL] - DataType = mk_data(16, 1) - PredicateType = mk_predicate(1, 1) - CtrlType = mk_separate_ctrl(num_fu_inports, num_fu_outports, - num_tile_inports, num_tile_outports) - pickRegister = [FuInType(x + 1) for x in range(num_fu_inports)] - src_opt = [[ - CtrlType(OPT_INC, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - # TODO: make below as TileInType(5) to double check. 
- TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - CtrlType(OPT_INC, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - - CtrlType(OPT_ADD, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - - CtrlType(OPT_STR, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - - CtrlType(OPT_ADD, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - - CtrlType(OPT_ADD, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]) - - ] for _ in range(num_tiles)] - ctrl_waddr = [[AddrType(0), AddrType(1), AddrType(2), AddrType(3), - AddrType(4), AddrType(5)] for _ in range(num_tiles)] - th = TestHarness(DUT, FunctionUnit, FuList, DataType, PredicateType, - CtrlType, width, height, ctrl_mem_size, data_mem_size, - src_opt, ctrl_waddr) - th.elaborate() - 
th.dut.set_metadata(VerilogVerilatorImportPass.vl_Wno_list, - ['UNSIGNED', 'UNOPTFLAT', 'WIDTH', 'WIDTHCONCAT', - 'ALWCOMBORDER']) - th = config_model_with_cmdline_opts(th, cmdline_opts, duts=['dut']) - run_sim(th) - -def test_hetero_2x2(cmdline_opts): - num_tile_inports = 4 - num_tile_outports = 4 - num_fu_inports = 4 - num_fu_outports = 2 - num_routing_outports = num_tile_outports + num_fu_inports - ctrl_mem_size = 6 - width = 2 - height = 2 - TileInType = mk_bits(clog2(num_tile_inports + 1)) - AddrType = mk_bits(clog2(ctrl_mem_size)) - num_tiles = width * height - data_mem_size = 8 - num_fu_in = 4 - DUT = CGRASeparateCrossbarRTL - FunctionUnit = FlexibleFuRTL - FuList = [MemUnitRTL, AdderRTL] - DataType = mk_data(16, 1) - PredicateType = mk_predicate(1, 1) - CtrlType = mk_separate_ctrl(num_fu_inports, num_fu_outports, - num_tile_inports, num_tile_outports) - FuInType = mk_bits(clog2(num_fu_inports + 1)) - FuOutType = mk_bits(clog2(num_fu_outports + 1)) - pickRegister = [FuInType(x + 1) for x in range(num_fu_inports)] - - src_opt = [[ - CtrlType(OPT_INC, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - # TODO: make below as TileInType(5) to double check. 
- TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - CtrlType(OPT_INC, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - - CtrlType(OPT_ADD, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - - CtrlType(OPT_STR, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - - CtrlType(OPT_ADD, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - - CtrlType(OPT_ADD, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]) - - ] for _ in range(num_tiles)] - - ctrl_waddr = [[AddrType(0), AddrType(1), AddrType(2), AddrType(3), - AddrType(4), AddrType(5)] for _ in range(num_tiles)] - th = TestHarness(DUT, FunctionUnit, FuList, DataType, PredicateType, - CtrlType, width, height, ctrl_mem_size, data_mem_size, - src_opt, ctrl_waddr) - th.set_param("top.dut.tile[1].construct", 
FuList = [ShifterRTL]) - th.elaborate() - th.dut.set_metadata(VerilogVerilatorImportPass.vl_Wno_list, - ['UNSIGNED', 'UNOPTFLAT', 'WIDTH', 'WIDTHCONCAT', - 'ALWCOMBORDER']) - th = config_model_with_cmdline_opts(th, cmdline_opts, duts=['dut']) - #th.set_param("top.dut.tile[1].construct", FuList=[MemUnitRTL,ShifterRTL]) - run_sim(th) - diff --git a/cgra/test/CGRAWithCrossbarDataMemRTL_test.py b/cgra/test/CGRAWithCrossbarDataMemRTL_test.py deleted file mode 100644 index ea4e7f9..0000000 --- a/cgra/test/CGRAWithCrossbarDataMemRTL_test.py +++ /dev/null @@ -1,183 +0,0 @@ -""" -========================================================================== -CGRAWithCrossbarDataMemRTL_test.py -========================================================================== -Test cases for CGRA with crossbar-based data memory. - -Author : Cheng Tan - Date : Dec 14, 2024 -""" - - -from pymtl3 import * -from pymtl3.stdlib.test_utils import (run_sim, - config_model_with_cmdline_opts) -from pymtl3.passes.backends.verilog import (VerilogTranslationPass, - VerilogVerilatorImportPass) -from ..CGRAWithCrossbarDataMemRTL import CGRAWithCrossbarDataMemRTL -from ...fu.flexible.FlexibleFuRTL import FlexibleFuRTL -from ...fu.single.AdderRTL import AdderRTL -from ...fu.single.MemUnitRTL import MemUnitRTL -from ...fu.single.ShifterRTL import ShifterRTL -from ...lib.messages import * -from ...lib.opt_type import * -from ...lib.basic.en_rdy.test_srcs import TestSrcRTL - - -#------------------------------------------------------------------------- -# Test harness -#------------------------------------------------------------------------- - -class TestHarness(Component): - - def construct(s, DUT, FunctionUnit, FuList, DataType, - PredicateType, CtrlType, NocPktType, CmdType, - ControllerIdType, controller_id, width, height, - ctrl_mem_size, data_mem_size_global, - data_mem_size_per_bank, num_banks_per_cgra, - src_opt, ctrl_waddr, controller2addr_map): - - s.num_tiles = width * height - CtrlAddrType = 
mk_bits(clog2(ctrl_mem_size)) - DataAddrType = mk_bits(clog2(data_mem_size_global)) - - s.src_opt = [TestSrcRTL(CtrlType, src_opt[i]) - for i in range(s.num_tiles)] - s.ctrl_waddr = [TestSrcRTL(CtrlAddrType, ctrl_waddr[i]) - for i in range(s.num_tiles)] - - s.dut = DUT(DataType, PredicateType, CtrlType, NocPktType, - CmdType, ControllerIdType, controller_id, - width, height, ctrl_mem_size, data_mem_size_global, - data_mem_size_per_bank, num_banks_per_cgra, - len(src_opt[0]), len(src_opt[0]), FunctionUnit, FuList, - controller2addr_map) - - # Connections - s.dut.send_to_noc.rdy //= 0 - s.dut.recv_from_noc.val //= 0 - s.dut.recv_from_noc.msg //= NocPktType(0, 0, 0, 0, 0, 0) - - for i in range(s.num_tiles): - connect(s.src_opt[i].send, s.dut.recv_wopt[i]) - connect(s.ctrl_waddr[i].send, s.dut.recv_waddr[i]) - - def done(s): - done = True - for i in range(s.num_tiles): - if not s.src_opt[i].done(): - done = False - break - return done - - def line_trace(s): - return s.dut.line_trace() - -def test_homo_2x2(cmdline_opts): - num_tile_inports = 4 - num_tile_outports = 4 - num_fu_inports = 4 - num_fu_outports = 2 - num_routing_outports = num_tile_outports + num_fu_inports - ctrl_mem_size = 6 - data_mem_size_global = 512 - data_mem_size_per_bank = 32 - num_banks_per_cgra = 2 - width = 2 - height = 2 - TileInType = mk_bits(clog2(num_tile_inports + 1)) - FuInType = mk_bits(clog2(num_fu_inports + 1)) - FuOutType = mk_bits(clog2(num_fu_outports + 1)) - addr_nbits = clog2(data_mem_size_global) - AddrType = mk_bits(addr_nbits) - CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) - num_tiles = width * height - DUT = CGRAWithCrossbarDataMemRTL - FunctionUnit = FlexibleFuRTL - FuList = [MemUnitRTL, AdderRTL] - DataType = mk_data(32, 1) - PredicateType = mk_predicate(1, 1) - - nterminals = 4 - CmdType = mk_bits(4) - ControllerIdType = mk_bits(clog2(nterminals)) - controller_id = 1 - controller2addr_map = { - 0: [0, 3], - 1: [4, 7], - 2: [8, 11], - 3: [12, 15], - } - - CtrlType = 
mk_separate_ctrl(num_fu_inports, num_fu_outports, - num_tile_inports, num_tile_outports) - NocPktType = mk_ring_multi_cgra_pkt(nrouters = nterminals, - addr_nbits = addr_nbits, - data_nbits = 32, - predicate_nbits = 1) - pickRegister = [FuInType(x + 1) for x in range(num_fu_inports)] - src_opt = [[ - CtrlType(OPT_INC, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - # TODO: make below as TileInType(5) to double check. - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - CtrlType(OPT_INC, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - - CtrlType(OPT_ADD, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - - CtrlType(OPT_STR, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - - CtrlType(OPT_ADD, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - - CtrlType(OPT_ADD, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - 
[FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]) - - ] for _ in range(num_tiles)] - ctrl_waddr = [[CtrlAddrType(0), CtrlAddrType(1), CtrlAddrType(2), CtrlAddrType(3), - CtrlAddrType(4), CtrlAddrType(5)] for _ in range(num_tiles)] - th = TestHarness(DUT, FunctionUnit, FuList, DataType, PredicateType, - CtrlType, NocPktType, CmdType, ControllerIdType, - controller_id, width, height, ctrl_mem_size, - data_mem_size_global, data_mem_size_per_bank, - num_banks_per_cgra, src_opt, ctrl_waddr, - controller2addr_map) - th.elaborate() - th.dut.set_metadata(VerilogVerilatorImportPass.vl_Wno_list, - ['UNSIGNED', 'UNOPTFLAT', 'WIDTH', 'WIDTHCONCAT', - 'ALWCOMBORDER']) - th = config_model_with_cmdline_opts(th, cmdline_opts, duts=['dut']) - run_sim(th) - diff --git a/controller/ControllerRTL.py b/controller/ControllerRTL.py index 032b8ab..68ed0b5 100644 --- a/controller/ControllerRTL.py +++ b/controller/ControllerRTL.py @@ -111,6 +111,12 @@ def construct(s, ControllerIdType, CmdType, CtrlPktType, NocPktType, s.send_to_master_store_request_data_queue.send //= s.send_to_master_store_request_data # For control signals delivery from CPU to tiles. + # TODO: https://github.com/tancheng/VectorCGRA/issues/11 -- The request needs + # to go through the crossbar for arbitration as well. The packet targeting local + # tiles can be delivered via the ring within the CGRA; the packet targeting + # other CGRAs can be delivered via the NoC across CGRAs. Note that the packet + # format can be in a universal fashion to support both data and config. Later + # on, the format can be packet-based or flit-based.
s.recv_from_cpu_ctrl_pkt //= s.recv_ctrl_pkt_queue.recv s.recv_ctrl_pkt_queue.send //= s.send_to_ctrl_ring_ctrl_pkt From 2e00233dacbd4ac6580926a81d54724b74fcb89d Mon Sep 17 00:00:00 2001 From: tancheng Date: Tue, 24 Dec 2024 03:45:39 +0000 Subject: [PATCH 08/19] [feature] Enable translation --- .github/workflows/python-package.yml | 5 +- .../CgraCrossbarDataMemRingCtrlMemRTL_test.py | 9 +- .../CgraCrossbarDataMemRingCtrlMemRTL_test.py | 202 ++++++++++++++++++ mem/ctrl/CtrlMemDynamicRTL.py | 14 +- 4 files changed, 219 insertions(+), 11 deletions(-) create mode 100644 cgra/translate/CgraCrossbarDataMemRingCtrlMemRTL_test.py diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 2850045..e1f8af1 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -75,10 +75,11 @@ jobs: # Ring network simulation. pytest ../noc/PyOCN/pymtl3_net/ringnet/test/RingNetworkRTL_test.py --tb=short -sv # CGRA with separate crossbars (for tiles and FUs), crossbar-based data - # memory (for multi-bank), and controller. - pytest --tb=short -sv ../cgra/translate/CGRAWithCrossbarDataMemRTL_test.py --test-verilog --dump-vtb --dump-vcd + # memory (for multi-bank), ring-based control memories, and controller. + pytest --tb=short -sv ../cgra/translate/CgraCrossbarDataMemRingCtrlMemRTL_test.py --test-verilog --dump-vtb --dump-vcd # CGRAs are interconnected with ring topology. The CGRA contains # separate crossbars (for tiles and FUs), crossbar-based data memory (for # multi-bank), and controller. 
pytest --tb=short -sv ../scale_out/translate/RingMultiCGRARTL_test.py --test-verilog --dump-vtb --dump-vcd + pytest --tb=short -sv ../scale_out/translate/RingMultiCgraRingCtrlMemRTL_test.py --test-verilog --dump-vtb --dump-vcd diff --git a/cgra/test/CgraCrossbarDataMemRingCtrlMemRTL_test.py b/cgra/test/CgraCrossbarDataMemRingCtrlMemRTL_test.py index e604f9f..a4db3ce 100644 --- a/cgra/test/CgraCrossbarDataMemRingCtrlMemRTL_test.py +++ b/cgra/test/CgraCrossbarDataMemRingCtrlMemRTL_test.py @@ -42,11 +42,7 @@ def construct(s, DUT, FunctionUnit, FuList, DataType, s.num_tiles = width * height CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) DataAddrType = mk_bits(clog2(data_mem_size_global)) - s.src_ctrl_pkt = ValRdyTestSrcRTL(CtrlPktType, src_ctrl_pkt) - # s.ctrl_waddr = [TestSrcRTL(CtrlAddrType, ctrl_waddr[i]) - # for i in range(s.num_tiles)] - s.dut = DUT(DataType, PredicateType, CtrlPktType, CtrlSignalType, NocPktType, CmdType, ControllerIdType, controller_id, width, height, ctrl_mem_size, data_mem_size_global, @@ -191,9 +187,6 @@ def test_homo_2x2(cmdline_opts): for opt_per_tile in src_opt_per_tile: src_ctrl_pkt.extend(opt_per_tile) - # ctrl_waddr = [[CtrlAddrType(0), CtrlAddrType(1), CtrlAddrType(2), CtrlAddrType(3), - # CtrlAddrType(4), CtrlAddrType(5)] for _ in range(num_tiles)] - th = TestHarness(DUT, FunctionUnit, FuList, DataType, PredicateType, CtrlPktType, CtrlSignalType, NocPktType, CmdType, ControllerIdType, controller_id, width, height, @@ -204,6 +197,6 @@ def test_homo_2x2(cmdline_opts): th.dut.set_metadata(VerilogVerilatorImportPass.vl_Wno_list, ['UNSIGNED', 'UNOPTFLAT', 'WIDTH', 'WIDTHCONCAT', 'ALWCOMBORDER']) - th = config_model_with_cmdline_opts(th, cmdline_opts, duts=['dut']) + th = config_model_with_cmdline_opts(th, cmdline_opts, duts = ['dut']) run_sim(th) diff --git a/cgra/translate/CgraCrossbarDataMemRingCtrlMemRTL_test.py b/cgra/translate/CgraCrossbarDataMemRingCtrlMemRTL_test.py new file mode 100644 index 0000000..a4db3ce --- /dev/null +++ 
b/cgra/translate/CgraCrossbarDataMemRingCtrlMemRTL_test.py @@ -0,0 +1,202 @@ +""" +========================================================================== +CgraCrossbarDataMemRingCtrlMemRTL_test.py +========================================================================== +Test cases for CGRA with crossbar-based data memory and ring-based control +memory of each tile. + +Author : Cheng Tan + Date : Dec 22, 2024 +""" + + +from pymtl3 import * +from pymtl3.stdlib.test_utils import (run_sim, + config_model_with_cmdline_opts) +from pymtl3.passes.backends.verilog import (VerilogTranslationPass, + VerilogVerilatorImportPass) +from ..CgraCrossbarDataMemRingCtrlMemRTL import CgraCrossbarDataMemRingCtrlMemRTL +from ...fu.flexible.FlexibleFuRTL import FlexibleFuRTL +from ...fu.single.AdderRTL import AdderRTL +from ...fu.single.MemUnitRTL import MemUnitRTL +from ...fu.single.ShifterRTL import ShifterRTL +from ...lib.messages import * +from ...lib.cmd_type import * +from ...lib.opt_type import * +from ...lib.basic.en_rdy.test_srcs import TestSrcRTL +from ...lib.basic.val_rdy.SourceRTL import SourceRTL as ValRdyTestSrcRTL + +#------------------------------------------------------------------------- +# Test harness +#------------------------------------------------------------------------- + +class TestHarness(Component): + + def construct(s, DUT, FunctionUnit, FuList, DataType, + PredicateType, CtrlPktType, CtrlSignalType, NocPktType, + CmdType, ControllerIdType, controller_id, width, height, + ctrl_mem_size, data_mem_size_global, + data_mem_size_per_bank, num_banks_per_cgra, + src_ctrl_pkt, ctrl_steps, controller2addr_map): + + s.num_tiles = width * height + CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) + DataAddrType = mk_bits(clog2(data_mem_size_global)) + s.src_ctrl_pkt = ValRdyTestSrcRTL(CtrlPktType, src_ctrl_pkt) + s.dut = DUT(DataType, PredicateType, CtrlPktType, CtrlSignalType, + NocPktType, CmdType, ControllerIdType, controller_id, + width, height, ctrl_mem_size, 
data_mem_size_global, + data_mem_size_per_bank, num_banks_per_cgra, + ctrl_steps, ctrl_steps, FunctionUnit, FuList, + controller2addr_map) + + # Connections + s.dut.send_to_noc.rdy //= 0 + s.dut.recv_from_noc.val //= 0 + s.dut.recv_from_noc.msg //= NocPktType(0, 0, 0, 0, 0, 0) + + s.src_ctrl_pkt.send //= s.dut.recv_from_cpu_ctrl_pkt + + def done(s): + return s.src_ctrl_pkt.done() + + def line_trace(s): + return s.dut.line_trace() + +def test_homo_2x2(cmdline_opts): + num_tile_inports = 4 + num_tile_outports = 4 + num_fu_inports = 4 + num_fu_outports = 2 + num_routing_outports = num_tile_outports + num_fu_inports + ctrl_mem_size = 6 + data_mem_size_global = 512 + data_mem_size_per_bank = 32 + num_banks_per_cgra = 2 + width = 2 + height = 2 + num_terminals = 4 + num_ctrl_actions = 6 + num_ctrl_operations = 64 + TileInType = mk_bits(clog2(num_tile_inports + 1)) + FuInType = mk_bits(clog2(num_fu_inports + 1)) + FuOutType = mk_bits(clog2(num_fu_outports + 1)) + addr_nbits = clog2(data_mem_size_global) + AddrType = mk_bits(addr_nbits) + CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) + num_tiles = width * height + DUT = CgraCrossbarDataMemRingCtrlMemRTL + FunctionUnit = FlexibleFuRTL + FuList = [MemUnitRTL, AdderRTL] + DataType = mk_data(32, 1) + PredicateType = mk_predicate(1, 1) + + CmdType = mk_bits(4) + ControllerIdType = mk_bits(clog2(num_terminals)) + controller_id = 1 + controller2addr_map = { + 0: [0, 3], + 1: [4, 7], + 2: [8, 11], + 3: [12, 15], + } + + CtrlPktType = \ + mk_ring_across_tiles_pkt(width * height, + num_ctrl_actions, + ctrl_mem_size, + num_ctrl_operations, + num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports) + + CtrlSignalType = \ + mk_separate_ctrl(num_ctrl_operations, + num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports) + + NocPktType = mk_ring_multi_cgra_pkt(nrouters = num_terminals, + addr_nbits = addr_nbits, + data_nbits = 32, + predicate_nbits = 1) + pickRegister = [FuInType(x + 1) for x in 
range(num_fu_inports)] + src_opt_per_tile = [[ + # src dst vc_id opq cmd_type addr operation predicate + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 0, OPT_INC, b1(0), + pickRegister, + [TileInType(4), TileInType(3), TileInType(2), TileInType(1), + # TODO: make below as TileInType(5) to double check. + TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + + [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 1, OPT_INC, b1(0), + pickRegister, + [TileInType(4), TileInType(3), TileInType(2), TileInType(1), + TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + + [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), + + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 2, OPT_ADD, b1(0), + pickRegister, + [TileInType(4), TileInType(3), TileInType(2), TileInType(1), + TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + + [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), + + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 3, OPT_STR, b1(0), + pickRegister, + [TileInType(4), TileInType(3), TileInType(2), TileInType(1), + TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + + [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), + + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 4, OPT_ADD, b1(0), + pickRegister, + [TileInType(4), TileInType(3), TileInType(2), TileInType(1), + TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + + [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), + + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 5, OPT_ADD, b1(0), + pickRegister, + [TileInType(4), TileInType(3), TileInType(2), TileInType(1), + TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + + [FuOutType(0), FuOutType(0), 
FuOutType(0), FuOutType(0), + FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), + + # This last one is for launching kernel. + CtrlPktType(0, i, 0, 0, CMD_LAUNCH, 0, OPT_ADD, b1(0), + pickRegister, + [TileInType(4), TileInType(3), TileInType(2), TileInType(1), + TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + + [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]) + ] for i in range(num_tiles)] + + src_ctrl_pkt = [] + for opt_per_tile in src_opt_per_tile: + src_ctrl_pkt.extend(opt_per_tile) + + th = TestHarness(DUT, FunctionUnit, FuList, DataType, PredicateType, + CtrlPktType, CtrlSignalType, NocPktType, CmdType, + ControllerIdType, controller_id, width, height, + ctrl_mem_size, data_mem_size_global, + data_mem_size_per_bank, num_banks_per_cgra, + src_ctrl_pkt, ctrl_mem_size, controller2addr_map) + th.elaborate() + th.dut.set_metadata(VerilogVerilatorImportPass.vl_Wno_list, + ['UNSIGNED', 'UNOPTFLAT', 'WIDTH', 'WIDTHCONCAT', + 'ALWCOMBORDER']) + th = config_model_with_cmdline_opts(th, cmdline_opts, duts = ['dut']) + run_sim(th) + diff --git a/mem/ctrl/CtrlMemDynamicRTL.py b/mem/ctrl/CtrlMemDynamicRTL.py index bce3138..f365166 100644 --- a/mem/ctrl/CtrlMemDynamicRTL.py +++ b/mem/ctrl/CtrlMemDynamicRTL.py @@ -56,9 +56,20 @@ def construct(s, CtrlPktType, CtrlSignalType, ctrl_mem_size, @update def update_msg(): + s.recv_pkt_queue.send.rdy @= 0 s.reg_file.wen[0] @= 0 - s.reg_file.wdata[0] @= CtrlSignalType() s.reg_file.waddr[0] @= s.recv_pkt_queue.send.msg.ctrl_addr + # Initializes the fields of the control signal. 
+ # s.reg_file.wdata[0] @= CtrlSignalType() + s.reg_file.wdata[0].ctrl @= 0 + s.reg_file.wdata[0].predicate @= 0 + for i in range(num_fu_inports): + s.reg_file.wdata[0].fu_in[i] @= 0 + for i in range(num_routing_outports): + s.reg_file.wdata[0].routing_xbar_outport[i] @= 0 + s.reg_file.wdata[0].fu_xbar_outport[i] @= 0 + for i in range(num_tile_inports): + s.reg_file.wdata[0].routing_predicate_in[i] @= 0 if s.recv_pkt_queue.send.val & (s.recv_pkt_queue.send.msg.ctrl_action == CMD_CONFIG): s.reg_file.wen[0] @= 1 # s.recv_pkt_queue.deq_en @@ -86,6 +97,7 @@ def update_msg(): @update def update_send_out_signal(): + s.send_ctrl.en @= 0 if s.start_iterate_ctrl == b1(1): if ((total_ctrl_steps > 0) & \ (s.times == TimeType(total_ctrl_steps))) | \ From c083826afaf2905467fd35d789a0ccdaec200fb1 Mon Sep 17 00:00:00 2001 From: tancheng Date: Tue, 24 Dec 2024 03:50:06 +0000 Subject: [PATCH 09/19] [test] Actions --- .github/workflows/python-package.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index e1f8af1..d260935 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -66,7 +66,8 @@ jobs: pytest ../cgra/translate/VectorCGRAKingMeshRTL_test.py -xvs --tb=short --test-verilog --dump-vtb --dump-vcd # Separate crossbars CGRA simulation/translation. pytest ../cgra/test/CGRASeparateCrossbarRTL_test.py -xvs --tb=short --test-verilog --dump-vtb --dump-vcd - pytest ../cgra/translate/CGRASeparateCrossbarRTL_test.py -xvs --tb=short --test-verilog --dump-vtb --dump-vcd + pytest ../cgra/test/CGRASeparateCrossbarRTL_test.py -xvs --tb=short --test-verilog --dump-vtb --dump-vcd + pytest ../cgra/translate/CgraCrossbarDataMemRingCtrlMemRTL_test.py -xvs --tb=short --test-verilog --dump-vtb --dump-vcd # 3x2 CGRA performs 2x2 matmul translation. 
pytest ../cgra/translate/CGRAMemBottomRTL_matmul_2x2_test.py -xvs --tb=short --test-verilog --dump-vtb --dump-vcd # 3x3 CGRA performs 2x2 matmul simulation/translation. From 6ce63da7b5f7c60429c2eebd56211b884634fbc3 Mon Sep 17 00:00:00 2001 From: tancheng Date: Tue, 24 Dec 2024 04:23:40 +0000 Subject: [PATCH 10/19] [feature] Remove unnecessary tests in actions for translation --- .github/workflows/python-package.yml | 2 - .../translate/CGRASeparateCrossbarRTL_test.py | 148 -------------- .../CGRAWithCrossbarDataMemRTL_test.py | 183 ----------------- scale_out/RingMultiCGRARTL.py | 71 ------- scale_out/test/RingMultiCGRARTL_test.py | 184 ------------------ scale_out/translate/RingMultiCGRARTL_test.py | 184 ------------------ 6 files changed, 772 deletions(-) delete mode 100644 cgra/translate/CGRASeparateCrossbarRTL_test.py delete mode 100644 cgra/translate/CGRAWithCrossbarDataMemRTL_test.py delete mode 100644 scale_out/RingMultiCGRARTL.py delete mode 100644 scale_out/test/RingMultiCGRARTL_test.py delete mode 100644 scale_out/translate/RingMultiCGRARTL_test.py diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index d260935..56e38ab 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -65,8 +65,6 @@ jobs: # Kingmesh topology CGRA translation. pytest ../cgra/translate/VectorCGRAKingMeshRTL_test.py -xvs --tb=short --test-verilog --dump-vtb --dump-vcd # Separate crossbars CGRA simulation/translation. - pytest ../cgra/test/CGRASeparateCrossbarRTL_test.py -xvs --tb=short --test-verilog --dump-vtb --dump-vcd - pytest ../cgra/test/CGRASeparateCrossbarRTL_test.py -xvs --tb=short --test-verilog --dump-vtb --dump-vcd pytest ../cgra/translate/CgraCrossbarDataMemRingCtrlMemRTL_test.py -xvs --tb=short --test-verilog --dump-vtb --dump-vcd # 3x2 CGRA performs 2x2 matmul translation. 
pytest ../cgra/translate/CGRAMemBottomRTL_matmul_2x2_test.py -xvs --tb=short --test-verilog --dump-vtb --dump-vcd diff --git a/cgra/translate/CGRASeparateCrossbarRTL_test.py b/cgra/translate/CGRASeparateCrossbarRTL_test.py deleted file mode 100644 index ad00c6c..0000000 --- a/cgra/translate/CGRASeparateCrossbarRTL_test.py +++ /dev/null @@ -1,148 +0,0 @@ -""" -========================================================================== -CGRASeparateCrossbarRTL_test.py -========================================================================== -Test cases for CGRAs with different configurations. - -Author : Cheng Tan - Date : Nov 29, 2024 -""" - - -from pymtl3 import * -from pymtl3.stdlib.test_utils import (run_sim, - config_model_with_cmdline_opts) -from pymtl3.passes.backends.verilog import (VerilogTranslationPass, - VerilogVerilatorImportPass) -from ..CGRASeparateCrossbarRTL import CGRASeparateCrossbarRTL -from ...fu.flexible.FlexibleFuRTL import FlexibleFuRTL -from ...fu.single.AdderRTL import AdderRTL -from ...fu.single.MemUnitRTL import MemUnitRTL -from ...fu.single.ShifterRTL import ShifterRTL -from ...lib.basic.en_rdy.test_srcs import TestSrcRTL -from ...lib.messages import * -from ...lib.opt_type import * - - -#------------------------------------------------------------------------- -# Test harness -#------------------------------------------------------------------------- - -class TestHarness(Component): - - def construct(s, DUT, FunctionUnit, FuList, DataType, PredicateType, - CtrlType, width, height, ctrl_mem_size, data_mem_size, - src_opt, ctrl_waddr): - - s.num_tiles = width * height - AddrType = mk_bits(clog2(ctrl_mem_size)) - - s.src_opt = [TestSrcRTL(CtrlType, src_opt[i]) - for i in range(s.num_tiles)] - s.ctrl_waddr = [TestSrcRTL(AddrType, ctrl_waddr[i]) - for i in range(s.num_tiles)] - - s.dut = DUT(DataType, PredicateType, CtrlType, width, height, - ctrl_mem_size, data_mem_size, len(src_opt[0]), - len(src_opt[0]), FunctionUnit, FuList) - - for i in 
range(s.num_tiles): - connect(s.src_opt[i].send, s.dut.recv_wopt[i]) - connect(s.ctrl_waddr[i].send, s.dut.recv_waddr[i]) - - def done(s): - done = True - for i in range(s.num_tiles): - if not s.src_opt[i].done(): - done = False - break - return done - - def line_trace(s): - return s.dut.line_trace() - -def test_homo_2x2(cmdline_opts): - num_tile_inports = 4 - num_tile_outports = 4 - num_fu_inports = 4 - num_fu_outports = 2 - num_routing_outports = num_tile_outports + num_fu_inports - ctrl_mem_size = 6 - data_mem_size = 8 - width = 2 - height = 2 - TileInType = mk_bits(clog2(num_tile_inports + 1)) - FuInType = mk_bits(clog2(num_fu_inports + 1)) - FuOutType = mk_bits(clog2(num_fu_outports + 1)) - AddrType = mk_bits(clog2(ctrl_mem_size)) - num_tiles = width * height - DUT = CGRASeparateCrossbarRTL - FunctionUnit = FlexibleFuRTL - FuList = [MemUnitRTL, AdderRTL] - DataType = mk_data(16, 1) - PredicateType = mk_predicate(1, 1) - CtrlType = mk_separate_ctrl(num_fu_inports, num_fu_outports, - num_tile_inports, num_tile_outports) - pickRegister = [FuInType(x + 1) for x in range(num_fu_inports)] - src_opt = [[ - CtrlType(OPT_INC, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - # TODO: make below as TileInType(5) to double check. 
- TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - CtrlType(OPT_INC, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - - CtrlType(OPT_ADD, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - - CtrlType(OPT_STR, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - - CtrlType(OPT_ADD, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - - CtrlType(OPT_ADD, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]) - - ] for _ in range(num_tiles)] - ctrl_waddr = [[AddrType(0), AddrType(1), AddrType(2), AddrType(3), - AddrType(4), AddrType(5)] for _ in range(num_tiles)] - th = TestHarness(DUT, FunctionUnit, FuList, DataType, PredicateType, - CtrlType, width, height, ctrl_mem_size, data_mem_size, - src_opt, ctrl_waddr) - th.elaborate() - 
th.dut.set_metadata(VerilogVerilatorImportPass.vl_Wno_list, - ['UNSIGNED', 'UNOPTFLAT', 'WIDTH', 'WIDTHCONCAT', - 'ALWCOMBORDER']) - th = config_model_with_cmdline_opts(th, cmdline_opts, duts=['dut']) - run_sim(th) - diff --git a/cgra/translate/CGRAWithCrossbarDataMemRTL_test.py b/cgra/translate/CGRAWithCrossbarDataMemRTL_test.py deleted file mode 100644 index ea4e7f9..0000000 --- a/cgra/translate/CGRAWithCrossbarDataMemRTL_test.py +++ /dev/null @@ -1,183 +0,0 @@ -""" -========================================================================== -CGRAWithCrossbarDataMemRTL_test.py -========================================================================== -Test cases for CGRA with crossbar-based data memory. - -Author : Cheng Tan - Date : Dec 14, 2024 -""" - - -from pymtl3 import * -from pymtl3.stdlib.test_utils import (run_sim, - config_model_with_cmdline_opts) -from pymtl3.passes.backends.verilog import (VerilogTranslationPass, - VerilogVerilatorImportPass) -from ..CGRAWithCrossbarDataMemRTL import CGRAWithCrossbarDataMemRTL -from ...fu.flexible.FlexibleFuRTL import FlexibleFuRTL -from ...fu.single.AdderRTL import AdderRTL -from ...fu.single.MemUnitRTL import MemUnitRTL -from ...fu.single.ShifterRTL import ShifterRTL -from ...lib.messages import * -from ...lib.opt_type import * -from ...lib.basic.en_rdy.test_srcs import TestSrcRTL - - -#------------------------------------------------------------------------- -# Test harness -#------------------------------------------------------------------------- - -class TestHarness(Component): - - def construct(s, DUT, FunctionUnit, FuList, DataType, - PredicateType, CtrlType, NocPktType, CmdType, - ControllerIdType, controller_id, width, height, - ctrl_mem_size, data_mem_size_global, - data_mem_size_per_bank, num_banks_per_cgra, - src_opt, ctrl_waddr, controller2addr_map): - - s.num_tiles = width * height - CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) - DataAddrType = mk_bits(clog2(data_mem_size_global)) - - s.src_opt = 
[TestSrcRTL(CtrlType, src_opt[i]) - for i in range(s.num_tiles)] - s.ctrl_waddr = [TestSrcRTL(CtrlAddrType, ctrl_waddr[i]) - for i in range(s.num_tiles)] - - s.dut = DUT(DataType, PredicateType, CtrlType, NocPktType, - CmdType, ControllerIdType, controller_id, - width, height, ctrl_mem_size, data_mem_size_global, - data_mem_size_per_bank, num_banks_per_cgra, - len(src_opt[0]), len(src_opt[0]), FunctionUnit, FuList, - controller2addr_map) - - # Connections - s.dut.send_to_noc.rdy //= 0 - s.dut.recv_from_noc.val //= 0 - s.dut.recv_from_noc.msg //= NocPktType(0, 0, 0, 0, 0, 0) - - for i in range(s.num_tiles): - connect(s.src_opt[i].send, s.dut.recv_wopt[i]) - connect(s.ctrl_waddr[i].send, s.dut.recv_waddr[i]) - - def done(s): - done = True - for i in range(s.num_tiles): - if not s.src_opt[i].done(): - done = False - break - return done - - def line_trace(s): - return s.dut.line_trace() - -def test_homo_2x2(cmdline_opts): - num_tile_inports = 4 - num_tile_outports = 4 - num_fu_inports = 4 - num_fu_outports = 2 - num_routing_outports = num_tile_outports + num_fu_inports - ctrl_mem_size = 6 - data_mem_size_global = 512 - data_mem_size_per_bank = 32 - num_banks_per_cgra = 2 - width = 2 - height = 2 - TileInType = mk_bits(clog2(num_tile_inports + 1)) - FuInType = mk_bits(clog2(num_fu_inports + 1)) - FuOutType = mk_bits(clog2(num_fu_outports + 1)) - addr_nbits = clog2(data_mem_size_global) - AddrType = mk_bits(addr_nbits) - CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) - num_tiles = width * height - DUT = CGRAWithCrossbarDataMemRTL - FunctionUnit = FlexibleFuRTL - FuList = [MemUnitRTL, AdderRTL] - DataType = mk_data(32, 1) - PredicateType = mk_predicate(1, 1) - - nterminals = 4 - CmdType = mk_bits(4) - ControllerIdType = mk_bits(clog2(nterminals)) - controller_id = 1 - controller2addr_map = { - 0: [0, 3], - 1: [4, 7], - 2: [8, 11], - 3: [12, 15], - } - - CtrlType = mk_separate_ctrl(num_fu_inports, num_fu_outports, - num_tile_inports, num_tile_outports) - NocPktType = 
mk_ring_multi_cgra_pkt(nrouters = nterminals, - addr_nbits = addr_nbits, - data_nbits = 32, - predicate_nbits = 1) - pickRegister = [FuInType(x + 1) for x in range(num_fu_inports)] - src_opt = [[ - CtrlType(OPT_INC, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - # TODO: make below as TileInType(5) to double check. - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - CtrlType(OPT_INC, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - - CtrlType(OPT_ADD, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - - CtrlType(OPT_STR, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - - CtrlType(OPT_ADD, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - - CtrlType(OPT_ADD, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]) - 
- ] for _ in range(num_tiles)] - ctrl_waddr = [[CtrlAddrType(0), CtrlAddrType(1), CtrlAddrType(2), CtrlAddrType(3), - CtrlAddrType(4), CtrlAddrType(5)] for _ in range(num_tiles)] - th = TestHarness(DUT, FunctionUnit, FuList, DataType, PredicateType, - CtrlType, NocPktType, CmdType, ControllerIdType, - controller_id, width, height, ctrl_mem_size, - data_mem_size_global, data_mem_size_per_bank, - num_banks_per_cgra, src_opt, ctrl_waddr, - controller2addr_map) - th.elaborate() - th.dut.set_metadata(VerilogVerilatorImportPass.vl_Wno_list, - ['UNSIGNED', 'UNOPTFLAT', 'WIDTH', 'WIDTHCONCAT', - 'ALWCOMBORDER']) - th = config_model_with_cmdline_opts(th, cmdline_opts, duts=['dut']) - run_sim(th) - diff --git a/scale_out/RingMultiCGRARTL.py b/scale_out/RingMultiCGRARTL.py deleted file mode 100644 index e577283..0000000 --- a/scale_out/RingMultiCGRARTL.py +++ /dev/null @@ -1,71 +0,0 @@ -""" -========================================================================== -RingMultiCGRARTL.py -========================================================================== -Ring connecting multiple CGRAs, each CGRA contains one controller. 
- -Author : Cheng Tan - Date : Dec 4, 2024 -""" - - -from pymtl3 import * -from pymtl3.stdlib.primitive import RegisterFile -from ..lib.basic.en_rdy.ifcs import SendIfcRTL, RecvIfcRTL -from ..lib.opt_type import * -from ..noc.PyOCN.pymtl3_net.ringnet.RingNetworkRTL import RingNetworkRTL -from ..cgra.CGRAWithCrossbarDataMemRTL import CGRAWithCrossbarDataMemRTL -from ..noc.PyOCN.pymtl3_net.ocnlib.ifcs.positions import mk_ring_pos - - -class RingMultiCGRARTL(Component): - - def construct(s, CGRADataType, PredicateType, CtrlType, NocPktType, - CmdType, num_terminals, width, height, ctrl_mem_size, - data_mem_size_global, data_mem_size_per_bank, - num_banks_per_cgra, num_ctrl, total_steps, - FunctionUnit, FuList, controller2addr_map, - preload_data = None, preload_const = None): - - # Constant - RingPos = mk_ring_pos(num_terminals) - s.num_terminals = num_terminals - s.num_tiles = width * height - CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) - ControllerIdType = mk_bits(clog2(num_terminals)) - - # Interface - # # Request from/to CPU. 
- # s.recv_from_cpu = RecvIfcRTL(CGRADataType) - # s.send_to_cpu = SendIfcRTL(CGRADataType) - s.recv_waddr = [[RecvIfcRTL(CtrlAddrType) for _ in range(s.num_tiles)] - for _ in range(s.num_terminals)] - s.recv_wopt = [[RecvIfcRTL(CtrlType) for _ in range(s.num_tiles)] - for _ in range(s.num_terminals)] - - # Components - s.cgra = [CGRAWithCrossbarDataMemRTL( - CGRADataType, PredicateType, CtrlType, NocPktType, CmdType, - ControllerIdType, terminal_id, width, height, ctrl_mem_size, - data_mem_size_global, data_mem_size_per_bank, num_banks_per_cgra, - num_ctrl, total_steps, FunctionUnit, FuList, controller2addr_map, - preload_data = None, preload_const = None) - for terminal_id in range(s.num_terminals)] - s.ring = RingNetworkRTL(NocPktType, RingPos, num_terminals, 0) - - # Connections - for i in range(s.num_terminals): - s.ring.send[i] //= s.cgra[i].recv_from_noc - s.ring.recv[i] //= s.cgra[i].send_to_noc - - for j in range(s.num_tiles): - s.recv_waddr[i][j] //= s.cgra[i].recv_waddr[j] - s.recv_wopt[i][j] //= s.cgra[i].recv_wopt[j] - - - def line_trace(s): - res = "||\n".join([(("[cgra["+str(i)+"]: ") + x.line_trace()) - for (i,x) in enumerate(s.cgra)]) - res += " ## ring: " + s.ring.line_trace() - return res - diff --git a/scale_out/test/RingMultiCGRARTL_test.py b/scale_out/test/RingMultiCGRARTL_test.py deleted file mode 100644 index 13ecc37..0000000 --- a/scale_out/test/RingMultiCGRARTL_test.py +++ /dev/null @@ -1,184 +0,0 @@ -""" -========================================================================== -RingMultiCGRARTL_test.py -========================================================================== -Test cases for CGRA with controller. 
- -Author : Cheng Tan - Date : Dec 4, 2024 -""" - - -from pymtl3 import * -from pymtl3.stdlib.test_utils import (run_sim, - config_model_with_cmdline_opts) -from pymtl3.passes.backends.verilog import (VerilogTranslationPass, - VerilogVerilatorImportPass) -from ..RingMultiCGRARTL import RingMultiCGRARTL -from ...fu.flexible.FlexibleFuRTL import FlexibleFuRTL -from ...fu.single.AdderRTL import AdderRTL -from ...fu.single.MemUnitRTL import MemUnitRTL -from ...fu.single.ShifterRTL import ShifterRTL -from ...lib.messages import * -from ...lib.opt_type import * -from ...lib.basic.en_rdy.test_srcs import TestSrcRTL - - -#------------------------------------------------------------------------- -# Test harness -#------------------------------------------------------------------------- - -class TestHarness(Component): - - def construct(s, DUT, FunctionUnit, FuList, DataType, PredicateType, - CtrlType, NocPktType, CmdType, num_terminals, width, height, - ctrl_mem_size, data_mem_size_global, - data_mem_size_per_bank, num_banks_per_cgra, src_opt, - ctrl_waddr, controller2addr_map): - - s.num_terminals = num_terminals - s.num_tiles = width * height - CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) - - s.src_opt = [[TestSrcRTL(CtrlType, src_opt[j]) - for j in range(s.num_tiles)] - for i in range(s.num_terminals)] - s.ctrl_waddr = [[TestSrcRTL(CtrlAddrType, ctrl_waddr[j]) - for j in range(s.num_tiles)] - for i in range(s.num_terminals)] - - s.dut = DUT(DataType, PredicateType, CtrlType, NocPktType, CmdType, - num_terminals, width, height, ctrl_mem_size, - data_mem_size_global, data_mem_size_per_bank, - num_banks_per_cgra, len(src_opt[0]), len(src_opt[0]), - FunctionUnit, FuList, controller2addr_map) - - # Connections - # s.dut.data_mem.recv_from_noc.rdy //= 0 - # s.dut.data_mem.send_to_noc.msg //= DataType(0, 0) - # s.dut.data_mem.send_to_noc.en //= 0 - # s.src_val_rdy.send //= s.dut.recv_from_other - # s.dut.send_to_other //= s.sink_val_rdy.recv - - # 
s.dut.recv_towards_controller.en //= 0 - # s.dut.recv_towards_controller.msg //= DataType(0, 0) - # s.dut.send_from_controller.rdy //= 0 - - for i in range(num_terminals): - for j in range(s.num_tiles): - connect(s.src_opt[i][j].send, s.dut.recv_wopt[i][j]) - connect(s.ctrl_waddr[i][j].send, s.dut.recv_waddr[i][j]) - - def done(s): - for i in range(s.num_terminals): - for j in range(s.num_tiles): - if not s.src_opt[i][j].done(): - return False - return True - - def line_trace(s): - return s.dut.line_trace() - -def test_homo_2x2(cmdline_opts): - num_tile_inports = 4 - num_tile_outports = 4 - num_fu_inports = 4 - num_fu_outports = 2 - num_routing_outports = num_tile_outports + num_fu_inports - ctrl_mem_size = 6 - data_mem_size_global = 32 - data_mem_size_per_bank = 4 - num_banks_per_cgra = 2 - num_terminals = 4 - width = 2 - height = 2 - TileInType = mk_bits(clog2(num_tile_inports + 1)) - FuInType = mk_bits(clog2(num_fu_inports + 1)) - FuOutType = mk_bits(clog2(num_fu_outports + 1)) - ctrl_addr_nbits = clog2(ctrl_mem_size) - CtrlAddrType = mk_bits(ctrl_addr_nbits) - data_addr_nbits = clog2(data_mem_size_global) - DataAddrType = mk_bits(clog2(data_mem_size_global)) - num_tiles = width * height - DUT = RingMultiCGRARTL - FunctionUnit = FlexibleFuRTL - FuList = [MemUnitRTL, AdderRTL] - DataType = mk_data(32, 1) - PredicateType = mk_predicate(1, 1) - CmdType = mk_bits(4) - controller2addr_map = { - 0: [0, 7], - 1: [8, 15], - 2: [16, 23], - 3: [24, 31], - } - CtrlType = mk_separate_ctrl(num_fu_inports, num_fu_outports, - num_tile_inports, num_tile_outports) - NocPktType = mk_ring_multi_cgra_pkt(nrouters = num_terminals, - addr_nbits = data_addr_nbits, - data_nbits = 32, - predicate_nbits = 1) - pickRegister = [FuInType(x + 1) for x in range(num_fu_inports)] - src_opt = [[ - CtrlType(OPT_INC, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - # TODO: make below as TileInType(5) to double check. 
- TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - CtrlType(OPT_INC, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - - CtrlType(OPT_ADD, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - - CtrlType(OPT_STR, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - - CtrlType(OPT_ADD, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - - CtrlType(OPT_ADD, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]) - - ] for _ in range(num_tiles)] - ctrl_waddr = [[CtrlAddrType(0), CtrlAddrType(1), CtrlAddrType(2), CtrlAddrType(3), - CtrlAddrType(4), CtrlAddrType(5)] for _ in range(num_tiles)] - th = TestHarness(DUT, FunctionUnit, FuList, DataType, PredicateType, - CtrlType, NocPktType, CmdType, num_terminals, width, height, - ctrl_mem_size, data_mem_size_global, 
data_mem_size_per_bank, - num_banks_per_cgra, src_opt, ctrl_waddr, controller2addr_map) - th.elaborate() - th.dut.set_metadata(VerilogVerilatorImportPass.vl_Wno_list, - ['UNSIGNED', 'UNOPTFLAT', 'WIDTH', 'WIDTHCONCAT', - 'ALWCOMBORDER']) - th = config_model_with_cmdline_opts(th, cmdline_opts, duts=['dut']) - run_sim(th) - diff --git a/scale_out/translate/RingMultiCGRARTL_test.py b/scale_out/translate/RingMultiCGRARTL_test.py deleted file mode 100644 index a73a2b9..0000000 --- a/scale_out/translate/RingMultiCGRARTL_test.py +++ /dev/null @@ -1,184 +0,0 @@ -""" -========================================================================== -CGRAWithControllerRTL_test.py -========================================================================== -Test cases for CGRA with controller. - -Author : Cheng Tan - Date : Dec 4, 2024 -""" - - -from pymtl3 import * -from pymtl3.stdlib.test_utils import (run_sim, - config_model_with_cmdline_opts) -from pymtl3.passes.backends.verilog import (VerilogTranslationPass, - VerilogVerilatorImportPass) -from ..RingMultiCGRARTL import RingMultiCGRARTL -from ...fu.flexible.FlexibleFuRTL import FlexibleFuRTL -from ...fu.single.AdderRTL import AdderRTL -from ...fu.single.MemUnitRTL import MemUnitRTL -from ...fu.single.ShifterRTL import ShifterRTL -from ...lib.messages import * -from ...lib.opt_type import * -from ...lib.basic.en_rdy.test_srcs import TestSrcRTL - - -#------------------------------------------------------------------------- -# Test harness -#------------------------------------------------------------------------- - -class TestHarness(Component): - - def construct(s, DUT, FunctionUnit, FuList, DataType, PredicateType, - CtrlType, NocPktType, CmdType, num_terminals, width, height, - ctrl_mem_size, data_mem_size_global, - data_mem_size_per_bank, num_banks_per_cgra, src_opt, - ctrl_waddr, controller2addr_map): - - s.num_terminals = num_terminals - s.num_tiles = width * height - CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) - - 
s.src_opt = [[TestSrcRTL(CtrlType, src_opt[j]) - for j in range(s.num_tiles)] - for i in range(s.num_terminals)] - s.ctrl_waddr = [[TestSrcRTL(CtrlAddrType, ctrl_waddr[j]) - for j in range(s.num_tiles)] - for i in range(s.num_terminals)] - - s.dut = DUT(DataType, PredicateType, CtrlType, NocPktType, CmdType, - num_terminals, width, height, ctrl_mem_size, - data_mem_size_global, data_mem_size_per_bank, - num_banks_per_cgra, len(src_opt[0]), len(src_opt[0]), - FunctionUnit, FuList, controller2addr_map) - - # Connections - # s.dut.data_mem.recv_from_noc.rdy //= 0 - # s.dut.data_mem.send_to_noc.msg //= DataType(0, 0) - # s.dut.data_mem.send_to_noc.en //= 0 - # s.src_val_rdy.send //= s.dut.recv_from_other - # s.dut.send_to_other //= s.sink_val_rdy.recv - - # s.dut.recv_towards_controller.en //= 0 - # s.dut.recv_towards_controller.msg //= DataType(0, 0) - # s.dut.send_from_controller.rdy //= 0 - - for i in range(num_terminals): - for j in range(s.num_tiles): - connect(s.src_opt[i][j].send, s.dut.recv_wopt[i][j]) - connect(s.ctrl_waddr[i][j].send, s.dut.recv_waddr[i][j]) - - def done(s): - for i in range(s.num_terminals): - for j in range(s.num_tiles): - if not s.src_opt[i][j].done(): - return False - return True - - def line_trace(s): - return s.dut.line_trace() - -def test_homo_2x2(cmdline_opts): - num_tile_inports = 4 - num_tile_outports = 4 - num_fu_inports = 4 - num_fu_outports = 2 - num_routing_outports = num_tile_outports + num_fu_inports - ctrl_mem_size = 6 - data_mem_size_global = 32 - data_mem_size_per_bank = 4 - num_banks_per_cgra = 2 - num_terminals = 4 - width = 2 - height = 2 - TileInType = mk_bits(clog2(num_tile_inports + 1)) - FuInType = mk_bits(clog2(num_fu_inports + 1)) - FuOutType = mk_bits(clog2(num_fu_outports + 1)) - ctrl_addr_nbits = clog2(ctrl_mem_size) - CtrlAddrType = mk_bits(ctrl_addr_nbits) - data_addr_nbits = clog2(data_mem_size_global) - DataAddrType = mk_bits(clog2(data_mem_size_global)) - num_tiles = width * height - DUT = RingMultiCGRARTL 
- FunctionUnit = FlexibleFuRTL - FuList = [MemUnitRTL, AdderRTL] - DataType = mk_data(32, 1) - PredicateType = mk_predicate(1, 1) - CmdType = mk_bits(4) - controller2addr_map = { - 0: [0, 7], - 1: [8, 15], - 2: [16, 23], - 3: [24, 31], - } - CtrlType = mk_separate_ctrl(num_fu_inports, num_fu_outports, - num_tile_inports, num_tile_outports) - NocPktType = mk_ring_multi_cgra_pkt(nrouters = num_terminals, - addr_nbits = data_addr_nbits, - data_nbits = 32, - predicate_nbits = 1) - pickRegister = [FuInType(x + 1) for x in range(num_fu_inports)] - src_opt = [[ - CtrlType(OPT_INC, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - # TODO: make below as TileInType(5) to double check. - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - CtrlType(OPT_INC, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - - CtrlType(OPT_ADD, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - - CtrlType(OPT_STR, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - - CtrlType(OPT_ADD, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), 
FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - - CtrlType(OPT_ADD, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]) - - ] for _ in range(num_tiles)] - ctrl_waddr = [[CtrlAddrType(0), CtrlAddrType(1), CtrlAddrType(2), CtrlAddrType(3), - CtrlAddrType(4), CtrlAddrType(5)] for _ in range(num_tiles)] - th = TestHarness(DUT, FunctionUnit, FuList, DataType, PredicateType, - CtrlType, NocPktType, CmdType, num_terminals, width, height, - ctrl_mem_size, data_mem_size_global, data_mem_size_per_bank, - num_banks_per_cgra, src_opt, ctrl_waddr, controller2addr_map) - th.elaborate() - th.dut.set_metadata(VerilogVerilatorImportPass.vl_Wno_list, - ['UNSIGNED', 'UNOPTFLAT', 'WIDTH', 'WIDTHCONCAT', - 'ALWCOMBORDER']) - th = config_model_with_cmdline_opts(th, cmdline_opts, duts=['dut']) - run_sim(th) - From b6c81ec7dfbd6cb1e219952bee71f3119d0f752f Mon Sep 17 00:00:00 2001 From: tancheng Date: Tue, 24 Dec 2024 04:27:06 +0000 Subject: [PATCH 11/19] [test] Remove unnecessary import --- mem/ctrl/CtrlMemDynamicRTL.py | 1 - mem/ctrl/RingMultiCtrlMemDynamicRTL.py | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/mem/ctrl/CtrlMemDynamicRTL.py b/mem/ctrl/CtrlMemDynamicRTL.py index f365166..9a96fa1 100644 --- a/mem/ctrl/CtrlMemDynamicRTL.py +++ b/mem/ctrl/CtrlMemDynamicRTL.py @@ -10,7 +10,6 @@ """ from pymtl3 import * -# from pymtl3.stdlib.dstruct.queues import NormalQueue from pymtl3.stdlib.primitive import RegisterFile from ...lib.basic.en_rdy.ifcs import SendIfcRTL from ...lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL diff --git a/mem/ctrl/RingMultiCtrlMemDynamicRTL.py b/mem/ctrl/RingMultiCtrlMemDynamicRTL.py index 258ea8a..f01ea06 100644 --- a/mem/ctrl/RingMultiCtrlMemDynamicRTL.py +++ b/mem/ctrl/RingMultiCtrlMemDynamicRTL.py 
@@ -14,9 +14,8 @@ from ...lib.basic.en_rdy.ifcs import SendIfcRTL from ...lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL from ...lib.opt_type import * -from ...noc.PyOCN.pymtl3_net.ringnet.RingNetworkRTL import RingNetworkRTL -from ...cgra.CGRAWithCrossbarDataMemRTL import CGRAWithCrossbarDataMemRTL from ...noc.PyOCN.pymtl3_net.ocnlib.ifcs.positions import mk_ring_pos +from ...noc.PyOCN.pymtl3_net.ringnet.RingNetworkRTL import RingNetworkRTL class RingMultiCtrlMemDynamicRTL(Component): def construct(s, CtrlPktType, CtrlSignalType, width, height, From 5cf0d1b12c5f42794f72a115e426b7a8a0f5331d Mon Sep 17 00:00:00 2001 From: tancheng Date: Tue, 24 Dec 2024 04:41:18 +0000 Subject: [PATCH 12/19] [test] Remove old multi-cgra test --- .github/workflows/python-package.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 56e38ab..dc66428 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -79,6 +79,5 @@ jobs: # CGRAs are interconnected with ring topology. The CGRA contains # separate crossbars (for tiles and FUs), crossbar-based data memory (for # multi-bank), and controller. 
- pytest --tb=short -sv ../scale_out/translate/RingMultiCGRARTL_test.py --test-verilog --dump-vtb --dump-vcd pytest --tb=short -sv ../scale_out/translate/RingMultiCgraRingCtrlMemRTL_test.py --test-verilog --dump-vtb --dump-vcd From 52bcf97544534c928deeda2c2f6c2fdd50bbade1 Mon Sep 17 00:00:00 2001 From: tancheng Date: Tue, 24 Dec 2024 04:48:47 +0000 Subject: [PATCH 13/19] [test] Include the missing test --- .../RingMultiCgraRingCtrlMemRTL_test.py | 216 ++++++++++++++++++ 1 file changed, 216 insertions(+) create mode 100644 scale_out/translate/RingMultiCgraRingCtrlMemRTL_test.py diff --git a/scale_out/translate/RingMultiCgraRingCtrlMemRTL_test.py b/scale_out/translate/RingMultiCgraRingCtrlMemRTL_test.py new file mode 100644 index 0000000..5f30261 --- /dev/null +++ b/scale_out/translate/RingMultiCgraRingCtrlMemRTL_test.py @@ -0,0 +1,216 @@ +""" +========================================================================== +RingMultiCgraRingCtrlMemRTL_test.py +========================================================================== +Test cases for CGRA with controller. 
+ +Author : Cheng Tan + Date : Dec 23, 2024 +""" + + +from pymtl3 import * +from pymtl3.stdlib.test_utils import (run_sim, + config_model_with_cmdline_opts) +from pymtl3.passes.backends.verilog import (VerilogTranslationPass, + VerilogVerilatorImportPass) +from ..RingMultiCgraRingCtrlMemRTL import RingMultiCgraRingCtrlMemRTL +from ...fu.flexible.FlexibleFuRTL import FlexibleFuRTL +from ...fu.single.AdderRTL import AdderRTL +from ...fu.single.MemUnitRTL import MemUnitRTL +from ...fu.single.ShifterRTL import ShifterRTL +from ...lib.messages import * +from ...lib.opt_type import * +from ...lib.cmd_type import * +from ...lib.basic.en_rdy.test_srcs import TestSrcRTL +from ...lib.basic.val_rdy.SourceRTL import SourceRTL as ValRdyTestSrcRTL + +#------------------------------------------------------------------------- +# Test harness +#------------------------------------------------------------------------- + +class TestHarness(Component): + def construct(s, DUT, FunctionUnit, FuList, DataType, PredicateType, + CtrlPktType, CtrlSignalType, NocPktType, CmdType, + num_terminals, width, height, ctrl_mem_size, + data_mem_size_global, data_mem_size_per_bank, + num_banks_per_cgra, src_ctrl_pkt, ctrl_steps, + controller2addr_map): + + s.num_terminals = num_terminals + s.num_tiles = width * height + # CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) + + # s.src_opt = [[TestSrcRTL(CtrlType, src_opt[j]) + # for j in range(s.num_tiles)] + # for i in range(s.num_terminals)] + s.src_ctrl_pkt = ValRdyTestSrcRTL(CtrlPktType, src_ctrl_pkt) + # s.ctrl_waddr = [[TestSrcRTL(CtrlAddrType, ctrl_waddr[j]) + # for j in range(s.num_tiles)] + # for i in range(s.num_terminals)] + + s.dut = DUT(DataType, PredicateType, CtrlPktType, CtrlSignalType, + NocPktType, CmdType, num_terminals, width, height, + ctrl_mem_size, data_mem_size_global, data_mem_size_per_bank, + num_banks_per_cgra, ctrl_steps, ctrl_steps, + FunctionUnit, FuList, controller2addr_map) + + # Connections + # 
s.dut.data_mem.recv_from_noc.rdy //= 0 + # s.dut.data_mem.send_to_noc.msg //= DataType(0, 0) + # s.dut.data_mem.send_to_noc.en //= 0 + # s.src_val_rdy.send //= s.dut.recv_from_other + # s.dut.send_to_other //= s.sink_val_rdy.recv + + # s.dut.recv_towards_controller.en //= 0 + # s.dut.recv_towards_controller.msg //= DataType(0, 0) + # s.dut.send_from_controller.rdy //= 0 + + # for i in range(num_terminals): + # for j in range(s.num_tiles): + # connect(s.src_opt[i][j].send, s.dut.recv_wopt[i][j]) + # connect(s.ctrl_waddr[i][j].send, s.dut.recv_waddr[i][j]) + s.src_ctrl_pkt.send //= s.dut.recv_from_cpu_ctrl_pkt + + def done(s): + return s.src_ctrl_pkt.done() + # for i in range(s.num_terminals): + # for j in range(s.num_tiles): + # if not s.src_opt[i][j].done(): + # return False + # return True + + def line_trace(s): + return s.dut.line_trace() + +def test_homo_2x2(cmdline_opts): + num_tile_inports = 4 + num_tile_outports = 4 + num_fu_inports = 4 + num_fu_outports = 2 + num_routing_outports = num_tile_outports + num_fu_inports + ctrl_mem_size = 6 + data_mem_size_global = 32 + data_mem_size_per_bank = 4 + num_banks_per_cgra = 2 + num_terminals = 4 + width = 2 + height = 2 + num_ctrl_actions = 6 + num_ctrl_operations = 64 + TileInType = mk_bits(clog2(num_tile_inports + 1)) + FuInType = mk_bits(clog2(num_fu_inports + 1)) + FuOutType = mk_bits(clog2(num_fu_outports + 1)) + ctrl_addr_nbits = clog2(ctrl_mem_size) + # CtrlAddrType = mk_bits(ctrl_addr_nbits) + data_addr_nbits = clog2(data_mem_size_global) + DataAddrType = mk_bits(clog2(data_mem_size_global)) + num_tiles = width * height + DUT = RingMultiCgraRingCtrlMemRTL + FunctionUnit = FlexibleFuRTL + FuList = [MemUnitRTL, AdderRTL] + DataType = mk_data(32, 1) + PredicateType = mk_predicate(1, 1) + CmdType = mk_bits(4) + controller2addr_map = { + 0: [0, 7], + 1: [8, 15], + 2: [16, 23], + 3: [24, 31], + } + CtrlPktType = \ + mk_ring_across_tiles_pkt(width * height, + num_ctrl_actions, + ctrl_mem_size, + num_ctrl_operations, 
+ num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports) + CtrlSignalType = \ + mk_separate_ctrl(num_ctrl_operations, + num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports) + NocPktType = mk_ring_multi_cgra_pkt(nrouters = num_terminals, + addr_nbits = data_addr_nbits, + data_nbits = 32, + predicate_nbits = 1) + pickRegister = [FuInType(x + 1) for x in range(num_fu_inports)] + src_opt_per_tile = [[ + # src dst vc_id opq cmd_type addr operation predicate + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 0, OPT_INC, b1(0), + pickRegister, + [TileInType(4), TileInType(3), TileInType(2), TileInType(1), + # TODO: make below as TileInType(5) to double check. + TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + + [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 1, OPT_INC, b1(0), + pickRegister, + [TileInType(4), TileInType(3), TileInType(2), TileInType(1), + TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + + [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), + + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 2, OPT_ADD, b1(0), + pickRegister, + [TileInType(4), TileInType(3), TileInType(2), TileInType(1), + TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + + [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), + + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 3, OPT_STR, b1(0), + pickRegister, + [TileInType(4), TileInType(3), TileInType(2), TileInType(1), + TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + + [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), + + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 4, OPT_ADD, b1(0), + pickRegister, + [TileInType(4), TileInType(3), TileInType(2), TileInType(1), + TileInType(0), 
TileInType(0), TileInType(0), TileInType(0)], + + [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), + + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 5, OPT_ADD, b1(0), + pickRegister, + [TileInType(4), TileInType(3), TileInType(2), TileInType(1), + TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + + [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), + + # This last one is for launching kernel. + CtrlPktType(0, i, 0, 0, CMD_LAUNCH, 0, OPT_ADD, b1(0), + pickRegister, + [TileInType(4), TileInType(3), TileInType(2), TileInType(1), + TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + + [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]) + ] for i in range(num_tiles)] + + src_ctrl_pkt = [] + for opt_per_tile in src_opt_per_tile: + src_ctrl_pkt.extend(opt_per_tile) + + th = TestHarness(DUT, FunctionUnit, FuList, DataType, PredicateType, CtrlPktType, + CtrlSignalType, NocPktType, CmdType, num_terminals, width, height, + ctrl_mem_size, data_mem_size_global, data_mem_size_per_bank, + num_banks_per_cgra, src_ctrl_pkt, ctrl_mem_size, + controller2addr_map) + th.elaborate() + th.dut.set_metadata(VerilogVerilatorImportPass.vl_Wno_list, + ['UNSIGNED', 'UNOPTFLAT', 'WIDTH', 'WIDTHCONCAT', + 'ALWCOMBORDER']) + th = config_model_with_cmdline_opts(th, cmdline_opts, duts = ['dut']) + run_sim(th) + From d95490a49fea08105f9512bb3cfe06d3a8aa3dd1 Mon Sep 17 00:00:00 2001 From: tancheng Date: Tue, 24 Dec 2024 23:06:17 +0000 Subject: [PATCH 14/19] [feature] Connects tiles on the boundary of each CGRA to enable larger scale CGRA modeling --- .github/workflows/python-package.yml | 8 +- cgra/CgraCrossbarDataMemRingCtrlMemRTL.py | 51 ++++++++++--- .../CgraCrossbarDataMemRingCtrlMemRTL_test.py | 21 +++++- .../CgraCrossbarDataMemRingCtrlMemRTL_test.py | 21 +++++- 
scale_out/RingMultiCgraRingCtrlMemRTL.py | 74 +++++++++++++------ .../test/RingMultiCgraRingCtrlMemRTL_test.py | 18 +++-- .../CgraMemBottomRTL.py | 5 +- .../CgraMemRightAndBottomRTL.py | 6 +- .../CgraMemBottomRTL_matmul_2x2_test.py | 12 +-- ...graMemRightAndBottomRTL_matmul_2x2_test.py | 18 +---- 10 files changed, 156 insertions(+), 78 deletions(-) rename cgra/CGRAMemBottomRTL.py => systolic/CgraMemBottomRTL.py (96%) rename cgra/CGRAMemRightAndBottomRTL.py => systolic/CgraMemRightAndBottomRTL.py (96%) rename cgra/translate/CGRAMemBottomRTL_matmul_2x2_test.py => systolic/translation/CgraMemBottomRTL_matmul_2x2_test.py (97%) rename cgra/translate/CGRAMemRightAndBottomRTL_matmul_2x2_test.py => systolic/translation/CgraMemRightAndBottomRTL_matmul_2x2_test.py (97%) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index dc66428..5afa4bd 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -67,15 +67,15 @@ jobs: # Separate crossbars CGRA simulation/translation. pytest ../cgra/translate/CgraCrossbarDataMemRingCtrlMemRTL_test.py -xvs --tb=short --test-verilog --dump-vtb --dump-vcd # 3x2 CGRA performs 2x2 matmul translation. - pytest ../cgra/translate/CGRAMemBottomRTL_matmul_2x2_test.py -xvs --tb=short --test-verilog --dump-vtb --dump-vcd + pytest ../systolic/translate/CGRAMemBottomRTL_matmul_2x2_test.py -xvs --tb=short --test-verilog --dump-vtb --dump-vcd # 3x3 CGRA performs 2x2 matmul simulation/translation. - pytest ../cgra/translate/CGRAMemRightAndBottomRTL_matmul_2x2_test.py -xvs --tb=short - pytest ../cgra/translate/CGRAMemRightAndBottomRTL_matmul_2x2_test.py -xvs --tb=short --test-verilog --dump-vtb --dump-vcd + pytest ../systolic/translate/CGRAMemRightAndBottomRTL_matmul_2x2_test.py -xvs --tb=short + pytest ../systolic/translate/CGRAMemRightAndBottomRTL_matmul_2x2_test.py -xvs --tb=short --test-verilog --dump-vtb --dump-vcd # Ring network simulation. 
pytest ../noc/PyOCN/pymtl3_net/ringnet/test/RingNetworkRTL_test.py --tb=short -sv # CGRA with separate crossbars (for tiles and FUs), crossbar-based data # memory (for multi-bank), ring-based control memories, and controller. - pytest --tb=short -sv ../cgra/translate/CgraCrossbarDataMemRingCtrlMemRTL_test.py --test-verilog --dump-vtb --dump-vcd + pytest --tb=short -sv ../systolic/translate/CgraCrossbarDataMemRingCtrlMemRTL_test.py --test-verilog --dump-vtb --dump-vcd # CGRAs are interconnected with ring topology. The CGRA contains # separate crossbars (for tiles and FUs), crossbar-based data memory (for # multi-bank), and controller. diff --git a/cgra/CgraCrossbarDataMemRingCtrlMemRTL.py b/cgra/CgraCrossbarDataMemRingCtrlMemRTL.py index 2b9578b..ab7df25 100644 --- a/cgra/CgraCrossbarDataMemRingCtrlMemRTL.py +++ b/cgra/CgraCrossbarDataMemRingCtrlMemRTL.py @@ -51,6 +51,17 @@ def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, s.recv_from_noc = ValRdyRecvIfcRTL(NocPktType) s.send_to_noc = ValRdySendIfcRTL(NocPktType) + # Interfaces on the boundary of the CGRA. 
+ s.recv_data_on_boundary_south = [RecvIfcRTL(DataType) for _ in range(width)] + s.send_data_on_boundary_south = [SendIfcRTL(DataType) for _ in range(width)] + s.recv_data_on_boundary_north = [RecvIfcRTL(DataType) for _ in range(width)] + s.send_data_on_boundary_north = [SendIfcRTL(DataType) for _ in range(width)] + + s.recv_data_on_boundary_east = [RecvIfcRTL(DataType) for _ in range(height)] + s.send_data_on_boundary_east = [SendIfcRTL(DataType) for _ in range(height)] + s.recv_data_on_boundary_west = [RecvIfcRTL(DataType) for _ in range(height)] + s.send_data_on_boundary_west = [SendIfcRTL(DataType) for _ in range(height)] + # s.recv_towards_controller = RecvIfcRTL(DataType) # s.send_from_controller = SendIfcRTL(DataType) @@ -124,24 +135,40 @@ def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, s.tile[i].send_data[PORT_EAST] //= s.tile[i+1].recv_data[PORT_WEST] if i // width == 0: - s.tile[i].send_data[PORT_SOUTH].rdy //= 0 - s.tile[i].recv_data[PORT_SOUTH].en //= 0 - s.tile[i].recv_data[PORT_SOUTH].msg //= DataType(0, 0) + s.tile[i].send_data[PORT_SOUTH] //= s.send_data_on_boundary_south[i % width] + s.tile[i].recv_data[PORT_SOUTH] //= s.recv_data_on_boundary_south[i % width] if i // width == height - 1: - s.tile[i].send_data[PORT_NORTH].rdy //= 0 - s.tile[i].recv_data[PORT_NORTH].en //= 0 - s.tile[i].recv_data[PORT_NORTH].msg //= DataType(0, 0) + s.tile[i].send_data[PORT_NORTH] //= s.send_data_on_boundary_north[i % width] + s.tile[i].recv_data[PORT_NORTH] //= s.recv_data_on_boundary_north[i % width] if i % width == 0: - s.tile[i].send_data[PORT_WEST].rdy //= 0 - s.tile[i].recv_data[PORT_WEST].en //= 0 - s.tile[i].recv_data[PORT_WEST].msg //= DataType(0, 0) + s.tile[i].send_data[PORT_WEST] //= s.send_data_on_boundary_west[i // width] + s.tile[i].recv_data[PORT_WEST] //= s.recv_data_on_boundary_west[i // width] if i % width == width - 1: - s.tile[i].send_data[PORT_EAST].rdy //= 0 - s.tile[i].recv_data[PORT_EAST].en //= 0 - 
s.tile[i].recv_data[PORT_EAST].msg //= DataType(0, 0) + s.tile[i].send_data[PORT_EAST] //= s.send_data_on_boundary_east[i // width] + s.tile[i].recv_data[PORT_EAST] //= s.recv_data_on_boundary_east[i // width] + + # if i // width == 0: + # s.tile[i].send_data[PORT_SOUTH].rdy //= 0 + # s.tile[i].recv_data[PORT_SOUTH].en //= 0 + # s.tile[i].recv_data[PORT_SOUTH].msg //= DataType(0, 0) + + # if i // width == height - 1: + # s.tile[i].send_data[PORT_NORTH].rdy //= 0 + # s.tile[i].recv_data[PORT_NORTH].en //= 0 + # s.tile[i].recv_data[PORT_NORTH].msg //= DataType(0, 0) + + # if i % width == 0: + # s.tile[i].send_data[PORT_WEST].rdy //= 0 + # s.tile[i].recv_data[PORT_WEST].en //= 0 + # s.tile[i].recv_data[PORT_WEST].msg //= DataType(0, 0) + + # if i % width == width - 1: + # s.tile[i].send_data[PORT_EAST].rdy //= 0 + # s.tile[i].recv_data[PORT_EAST].en //= 0 + # s.tile[i].recv_data[PORT_EAST].msg //= DataType(0, 0) if i % width == 0: s.tile[i].to_mem_raddr //= s.data_mem.recv_raddr[i//width] diff --git a/cgra/test/CgraCrossbarDataMemRingCtrlMemRTL_test.py b/cgra/test/CgraCrossbarDataMemRingCtrlMemRTL_test.py index a4db3ce..9757073 100644 --- a/cgra/test/CgraCrossbarDataMemRingCtrlMemRTL_test.py +++ b/cgra/test/CgraCrossbarDataMemRingCtrlMemRTL_test.py @@ -31,7 +31,6 @@ #------------------------------------------------------------------------- class TestHarness(Component): - def construct(s, DUT, FunctionUnit, FuList, DataType, PredicateType, CtrlPktType, CtrlSignalType, NocPktType, CmdType, ControllerIdType, controller_id, width, height, @@ -51,11 +50,29 @@ def construct(s, DUT, FunctionUnit, FuList, DataType, controller2addr_map) # Connections + s.src_ctrl_pkt.send //= s.dut.recv_from_cpu_ctrl_pkt + s.dut.send_to_noc.rdy //= 0 s.dut.recv_from_noc.val //= 0 s.dut.recv_from_noc.msg //= NocPktType(0, 0, 0, 0, 0, 0) - s.src_ctrl_pkt.send //= s.dut.recv_from_cpu_ctrl_pkt + for tile_col in range(width): + s.dut.send_data_on_boundary_north[tile_col].rdy //= 0 + 
s.dut.recv_data_on_boundary_north[tile_col].en //= 0 + s.dut.recv_data_on_boundary_north[tile_col].msg //= DataType() + + s.dut.send_data_on_boundary_south[tile_col].rdy //= 0 + s.dut.recv_data_on_boundary_south[tile_col].en //= 0 + s.dut.recv_data_on_boundary_south[tile_col].msg //= DataType() + + for tile_row in range(height): + s.dut.send_data_on_boundary_west[tile_row].rdy //= 0 + s.dut.recv_data_on_boundary_west[tile_row].en //= 0 + s.dut.recv_data_on_boundary_west[tile_row].msg //= DataType() + + s.dut.send_data_on_boundary_east[tile_row].rdy //= 0 + s.dut.recv_data_on_boundary_east[tile_row].en //= 0 + s.dut.recv_data_on_boundary_east[tile_row].msg //= DataType() def done(s): return s.src_ctrl_pkt.done() diff --git a/cgra/translate/CgraCrossbarDataMemRingCtrlMemRTL_test.py b/cgra/translate/CgraCrossbarDataMemRingCtrlMemRTL_test.py index a4db3ce..9757073 100644 --- a/cgra/translate/CgraCrossbarDataMemRingCtrlMemRTL_test.py +++ b/cgra/translate/CgraCrossbarDataMemRingCtrlMemRTL_test.py @@ -31,7 +31,6 @@ #------------------------------------------------------------------------- class TestHarness(Component): - def construct(s, DUT, FunctionUnit, FuList, DataType, PredicateType, CtrlPktType, CtrlSignalType, NocPktType, CmdType, ControllerIdType, controller_id, width, height, @@ -51,11 +50,29 @@ def construct(s, DUT, FunctionUnit, FuList, DataType, controller2addr_map) # Connections + s.src_ctrl_pkt.send //= s.dut.recv_from_cpu_ctrl_pkt + s.dut.send_to_noc.rdy //= 0 s.dut.recv_from_noc.val //= 0 s.dut.recv_from_noc.msg //= NocPktType(0, 0, 0, 0, 0, 0) - s.src_ctrl_pkt.send //= s.dut.recv_from_cpu_ctrl_pkt + for tile_col in range(width): + s.dut.send_data_on_boundary_north[tile_col].rdy //= 0 + s.dut.recv_data_on_boundary_north[tile_col].en //= 0 + s.dut.recv_data_on_boundary_north[tile_col].msg //= DataType() + + s.dut.send_data_on_boundary_south[tile_col].rdy //= 0 + s.dut.recv_data_on_boundary_south[tile_col].en //= 0 + 
s.dut.recv_data_on_boundary_south[tile_col].msg //= DataType() + + for tile_row in range(height): + s.dut.send_data_on_boundary_west[tile_row].rdy //= 0 + s.dut.recv_data_on_boundary_west[tile_row].en //= 0 + s.dut.recv_data_on_boundary_west[tile_row].msg //= DataType() + + s.dut.send_data_on_boundary_east[tile_row].rdy //= 0 + s.dut.recv_data_on_boundary_east[tile_row].en //= 0 + s.dut.recv_data_on_boundary_east[tile_row].msg //= DataType() def done(s): return s.src_ctrl_pkt.done() diff --git a/scale_out/RingMultiCgraRingCtrlMemRTL.py b/scale_out/RingMultiCgraRingCtrlMemRTL.py index d84b1bb..9dc243c 100644 --- a/scale_out/RingMultiCgraRingCtrlMemRTL.py +++ b/scale_out/RingMultiCgraRingCtrlMemRTL.py @@ -12,47 +12,42 @@ from pymtl3.stdlib.primitive import RegisterFile from ..cgra.CgraCrossbarDataMemRingCtrlMemRTL import CgraCrossbarDataMemRingCtrlMemRTL from ..lib.basic.en_rdy.ifcs import SendIfcRTL, RecvIfcRTL -from ..lib.opt_type import * from ..lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL +from ..lib.opt_type import * +from ..lib.util.common import * from ..noc.PyOCN.pymtl3_net.ocnlib.ifcs.positions import mk_ring_pos from ..noc.PyOCN.pymtl3_net.ringnet.RingNetworkRTL import RingNetworkRTL class RingMultiCgraRingCtrlMemRTL(Component): def construct(s, CGRADataType, PredicateType, CtrlPktType, - CtrlSignalType, NocPktType, CmdType, num_terminals, - width, height, ctrl_mem_size, data_mem_size_global, - data_mem_size_per_bank, num_banks_per_cgra, - num_ctrl, total_steps, FunctionUnit, FuList, - controller2addr_map, preload_data = None, + CtrlSignalType, NocPktType, CmdType, cgra_rows, + cgra_columns, tile_rows, tile_columns, ctrl_mem_size, + data_mem_size_global, data_mem_size_per_bank, + num_banks_per_cgra, num_ctrl, total_steps, FunctionUnit, + FuList, controller2addr_map, preload_data = None, preload_const = None): # Constant - RingPos = mk_ring_pos(num_terminals) - s.num_terminals = num_terminals - s.num_tiles = width * height + s.num_terminals = cgra_rows * 
cgra_columns + RingPos = mk_ring_pos(s.num_terminals) + s.num_tiles = tile_rows * tile_columns CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) - ControllerIdType = mk_bits(clog2(num_terminals)) + ControllerIdType = mk_bits(clog2(s.num_terminals)) # Interface - # # Request from/to CPU. - # s.recv_from_cpu = RecvIfcRTL(CGRADataType) - # s.send_to_cpu = SendIfcRTL(CGRADataType) - # s.recv_waddr = [[RecvIfcRTL(CtrlAddrType) for _ in range(s.num_tiles)] - # for _ in range(s.num_terminals)] - # s.recv_wopt = [[RecvIfcRTL(CtrlType) for _ in range(s.num_tiles)] - # for _ in range(s.num_terminals)] + # Request from/to CPU. s.recv_from_cpu_ctrl_pkt = ValRdyRecvIfcRTL(CtrlPktType) # Components s.cgra = [CgraCrossbarDataMemRingCtrlMemRTL( CGRADataType, PredicateType, CtrlPktType, CtrlSignalType, NocPktType, CmdType, ControllerIdType, terminal_id, - width, height, ctrl_mem_size, data_mem_size_global, + tile_columns, tile_rows, ctrl_mem_size, data_mem_size_global, data_mem_size_per_bank, num_banks_per_cgra, num_ctrl, total_steps, FunctionUnit, FuList, controller2addr_map, preload_data = None, preload_const = None) for terminal_id in range(s.num_terminals)] - s.ring = RingNetworkRTL(NocPktType, RingPos, num_terminals, 0) + s.ring = RingNetworkRTL(NocPktType, RingPos, s.num_terminals, 0) # Connections s.recv_from_cpu_ctrl_pkt //= s.cgra[0].recv_from_cpu_ctrl_pkt @@ -63,8 +58,45 @@ def construct(s, CGRADataType, PredicateType, CtrlPktType, for i in range(1, s.num_terminals): s.cgra[i].recv_from_cpu_ctrl_pkt.val //= 0 s.cgra[i].recv_from_cpu_ctrl_pkt.msg //= CtrlPktType() - # s.recv_waddr[i][j] //= s.cgra[i].recv_waddr[j] - # s.recv_wopt[i][j] //= s.cgra[i].recv_wopt[j] + + # Connects the tiles on the boundary of each two adjacent CGRAs.
+ for cgra_row in range(cgra_rows): + for cgra_col in range(cgra_columns): + if cgra_row != 0: + for tile_col in range(tile_columns): + s.cgra[cgra_row * cgra_columns + cgra_col].send_data_on_boundary_south[tile_col] //= \ + s.cgra[(cgra_row - 1) * cgra_columns + cgra_col].recv_data_on_boundary_north[tile_col] + s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_south[tile_col] //= \ + s.cgra[(cgra_row - 1) * cgra_columns + cgra_col].send_data_on_boundary_north[tile_col] + else: + for tile_col in range(tile_columns): + s.cgra[cgra_row * cgra_columns + cgra_col].send_data_on_boundary_south[tile_col].rdy //= 0 + s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_south[tile_col].en //= 0 + s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_south[tile_col].msg //= CGRADataType() + + if cgra_row == cgra_rows - 1: + for tile_col in range(tile_columns): + s.cgra[cgra_row * cgra_columns + cgra_col].send_data_on_boundary_north[tile_col].rdy //= 0 + s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_north[tile_col].en //= 0 + s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_north[tile_col].msg //= CGRADataType() + + if cgra_col != 0: + for tile_row in range(tile_rows): + s.cgra[cgra_row * cgra_columns + cgra_col].send_data_on_boundary_west[tile_row] //= \ + s.cgra[cgra_row * cgra_columns + cgra_col - 1].recv_data_on_boundary_east[tile_row] + s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_west[tile_row] //= \ + s.cgra[cgra_row * cgra_columns + cgra_col - 1].send_data_on_boundary_east[tile_row] + else: + for tile_row in range(tile_rows): + s.cgra[cgra_row * cgra_columns + cgra_col].send_data_on_boundary_west[tile_row].rdy //= 0 + s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_west[tile_row].en //= 0 + s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_west[tile_row].msg //= CGRADataType() + + if cgra_col == cgra_columns - 1: + for tile_row in 
range(tile_rows): + s.cgra[cgra_row * cgra_columns + cgra_col].send_data_on_boundary_east[tile_row].rdy //= 0 + s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_east[tile_row].en //= 0 + s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_east[tile_row].msg //= CGRADataType() def line_trace(s): res = "||\n".join([(("[cgra["+str(i)+"]: ") + x.line_trace()) diff --git a/scale_out/test/RingMultiCgraRingCtrlMemRTL_test.py b/scale_out/test/RingMultiCgraRingCtrlMemRTL_test.py index 5f30261..ea2a223 100644 --- a/scale_out/test/RingMultiCgraRingCtrlMemRTL_test.py +++ b/scale_out/test/RingMultiCgraRingCtrlMemRTL_test.py @@ -32,12 +32,12 @@ class TestHarness(Component): def construct(s, DUT, FunctionUnit, FuList, DataType, PredicateType, CtrlPktType, CtrlSignalType, NocPktType, CmdType, - num_terminals, width, height, ctrl_mem_size, + cgra_rows, cgra_columns, width, height, ctrl_mem_size, data_mem_size_global, data_mem_size_per_bank, num_banks_per_cgra, src_ctrl_pkt, ctrl_steps, controller2addr_map): - s.num_terminals = num_terminals + s.num_terminals = cgra_rows * cgra_columns s.num_tiles = width * height # CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) @@ -50,7 +50,7 @@ def construct(s, DUT, FunctionUnit, FuList, DataType, PredicateType, # for i in range(s.num_terminals)] s.dut = DUT(DataType, PredicateType, CtrlPktType, CtrlSignalType, - NocPktType, CmdType, num_terminals, width, height, + NocPktType, CmdType, cgra_rows, cgra_columns, height, width, ctrl_mem_size, data_mem_size_global, data_mem_size_per_bank, num_banks_per_cgra, ctrl_steps, ctrl_steps, FunctionUnit, FuList, controller2addr_map) @@ -93,7 +93,9 @@ def test_homo_2x2(cmdline_opts): data_mem_size_global = 32 data_mem_size_per_bank = 4 num_banks_per_cgra = 2 - num_terminals = 4 + cgra_rows = 2 + cgra_columns = 2 + num_terminals = cgra_rows * cgra_columns width = 2 height = 2 num_ctrl_actions = 6 @@ -203,10 +205,10 @@ def test_homo_2x2(cmdline_opts): src_ctrl_pkt.extend(opt_per_tile) 
th = TestHarness(DUT, FunctionUnit, FuList, DataType, PredicateType, CtrlPktType, - CtrlSignalType, NocPktType, CmdType, num_terminals, width, height, - ctrl_mem_size, data_mem_size_global, data_mem_size_per_bank, - num_banks_per_cgra, src_ctrl_pkt, ctrl_mem_size, - controller2addr_map) + CtrlSignalType, NocPktType, CmdType, cgra_rows, cgra_columns, + width, height, ctrl_mem_size, data_mem_size_global, + data_mem_size_per_bank, num_banks_per_cgra, src_ctrl_pkt, + ctrl_mem_size, controller2addr_map) th.elaborate() th.dut.set_metadata(VerilogVerilatorImportPass.vl_Wno_list, ['UNSIGNED', 'UNOPTFLAT', 'WIDTH', 'WIDTHCONCAT', diff --git a/cgra/CGRAMemBottomRTL.py b/systolic/CgraMemBottomRTL.py similarity index 96% rename from cgra/CGRAMemBottomRTL.py rename to systolic/CgraMemBottomRTL.py index 92c484f..6b22c79 100644 --- a/cgra/CGRAMemBottomRTL.py +++ b/systolic/CgraMemBottomRTL.py @@ -1,6 +1,6 @@ """ ========================================================================= -CGRAMemBottomRTL.py +CgraMemBottomRTL.py ========================================================================= The scrachpad memory is connected to the bottom (first row) tiles. 
@@ -21,8 +21,7 @@ from ..noc.CrossbarRTL import CrossbarRTL from ..tile.TileRTL import TileRTL -class CGRAMemBottomRTL(Component): - +class CgraMemBottomRTL(Component): def construct(s, DataType, PredicateType, CtrlType, width, height, ctrl_mem_size, data_mem_size, num_ctrl, total_steps, FunctionUnit, FuList, preload_data = None, diff --git a/cgra/CGRAMemRightAndBottomRTL.py b/systolic/CgraMemRightAndBottomRTL.py similarity index 96% rename from cgra/CGRAMemRightAndBottomRTL.py rename to systolic/CgraMemRightAndBottomRTL.py index 1436276..f96550f 100644 --- a/cgra/CGRAMemRightAndBottomRTL.py +++ b/systolic/CgraMemRightAndBottomRTL.py @@ -1,6 +1,6 @@ """ ========================================================================= -CGRAMemRightAndBottomRTL.py +CgraMemRightAndBottomRTL.py ========================================================================= Two scrachpad memories are connected to the bottom (first row) and the last column (except the one on the first row) tiles. For example, in a @@ -24,9 +24,7 @@ from ..noc.CrossbarRTL import CrossbarRTL from ..tile.TileRTL import TileRTL - -class CGRAMemRightAndBottomRTL(Component): - +class CgraMemRightAndBottomRTL(Component): def construct(s, DataType, PredicateType, CtrlType, width, height, ctrl_mem_size, data_mem_size, num_ctrl, total_steps, FunctionUnit, FuList, preload_data = None, diff --git a/cgra/translate/CGRAMemBottomRTL_matmul_2x2_test.py b/systolic/translation/CgraMemBottomRTL_matmul_2x2_test.py similarity index 97% rename from cgra/translate/CGRAMemBottomRTL_matmul_2x2_test.py rename to systolic/translation/CgraMemBottomRTL_matmul_2x2_test.py index f1d510e..69ffc0f 100644 --- a/cgra/translate/CGRAMemBottomRTL_matmul_2x2_test.py +++ b/systolic/translation/CgraMemBottomRTL_matmul_2x2_test.py @@ -1,6 +1,6 @@ """ ========================================================================== -CGRAMemBottomRTL_matmul_2x2_test.py +CgraMemBottomRTL_matmul_2x2_test.py 
========================================================================== Translation for 3x2 CGRA. The provided test is only used for a 2x2 matmul. @@ -13,7 +13,7 @@ from pymtl3.passes.backends.verilog import VerilogTranslationPass from pymtl3.stdlib.test_utils import (run_sim, config_model_with_cmdline_opts) -from ..CGRAMemBottomRTL import CGRAMemBottomRTL +from ..CgraMemBottomRTL import CgraMemBottomRTL from ...fu.flexible.FlexibleFuRTL import FlexibleFuRTL from ...fu.single.AdderRTL import AdderRTL from ...fu.single.BranchRTL import BranchRTL @@ -30,7 +30,6 @@ from ...lib.messages import * from ...lib.opt_type import * - #------------------------------------------------------------------------- # Test harness #------------------------------------------------------------------------- @@ -38,7 +37,6 @@ kMaxCycles = 20 class TestHarness(Component): - def construct(s, DUT, FunctionUnit, fu_list, DataType, PredicateType, CtrlType, width, height, ctrl_mem_size, data_mem_size, src_opt, ctrl_waddr, preload_data, preload_const, @@ -110,13 +108,11 @@ def test_CGRA_systolic(cmdline_opts): AddrType = mk_bits(clog2(ctrl_mem_size)) num_tiles = width * height num_fu_in = 4 - DUT = CGRAMemBottomRTL + DUT = CgraMemBottomRTL FunctionUnit = FlexibleFuRTL FuList = [SeqMulAdderRTL, AdderRTL, MulRTL, LogicRTL, ShifterRTL, PhiRTL, CompRTL, BranchRTL, MemUnitRTL] DataType = mk_data(32, 1) PredicateType = mk_predicate(1, 1) - # FuList = [ SeqMulAdderRTL, AdderRTL, MulRTL, LogicRTL, ShifterRTL, PhiRTL, CompRTL, BranchRTL, MemUnitRTL ] - # DataType = mk_data(16, 1) CtrlType = mk_ctrl(num_fu_in, num_xbar_inports, num_xbar_outports) FuInType = mk_bits(clog2( num_fu_in + 1)) pickRegister = [FuInType(x + 1) for x in range(num_fu_in)] @@ -301,7 +297,7 @@ def test_CGRA_systolic(cmdline_opts): th.elaborate() th.dut.set_metadata(VerilogTranslationPass.explicit_module_name, - f'CGRARTL') + f'CgraMemBottomRTL') # th.dut.set_metadata( VerilogVerilatorImportPass.vl_Wno_list, # ['UNSIGNED', 
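'UNOPTFLAT', 'WIDTH', 'WIDTHCONCAT', # 'ALWCOMBORDER'] ) diff --git a/cgra/translate/CGRAMemRightAndBottomRTL_matmul_2x2_test.py b/systolic/translation/CgraMemRightAndBottomRTL_matmul_2x2_test.py similarity index 97% rename from cgra/translate/CGRAMemRightAndBottomRTL_matmul_2x2_test.py rename to systolic/translation/CgraMemRightAndBottomRTL_matmul_2x2_test.py index 7ad2000..02dfb58 100644 --- a/cgra/translate/CGRAMemRightAndBottomRTL_matmul_2x2_test.py +++ b/systolic/translation/CgraMemRightAndBottomRTL_matmul_2x2_test.py @@ -1,6 +1,6 @@ """ ========================================================================== -CGRARightAndBottomRTL_matmul_2x2_test.py +CgraMemRightAndBottomRTL_matmul_2x2_test.py ========================================================================== Translation for 3x3 CGRA. The provided test is only used for a 2x2 matmul. @@ -13,7 +13,7 @@ from pymtl3.passes.backends.verilog import VerilogTranslationPass from pymtl3.stdlib.test_utils import (run_sim, config_model_with_cmdline_opts) -from ..CGRAMemRightAndBottomRTL import CGRAMemRightAndBottomRTL +from ..CgraMemRightAndBottomRTL import CgraMemRightAndBottomRTL from ...fu.flexible.FlexibleFuRTL import FlexibleFuRTL from ...fu.single.AdderRTL import AdderRTL from ...fu.single.BranchRTL import BranchRTL @@ -30,7 +30,6 @@ from ...lib.messages import * from ...lib.opt_type import * - #------------------------------------------------------------------------- # Test harness #------------------------------------------------------------------------- @@ -38,7 +37,6 @@ kMaxCycles = 12 class TestHarness(Component): - def construct(s, DUT, FunctionUnit, fu_list, DataType, PredicateType, CtrlType, width, height, ctrl_mem_size, data_mem_size, src_opt, ctrl_waddr, preload_data, preload_const, @@ -59,12 +57,6 @@ def construct(s, DUT, FunctionUnit, fu_list, DataType, PredicateType, kMaxCycles, FunctionUnit, fu_list, preload_data, preload_const) - # s.sink_out = [TestSinkRTL(DataType, sink_out[i]) - # for i in range(height - 1)] - - # for i in range(height - 1): - # connect(s.dut.send_data[i], s.sink_out[i].recv) - for i in range(s.num_tiles): connect(s.src_opt[i].send, s.dut.recv_wopt[i]) connect(s.ctrl_waddr[i].send, s.dut.recv_waddr[i]) @@ -134,13 +126,11 @@ def test_CGRA_systolic(cmdline_opts): AddrType = mk_bits(clog2(ctrl_mem_size)) num_tiles = width * height num_fu_in = 4 - DUT = CGRAMemRightAndBottomRTL + DUT = CgraMemRightAndBottomRTL FunctionUnit = FlexibleFuRTL FuList = [SeqMulAdderRTL, AdderRTL, MulRTL, LogicRTL, ShifterRTL, PhiRTL, CompRTL, BranchRTL, MemUnitRTL] DataType = mk_data(32, 1) PredicateType = mk_predicate(1, 1) - # FuList = [ SeqMulAdderRTL, AdderRTL, MulRTL, LogicRTL, ShifterRTL, PhiRTL, CompRTL, BranchRTL, MemUnitRTL ] - # DataType = mk_data(16, 1) CtrlType = mk_ctrl(num_fu_in, num_xbar_inports, num_xbar_outports) FuInType = mk_bits(clog2( num_fu_in + 1)) pickRegister = [FuInType(x + 1) for x in range(num_fu_in)] @@ -403,7 +393,7 @@ def test_CGRA_systolic(cmdline_opts): th.elaborate() th.dut.set_metadata(VerilogTranslationPass.explicit_module_name, - f'CGRAMemRightAndBottomRTL') + f'CgraMemRightAndBottomRTL') # th.dut.set_metadata( VerilogVerilatorImportPass.vl_Wno_list, # ['UNSIGNED',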
range(height - 1)] - - # for i in range(height - 1): - # connect(s.dut.send_data[i], s.sink_out[i].recv) - for i in range(s.num_tiles): connect(s.src_opt[i].send, s.dut.recv_wopt[i]) connect(s.ctrl_waddr[i].send, s.dut.recv_waddr[i]) @@ -134,13 +126,11 @@ def test_CGRA_systolic(cmdline_opts): AddrType = mk_bits(clog2(ctrl_mem_size)) num_tiles = width * height num_fu_in = 4 - DUT = CGRAMemRightAndBottomRTL + DUT = CgraMemRightAndBottomRTL FunctionUnit = FlexibleFuRTL FuList = [SeqMulAdderRTL, AdderRTL, MulRTL, LogicRTL, ShifterRTL, PhiRTL, CompRTL, BranchRTL, MemUnitRTL] DataType = mk_data(32, 1) PredicateType = mk_predicate(1, 1) - # FuList = [ SeqMulAdderRTL, AdderRTL, MulRTL, LogicRTL, ShifterRTL, PhiRTL, CompRTL, BranchRTL, MemUnitRTL ] - # DataType = mk_data(16, 1) CtrlType = mk_ctrl(num_fu_in, num_xbar_inports, num_xbar_outports) FuInType = mk_bits(clog2( num_fu_in + 1)) pickRegister = [FuInType(x + 1) for x in range(num_fu_in)] @@ -403,7 +393,7 @@ def test_CGRA_systolic(cmdline_opts): th.elaborate() th.dut.set_metadata(VerilogTranslationPass.explicit_module_name, - f'CGRAMemRightAndBottomRTL') + f'CgraMemRightAndBottomRTL') # th.dut.set_metadata( VerilogVerilatorImportPass.vl_Wno_list, # ['UNSIGNED', 'UNOPTFLAT', 'WIDTH', 'WIDTHCONCAT', # 'ALWCOMBORDER'] ) From 448770f0b461c8e2f8ab5641d9145e559035cf95 Mon Sep 17 00:00:00 2001 From: tancheng Date: Tue, 24 Dec 2024 23:14:06 +0000 Subject: [PATCH 15/19] [test] Add __init__.py --- systolic/translation/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 systolic/translation/__init__.py diff --git a/systolic/translation/__init__.py b/systolic/translation/__init__.py new file mode 100644 index 0000000..e69de29 From 61a513b20351327687b273f5cdf4236326d75227 Mon Sep 17 00:00:00 2001 From: tancheng Date: Tue, 24 Dec 2024 23:18:41 +0000 Subject: [PATCH 16/19] [test] Fix ring multi-cgra translation --- .../RingMultiCgraRingCtrlMemRTL_test.py | 18 ++++++++++-------- 1 file changed, 10 
insertions(+), 8 deletions(-) diff --git a/scale_out/translate/RingMultiCgraRingCtrlMemRTL_test.py b/scale_out/translate/RingMultiCgraRingCtrlMemRTL_test.py index 5f30261..ea2a223 100644 --- a/scale_out/translate/RingMultiCgraRingCtrlMemRTL_test.py +++ b/scale_out/translate/RingMultiCgraRingCtrlMemRTL_test.py @@ -32,12 +32,12 @@ class TestHarness(Component): def construct(s, DUT, FunctionUnit, FuList, DataType, PredicateType, CtrlPktType, CtrlSignalType, NocPktType, CmdType, - num_terminals, width, height, ctrl_mem_size, + cgra_rows, cgra_columns, width, height, ctrl_mem_size, data_mem_size_global, data_mem_size_per_bank, num_banks_per_cgra, src_ctrl_pkt, ctrl_steps, controller2addr_map): - s.num_terminals = num_terminals + s.num_terminals = cgra_rows * cgra_columns s.num_tiles = width * height # CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) @@ -50,7 +50,7 @@ def construct(s, DUT, FunctionUnit, FuList, DataType, PredicateType, # for i in range(s.num_terminals)] s.dut = DUT(DataType, PredicateType, CtrlPktType, CtrlSignalType, - NocPktType, CmdType, num_terminals, width, height, + NocPktType, CmdType, cgra_rows, cgra_columns, height, width, ctrl_mem_size, data_mem_size_global, data_mem_size_per_bank, num_banks_per_cgra, ctrl_steps, ctrl_steps, FunctionUnit, FuList, controller2addr_map) @@ -93,7 +93,9 @@ def test_homo_2x2(cmdline_opts): data_mem_size_global = 32 data_mem_size_per_bank = 4 num_banks_per_cgra = 2 - num_terminals = 4 + cgra_rows = 2 + cgra_columns = 2 + num_terminals = cgra_rows * cgra_columns width = 2 height = 2 num_ctrl_actions = 6 @@ -203,10 +205,10 @@ def test_homo_2x2(cmdline_opts): src_ctrl_pkt.extend(opt_per_tile) th = TestHarness(DUT, FunctionUnit, FuList, DataType, PredicateType, CtrlPktType, - CtrlSignalType, NocPktType, CmdType, num_terminals, width, height, - ctrl_mem_size, data_mem_size_global, data_mem_size_per_bank, - num_banks_per_cgra, src_ctrl_pkt, ctrl_mem_size, - controller2addr_map) + CtrlSignalType, NocPktType, CmdType, cgra_rows, 
cgra_columns, + width, height, ctrl_mem_size, data_mem_size_global, + data_mem_size_per_bank, num_banks_per_cgra, src_ctrl_pkt, + ctrl_mem_size, controller2addr_map) th.elaborate() th.dut.set_metadata(VerilogVerilatorImportPass.vl_Wno_list, ['UNSIGNED', 'UNOPTFLAT', 'WIDTH', 'WIDTHCONCAT', From 82ed321e10e518d0284949e682fff463c465fa52 Mon Sep 17 00:00:00 2001 From: tancheng Date: Tue, 24 Dec 2024 23:26:36 +0000 Subject: [PATCH 17/19] [test] Fix name in Actions --- .github/workflows/python-package.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 5afa4bd..639c54c 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -67,10 +67,10 @@ jobs: # Separate crossbars CGRA simulation/translation. pytest ../cgra/translate/CgraCrossbarDataMemRingCtrlMemRTL_test.py -xvs --tb=short --test-verilog --dump-vtb --dump-vcd # 3x2 CGRA performs 2x2 matmul translation. - pytest ../systolic/translate/CGRAMemBottomRTL_matmul_2x2_test.py -xvs --tb=short --test-verilog --dump-vtb --dump-vcd + pytest ../systolic/translate/CgraMemBottomRTL_matmul_2x2_test.py -xvs --tb=short --test-verilog --dump-vtb --dump-vcd # 3x3 CGRA performs 2x2 matmul simulation/translation. - pytest ../systolic/translate/CGRAMemRightAndBottomRTL_matmul_2x2_test.py -xvs --tb=short - pytest ../systolic/translate/CGRAMemRightAndBottomRTL_matmul_2x2_test.py -xvs --tb=short --test-verilog --dump-vtb --dump-vcd + pytest ../systolic/translate/CgraMemRightAndBottomRTL_matmul_2x2_test.py -xvs --tb=short + pytest ../systolic/translate/CgraMemRightAndBottomRTL_matmul_2x2_test.py -xvs --tb=short --test-verilog --dump-vtb --dump-vcd # Ring network simulation. 
pytest ../noc/PyOCN/pymtl3_net/ringnet/test/RingNetworkRTL_test.py --tb=short -sv # CGRA with separate crossbars (for tiles and FUs), crossbar-based data From 94898e81ee2792e2ebef0daf887ac28351003001 Mon Sep 17 00:00:00 2001 From: tancheng Date: Tue, 24 Dec 2024 23:36:26 +0000 Subject: [PATCH 18/19] [test] Directory naming --- .../CgraMemBottomRTL_matmul_2x2_test.py | 0 .../CgraMemRightAndBottomRTL_matmul_2x2_test.py | 0 systolic/{translation => translate}/__init__.py | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename systolic/{translation => translate}/CgraMemBottomRTL_matmul_2x2_test.py (100%) rename systolic/{translation => translate}/CgraMemRightAndBottomRTL_matmul_2x2_test.py (100%) rename systolic/{translation => translate}/__init__.py (100%) diff --git a/systolic/translation/CgraMemBottomRTL_matmul_2x2_test.py b/systolic/translate/CgraMemBottomRTL_matmul_2x2_test.py similarity index 100% rename from systolic/translation/CgraMemBottomRTL_matmul_2x2_test.py rename to systolic/translate/CgraMemBottomRTL_matmul_2x2_test.py diff --git a/systolic/translation/CgraMemRightAndBottomRTL_matmul_2x2_test.py b/systolic/translate/CgraMemRightAndBottomRTL_matmul_2x2_test.py similarity index 100% rename from systolic/translation/CgraMemRightAndBottomRTL_matmul_2x2_test.py rename to systolic/translate/CgraMemRightAndBottomRTL_matmul_2x2_test.py diff --git a/systolic/translation/__init__.py b/systolic/translate/__init__.py similarity index 100% rename from systolic/translation/__init__.py rename to systolic/translate/__init__.py From d2f625b933d1bbcfd1cae2fae37b48328bdfa4f6 Mon Sep 17 00:00:00 2001 From: tancheng Date: Tue, 24 Dec 2024 23:45:18 +0000 Subject: [PATCH 19/19] [test] CGRA translation file name --- .github/workflows/python-package.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 639c54c..24255b1 100644 --- a/.github/workflows/python-package.yml +++ 
b/.github/workflows/python-package.yml @@ -75,7 +75,7 @@ jobs: pytest ../noc/PyOCN/pymtl3_net/ringnet/test/RingNetworkRTL_test.py --tb=short -sv # CGRA with separate crossbars (for tiles and FUs), crossbar-based data # memory (for multi-bank), ring-based control memories, and controller. - pytest --tb=short -sv ../systolic/translate/CgraCrossbarDataMemRingCtrlMemRTL_test.py --test-verilog --dump-vtb --dump-vcd + pytest --tb=short -sv ../cgra/translate/CgraCrossbarDataMemRingCtrlMemRTL_test.py --test-verilog --dump-vtb --dump-vcd # CGRAs are interconnected with ring topology. The CGRA contains # separate crossbars (for tiles and FUs), crossbar-based data memory (for # multi-bank), and controller.