diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 2850045..24255b1 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -65,20 +65,19 @@ jobs: # Kingmesh topology CGRA translation. pytest ../cgra/translate/VectorCGRAKingMeshRTL_test.py -xvs --tb=short --test-verilog --dump-vtb --dump-vcd # Separate crossbars CGRA simulation/translation. - pytest ../cgra/test/CGRASeparateCrossbarRTL_test.py -xvs --tb=short --test-verilog --dump-vtb --dump-vcd - pytest ../cgra/translate/CGRASeparateCrossbarRTL_test.py -xvs --tb=short --test-verilog --dump-vtb --dump-vcd + pytest ../cgra/translate/CgraCrossbarDataMemRingCtrlMemRTL_test.py -xvs --tb=short --test-verilog --dump-vtb --dump-vcd # 3x2 CGRA performs 2x2 matmul translation. - pytest ../cgra/translate/CGRAMemBottomRTL_matmul_2x2_test.py -xvs --tb=short --test-verilog --dump-vtb --dump-vcd + pytest ../systolic/translate/CgraMemBottomRTL_matmul_2x2_test.py -xvs --tb=short --test-verilog --dump-vtb --dump-vcd # 3x3 CGRA performs 2x2 matmul simulation/translation. - pytest ../cgra/translate/CGRAMemRightAndBottomRTL_matmul_2x2_test.py -xvs --tb=short - pytest ../cgra/translate/CGRAMemRightAndBottomRTL_matmul_2x2_test.py -xvs --tb=short --test-verilog --dump-vtb --dump-vcd + pytest ../systolic/translate/CgraMemRightAndBottomRTL_matmul_2x2_test.py -xvs --tb=short + pytest ../systolic/translate/CgraMemRightAndBottomRTL_matmul_2x2_test.py -xvs --tb=short --test-verilog --dump-vtb --dump-vcd # Ring network simulation. pytest ../noc/PyOCN/pymtl3_net/ringnet/test/RingNetworkRTL_test.py --tb=short -sv # CGRA with separate crossbars (for tiles and FUs), crossbar-based data - # memory (for multi-bank), and controller. - pytest --tb=short -sv ../cgra/translate/CGRAWithCrossbarDataMemRTL_test.py --test-verilog --dump-vtb --dump-vcd + # memory (for multi-bank), ring-based control memories, and controller. + pytest --tb=short -sv ../cgra/translate/CgraCrossbarDataMemRingCtrlMemRTL_test.py --test-verilog --dump-vtb --dump-vcd # CGRAs are interconnected with ring topology. The CGRA contains # separate crossbars (for tiles and FUs), crossbar-based data memory (for # multi-bank), and controller. - pytest --tb=short -sv ../scale_out/translate/RingMultiCGRARTL_test.py --test-verilog --dump-vtb --dump-vcd + pytest --tb=short -sv ../scale_out/translate/RingMultiCgraRingCtrlMemRTL_test.py --test-verilog --dump-vtb --dump-vcd diff --git a/cgra/CGRASeparateCrossbarRTL.py b/cgra/CGRASeparateCrossbarRTL.py deleted file mode 100644 index 8ea53bf..0000000 --- a/cgra/CGRASeparateCrossbarRTL.py +++ /dev/null @@ -1,104 +0,0 @@ -""" -========================================================================= -CGRASeparateCrossbarRTL.py -========================================================================= - -Author : Cheng Tan - Date : Nov 29, 2024 -""" - -from pymtl3 import * -from ..fu.flexible.FlexibleFuRTL import FlexibleFuRTL -from ..fu.single.MemUnitRTL import MemUnitRTL -from ..fu.single.AdderRTL import AdderRTL -from ..lib.util.common import * -from ..lib.basic.en_rdy.ifcs import SendIfcRTL, RecvIfcRTL -from ..lib.opt_type import * -from ..mem.data.DataMemCL import DataMemCL -from ..mem.data.DataMemRTL import DataMemRTL -from ..noc.ChannelNormalRTL import ChannelNormalRTL -from ..noc.CrossbarSeparateRTL import CrossbarSeparateRTL -from ..tile.TileSeparateCrossbarRTL import TileSeparateCrossbarRTL - - -class CGRASeparateCrossbarRTL(Component): - def construct(s, DataType, PredicateType, CtrlType, width, height, - ctrl_mem_size, data_mem_size, num_ctrl, total_steps, - FunctionUnit, FuList, preload_data = None, - preload_const = None): - - s.num_tiles = width * height - s.num_mesh_ports = 4 - AddrType = mk_bits(clog2(ctrl_mem_size)) - - # Interfaces - s.recv_waddr = [RecvIfcRTL(AddrType) for _ in range(s.num_tiles)] - s.recv_wopt = [RecvIfcRTL(CtrlType) for _ in range(s.num_tiles)] - - # Components - if preload_const == None: - preload_const = [[DataType(0, 0)] for _ in range(width*height)] - s.tile = [TileSeparateCrossbarRTL(DataType, PredicateType, CtrlType, - ctrl_mem_size, data_mem_size, num_ctrl, - total_steps, 4, 2, s.num_mesh_ports, - s.num_mesh_ports, const_list = preload_const[i]) - for i in range( s.num_tiles)] - s.data_mem = DataMemRTL(DataType, data_mem_size, height, height, preload_data) - - # Connections - for i in range(s.num_tiles): - s.recv_waddr[i] //= s.tile[i].recv_waddr - s.recv_wopt[i] //= s.tile[i].recv_wopt - - if i // width > 0: - s.tile[i].send_data[PORT_SOUTH] //= s.tile[i-width].recv_data[PORT_NORTH] - - if i // width < height - 1: - s.tile[i].send_data[PORT_NORTH] //= s.tile[i+width].recv_data[PORT_SOUTH] - - if i % width > 0: - s.tile[i].send_data[PORT_WEST] //= s.tile[i-1].recv_data[PORT_EAST] - - if i % width < width - 1: - s.tile[i].send_data[PORT_EAST] //= s.tile[i+1].recv_data[PORT_WEST] - - if i // width == 0: - s.tile[i].send_data[PORT_SOUTH].rdy //= 0 - s.tile[i].recv_data[PORT_SOUTH].en //= 0 - s.tile[i].recv_data[PORT_SOUTH].msg //= DataType(0, 0) - - if i // width == height - 1: - s.tile[i].send_data[PORT_NORTH].rdy //= 0 - s.tile[i].recv_data[PORT_NORTH].en //= 0 - s.tile[i].recv_data[PORT_NORTH].msg //= DataType(0, 0) - - if i % width == 0: - s.tile[i].send_data[PORT_WEST].rdy //= 0 - s.tile[i].recv_data[PORT_WEST].en //= 0 - s.tile[i].recv_data[PORT_WEST].msg //= DataType(0, 0) - - if i % width == width - 1: - s.tile[i].send_data[PORT_EAST].rdy //= 0 - s.tile[i].recv_data[PORT_EAST].en //= 0 - s.tile[i].recv_data[PORT_EAST].msg //= DataType(0, 0) - - if i % width == 0: - s.tile[i].to_mem_raddr //= s.data_mem.recv_raddr[i//width] - s.tile[i].from_mem_rdata //= s.data_mem.send_rdata[i//width] - s.tile[i].to_mem_waddr //= s.data_mem.recv_waddr[i//width] - s.tile[i].to_mem_wdata //= s.data_mem.recv_wdata[i//width] - else: - s.tile[i].to_mem_raddr.rdy //= 0 - s.tile[i].from_mem_rdata.en //= 0 - s.tile[i].from_mem_rdata.msg //= DataType(0, 0) - s.tile[i].to_mem_waddr.rdy //= 0 - s.tile[i].to_mem_wdata.rdy //= 0 - - # Line trace - def line_trace( s ): - # str = "||".join([ x.element.line_trace() for x in s.tile ]) - # str += " :: [" + s.data_mem.line_trace() + "]" - res = "||\n".join([ (("[tile"+str(i)+"]: ") + x.line_trace() + x.ctrl_mem.line_trace()) - for (i,x) in enumerate(s.tile) ]) - res += "\n :: [" + s.data_mem.line_trace() + "] \n" - return res diff --git a/cgra/CGRAWithCrossbarDataMemRTL.py b/cgra/CgraCrossbarDataMemRingCtrlMemRTL.py similarity index 53% rename from cgra/CGRAWithCrossbarDataMemRTL.py rename to cgra/CgraCrossbarDataMemRingCtrlMemRTL.py index 646fd88..ab7df25 100644 --- a/cgra/CGRAWithCrossbarDataMemRTL.py +++ b/cgra/CgraCrossbarDataMemRingCtrlMemRTL.py @@ -1,13 +1,14 @@ """ ========================================================================= -CGRAWithCrossbarDataMemRTL.py +CgraCrossbarDataMemRingCtrlMemRTL.py ========================================================================= Author : Cheng Tan - Date : Dec 13, 2024 + Date : Dec 22, 2024 """ from pymtl3 import * +from ..controller.ControllerRTL import ControllerRTL from ..fu.flexible.FlexibleFuRTL import FlexibleFuRTL from ..fu.single.MemUnitRTL import MemUnitRTL from ..fu.single.AdderRTL import AdderRTL @@ -19,20 +20,20 @@ from ..mem.data.DataMemWithCrossbarRTL import DataMemWithCrossbarRTL from ..noc.ChannelNormalRTL import ChannelNormalRTL from ..noc.CrossbarSeparateRTL import CrossbarSeparateRTL +from ..noc.PyOCN.pymtl3_net.ocnlib.ifcs.positions import mk_ring_pos +from ..noc.PyOCN.pymtl3_net.ringnet.RingNetworkRTL import RingNetworkRTL from ..tile.TileSeparateCrossbarRTL import TileSeparateCrossbarRTL -from ..controller.ControllerRTL import ControllerRTL - -class CGRAWithCrossbarDataMemRTL(Component): - - def construct(s, DataType, PredicateType, CtrlType, NocPktType, - CmdType, ControllerIdType, controller_id, +class CgraCrossbarDataMemRingCtrlMemRTL(Component): + def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, + NocPktType, CmdType, ControllerIdType, controller_id, width, height, ctrl_mem_size, data_mem_size_global, data_mem_size_per_bank, num_banks_per_cgra, num_ctrl, total_steps, FunctionUnit, FuList, controller2addr_map, preload_data = None, preload_const = None): s.num_tiles = width * height + CtrlRingPos = mk_ring_pos(s.num_tiles) s.num_mesh_ports = 4 CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) DataAddrType = mk_bits(clog2(data_mem_size_global)) @@ -40,8 +41,9 @@ def construct(s, DataType, PredicateType, CtrlType, NocPktType, data_mem_size_global) # Interfaces - s.recv_waddr = [RecvIfcRTL(CtrlAddrType) for _ in range(s.num_tiles)] - s.recv_wopt = [RecvIfcRTL(CtrlType) for _ in range(s.num_tiles)] + # s.recv_waddr = [RecvIfcRTL(CtrlAddrType) for _ in range(s.num_tiles)] + # s.recv_wopt = [RecvIfcRTL(CtrlSignalType) for _ in range(s.num_tiles)] + s.recv_from_cpu_ctrl_pkt = ValRdyRecvIfcRTL(CtrlPktType) # Explicitly provides the ValRdyRecvIfcRTL in the library, as the # translation pass sometimes not able to distinguish the @@ -49,29 +51,40 @@ def construct(s, DataType, PredicateType, CtrlType, NocPktType, s.recv_from_noc = ValRdyRecvIfcRTL(NocPktType) s.send_to_noc = ValRdySendIfcRTL(NocPktType) + # Interfaces on the boundary of the CGRA. + s.recv_data_on_boundary_south = [RecvIfcRTL(DataType) for _ in range(width)] + s.send_data_on_boundary_south = [SendIfcRTL(DataType) for _ in range(width)] + s.recv_data_on_boundary_north = [RecvIfcRTL(DataType) for _ in range(width)] + s.send_data_on_boundary_north = [SendIfcRTL(DataType) for _ in range(width)] + + s.recv_data_on_boundary_east = [RecvIfcRTL(DataType) for _ in range(height)] + s.send_data_on_boundary_east = [SendIfcRTL(DataType) for _ in range(height)] + s.recv_data_on_boundary_west = [RecvIfcRTL(DataType) for _ in range(height)] + s.send_data_on_boundary_west = [SendIfcRTL(DataType) for _ in range(height)] + # s.recv_towards_controller = RecvIfcRTL(DataType) # s.send_from_controller = SendIfcRTL(DataType) - # Components if preload_const == None: preload_const = [[DataType(0, 0)] for _ in range(width*height)] - s.tile = [TileSeparateCrossbarRTL(DataType, PredicateType, CtrlType, - ctrl_mem_size, data_mem_size_global, - num_ctrl, total_steps, 4, 2, s.num_mesh_ports, - s.num_mesh_ports, const_list = preload_const[i]) - for i in range( s.num_tiles)] + s.tile = [TileSeparateCrossbarRTL( + DataType, PredicateType, CtrlPktType, CtrlSignalType, ctrl_mem_size, + data_mem_size_global, num_ctrl, total_steps, 4, 2, + s.num_mesh_ports, s.num_mesh_ports, + const_list = preload_const[i]) for i in range( s.num_tiles)] s.data_mem = DataMemWithCrossbarRTL(NocPktType, DataType, data_mem_size_global, data_mem_size_per_bank, - num_banks_per_cgra, height, height, + num_banks_per_cgra, + height, height, preload_data) - s.controller = ControllerRTL(ControllerIdType, CmdType, NocPktType, - DataType, DataAddrType, controller_id, - controller2addr_map) + s.controller = ControllerRTL(ControllerIdType, CmdType, CtrlPktType, + NocPktType, DataType, DataAddrType, + controller_id, controller2addr_map) + s.ctrl_ring = RingNetworkRTL(CtrlPktType, CtrlRingPos, s.num_tiles, 0) # Connections - # Connects data memory with controller. # s.data_mem.recv_from_noc //= s.controller.send_to_master # s.data_mem.send_to_noc //= s.controller.recv_from_master @@ -90,12 +103,24 @@ def construct(s, DataType, PredicateType, CtrlType, NocPktType, s.recv_from_noc //= s.controller.recv_from_noc s.send_to_noc //= s.controller.send_to_noc + # Connects the ctrl interface between CPU and controller. + s.recv_from_cpu_ctrl_pkt //= s.controller.recv_from_cpu_ctrl_pkt + # s.recv_towards_controller //= s.controller.recv_from_master # s.send_from_controller //= s.controller.send_to_master + # Connects ring with each control memory. for i in range(s.num_tiles): - s.recv_waddr[i] //= s.tile[i].recv_waddr - s.recv_wopt[i] //= s.tile[i].recv_wopt + s.ctrl_ring.send[i] //= s.tile[i].recv_ctrl_pkt + + s.ctrl_ring.recv[0] //= s.controller.send_to_ctrl_ring_ctrl_pkt + for i in range(1, s.num_tiles): + s.ctrl_ring.recv[i].val //= 0 + s.ctrl_ring.recv[i].msg //= CtrlPktType() + + for i in range(s.num_tiles): + # s.recv_waddr[i] //= s.tile[i].recv_waddr + # s.recv_wopt[i] //= s.tile[i].recv_wopt if i // width > 0: s.tile[i].send_data[PORT_SOUTH] //= s.tile[i-width].recv_data[PORT_NORTH] @@ -110,24 +135,40 @@ def construct(s, DataType, PredicateType, CtrlType, NocPktType, s.tile[i].send_data[PORT_EAST] //= s.tile[i+1].recv_data[PORT_WEST] if i // width == 0: - s.tile[i].send_data[PORT_SOUTH].rdy //= 0 - s.tile[i].recv_data[PORT_SOUTH].en //= 0 - s.tile[i].recv_data[PORT_SOUTH].msg //= DataType(0, 0) + s.tile[i].send_data[PORT_SOUTH] //= s.send_data_on_boundary_south[i % width] + s.tile[i].recv_data[PORT_SOUTH] //= s.recv_data_on_boundary_south[i % width] if i // width == height - 1: - s.tile[i].send_data[PORT_NORTH].rdy //= 0 - s.tile[i].recv_data[PORT_NORTH].en //= 0 - s.tile[i].recv_data[PORT_NORTH].msg //= DataType(0, 0) + s.tile[i].send_data[PORT_NORTH] //= s.send_data_on_boundary_north[i % width] + s.tile[i].recv_data[PORT_NORTH] //= s.recv_data_on_boundary_north[i % width] if i % width == 0: - s.tile[i].send_data[PORT_WEST].rdy //= 0 - s.tile[i].recv_data[PORT_WEST].en //= 0 - s.tile[i].recv_data[PORT_WEST].msg //= DataType(0, 0) + s.tile[i].send_data[PORT_WEST] //= s.send_data_on_boundary_west[i // width] + s.tile[i].recv_data[PORT_WEST] //= s.recv_data_on_boundary_west[i // width] if i % width == width - 1: - s.tile[i].send_data[PORT_EAST].rdy //= 0 - s.tile[i].recv_data[PORT_EAST].en //= 0 - s.tile[i].recv_data[PORT_EAST].msg //= DataType(0, 0) + s.tile[i].send_data[PORT_EAST] //= s.send_data_on_boundary_east[i // width] + s.tile[i].recv_data[PORT_EAST] //= s.recv_data_on_boundary_east[i // width] + + # if i // width == 0: + # s.tile[i].send_data[PORT_SOUTH].rdy //= 0 + # s.tile[i].recv_data[PORT_SOUTH].en //= 0 + # s.tile[i].recv_data[PORT_SOUTH].msg //= DataType(0, 0) + + # if i // width == height - 1: + # s.tile[i].send_data[PORT_NORTH].rdy //= 0 + # s.tile[i].recv_data[PORT_NORTH].en //= 0 + # s.tile[i].recv_data[PORT_NORTH].msg //= DataType(0, 0) + + # if i % width == 0: + # s.tile[i].send_data[PORT_WEST].rdy //= 0 + # s.tile[i].recv_data[PORT_WEST].en //= 0 + # s.tile[i].recv_data[PORT_WEST].msg //= DataType(0, 0) + + # if i % width == width - 1: + # s.tile[i].send_data[PORT_EAST].rdy //= 0 + # s.tile[i].recv_data[PORT_EAST].en //= 0 + # s.tile[i].recv_data[PORT_EAST].msg //= DataType(0, 0) if i % width == 0: s.tile[i].to_mem_raddr //= s.data_mem.recv_raddr[i//width] @@ -141,7 +182,6 @@ def construct(s, DataType, PredicateType, CtrlType, NocPktType, s.tile[i].to_mem_waddr.rdy //= 0 s.tile[i].to_mem_wdata.rdy //= 0 - # Line trace def line_trace( s ): # str = "||".join([ x.element.line_trace() for x in s.tile ]) @@ -150,3 +190,4 @@ def line_trace( s ): for (i,x) in enumerate(s.tile) ]) res += "\n :: [" + s.data_mem.line_trace() + "] \n" return res + diff --git a/cgra/test/CGRASeparateCrossbarRTL_test.py b/cgra/test/CGRASeparateCrossbarRTL_test.py deleted file mode 100644 index a80e20b..0000000 --- a/cgra/test/CGRASeparateCrossbarRTL_test.py +++ /dev/null @@ -1,238 +0,0 @@ -""" -========================================================================== -CGRASeparateCrossbarRTL_test.py -========================================================================== -Test cases for CGRAs with different configurations. - -Author : Cheng Tan - Date : Nov 29, 2024 -""" - - -from pymtl3 import * -from pymtl3.stdlib.test_utils import (run_sim, - config_model_with_cmdline_opts) -from pymtl3.passes.backends.verilog import (VerilogTranslationPass, - VerilogVerilatorImportPass) -from ..CGRASeparateCrossbarRTL import CGRASeparateCrossbarRTL -from ...fu.flexible.FlexibleFuRTL import FlexibleFuRTL -from ...fu.single.AdderRTL import AdderRTL -from ...fu.single.MemUnitRTL import MemUnitRTL -from ...fu.single.ShifterRTL import ShifterRTL -from ...lib.messages import * -from ...lib.opt_type import * -from ...lib.basic.en_rdy.test_srcs import TestSrcRTL - - -#------------------------------------------------------------------------- -# Test harness -#------------------------------------------------------------------------- - -class TestHarness(Component): - - def construct(s, DUT, FunctionUnit, FuList, DataType, PredicateType, - CtrlType, width, height, ctrl_mem_size, data_mem_size, - src_opt, ctrl_waddr): - - s.num_tiles = width * height - AddrType = mk_bits(clog2(ctrl_mem_size)) - - s.src_opt = [TestSrcRTL(CtrlType, src_opt[i]) - for i in range(s.num_tiles)] - s.ctrl_waddr = [TestSrcRTL(AddrType, ctrl_waddr[i]) - for i in range(s.num_tiles)] - - s.dut = DUT(DataType, PredicateType, CtrlType, width, height, - ctrl_mem_size, data_mem_size, len(src_opt[0]), - len(src_opt[0]), FunctionUnit, FuList) - - for i in range(s.num_tiles): - connect(s.src_opt[i].send, s.dut.recv_wopt[i]) - connect(s.ctrl_waddr[i].send, s.dut.recv_waddr[i]) - - def done(s): - done = True - for i in range(s.num_tiles): - if not s.src_opt[i].done(): - done = False - break - return done - - def line_trace(s): - return s.dut.line_trace() - -def test_homo_2x2(cmdline_opts): - num_tile_inports = 4 - num_tile_outports = 4 - num_fu_inports = 4 - num_fu_outports = 2 - num_routing_outports = num_tile_outports + num_fu_inports - ctrl_mem_size = 6 - data_mem_size = 8 - width = 2 - height = 2 - TileInType = mk_bits(clog2(num_tile_inports + 1)) - FuInType = mk_bits(clog2(num_fu_inports + 1)) - FuOutType = mk_bits(clog2(num_fu_outports + 1)) - AddrType = mk_bits(clog2(ctrl_mem_size)) - num_tiles = width * height - DUT = CGRASeparateCrossbarRTL - FunctionUnit = FlexibleFuRTL - FuList = [MemUnitRTL, AdderRTL] - DataType = mk_data(16, 1) - PredicateType = mk_predicate(1, 1) - CtrlType = mk_separate_ctrl(num_fu_inports, num_fu_outports, - num_tile_inports, num_tile_outports) - pickRegister = [FuInType(x + 1) for x in range(num_fu_inports)] - src_opt = [[ - CtrlType(OPT_INC, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - # TODO: make below as TileInType(5) to double check. - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - CtrlType(OPT_INC, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - - CtrlType(OPT_ADD, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - - CtrlType(OPT_STR, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - - CtrlType(OPT_ADD, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - - CtrlType(OPT_ADD, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]) - - ] for _ in range(num_tiles)] - ctrl_waddr = [[AddrType(0), AddrType(1), AddrType(2), AddrType(3), - AddrType(4), AddrType(5)] for _ in range(num_tiles)] - th = TestHarness(DUT, FunctionUnit, FuList, DataType, PredicateType, - CtrlType, width, height, ctrl_mem_size, data_mem_size, - src_opt, ctrl_waddr) - th.elaborate() - th.dut.set_metadata(VerilogVerilatorImportPass.vl_Wno_list, - ['UNSIGNED', 'UNOPTFLAT', 'WIDTH', 'WIDTHCONCAT', - 'ALWCOMBORDER']) - th = config_model_with_cmdline_opts(th, cmdline_opts, duts=['dut']) - run_sim(th) - -def test_hetero_2x2(cmdline_opts): - num_tile_inports = 4 - num_tile_outports = 4 - num_fu_inports = 4 - num_fu_outports = 2 - num_routing_outports = num_tile_outports + num_fu_inports - ctrl_mem_size = 6 - width = 2 - height = 2 - TileInType = mk_bits(clog2(num_tile_inports + 1)) - AddrType = mk_bits(clog2(ctrl_mem_size)) - num_tiles = width * height - data_mem_size = 8 - num_fu_in = 4 - DUT = CGRASeparateCrossbarRTL - FunctionUnit = FlexibleFuRTL - FuList = [MemUnitRTL, AdderRTL] - DataType = mk_data(16, 1) - PredicateType = mk_predicate(1, 1) - CtrlType = mk_separate_ctrl(num_fu_inports, num_fu_outports, - num_tile_inports, num_tile_outports) - FuInType = mk_bits(clog2(num_fu_inports + 1)) - FuOutType = mk_bits(clog2(num_fu_outports + 1)) - pickRegister = [FuInType(x + 1) for x in range(num_fu_inports)] - - src_opt = [[ - CtrlType(OPT_INC, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - # TODO: make below as TileInType(5) to double check. - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - CtrlType(OPT_INC, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - - CtrlType(OPT_ADD, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - - CtrlType(OPT_STR, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - - CtrlType(OPT_ADD, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - - CtrlType(OPT_ADD, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]) - - ] for _ in range(num_tiles)] - - ctrl_waddr = [[AddrType(0), AddrType(1), AddrType(2), AddrType(3), - AddrType(4), AddrType(5)] for _ in range(num_tiles)] - th = TestHarness(DUT, FunctionUnit, FuList, DataType, PredicateType, - CtrlType, width, height, ctrl_mem_size, data_mem_size, - src_opt, ctrl_waddr) - th.set_param("top.dut.tile[1].construct", FuList = [ShifterRTL]) - th.elaborate() - th.dut.set_metadata(VerilogVerilatorImportPass.vl_Wno_list, - ['UNSIGNED', 'UNOPTFLAT', 'WIDTH', 'WIDTHCONCAT', - 'ALWCOMBORDER']) - th = config_model_with_cmdline_opts(th, cmdline_opts, duts=['dut']) - #th.set_param("top.dut.tile[1].construct", FuList=[MemUnitRTL,ShifterRTL]) - run_sim(th) - diff --git a/cgra/translate/CGRAWithCrossbarDataMemRTL_test.py b/cgra/test/CgraCrossbarDataMemRingCtrlMemRTL_test.py similarity index 56% rename from cgra/translate/CGRAWithCrossbarDataMemRTL_test.py rename to cgra/test/CgraCrossbarDataMemRingCtrlMemRTL_test.py index ea4e7f9..9757073 100644 --- a/cgra/translate/CGRAWithCrossbarDataMemRTL_test.py +++ b/cgra/test/CgraCrossbarDataMemRingCtrlMemRTL_test.py @@ -1,11 +1,12 @@ """ ========================================================================== -CGRAWithCrossbarDataMemRTL_test.py +CgraCrossbarDataMemRingCtrlMemRTL_test.py ========================================================================== -Test cases for CGRA with crossbar-based data memory. +Test cases for CGRA with crossbar-based data memory and ring-based control +memory of each tile. Author : Cheng Tan - Date : Dec 14, 2024 + Date : Dec 22, 2024 """ @@ -14,61 +15,67 @@ config_model_with_cmdline_opts) from pymtl3.passes.backends.verilog import (VerilogTranslationPass, VerilogVerilatorImportPass) -from ..CGRAWithCrossbarDataMemRTL import CGRAWithCrossbarDataMemRTL +from ..CgraCrossbarDataMemRingCtrlMemRTL import CgraCrossbarDataMemRingCtrlMemRTL from ...fu.flexible.FlexibleFuRTL import FlexibleFuRTL from ...fu.single.AdderRTL import AdderRTL from ...fu.single.MemUnitRTL import MemUnitRTL from ...fu.single.ShifterRTL import ShifterRTL from ...lib.messages import * +from ...lib.cmd_type import * from ...lib.opt_type import * from ...lib.basic.en_rdy.test_srcs import TestSrcRTL - +from ...lib.basic.val_rdy.SourceRTL import SourceRTL as ValRdyTestSrcRTL #------------------------------------------------------------------------- # Test harness #------------------------------------------------------------------------- class TestHarness(Component): - def construct(s, DUT, FunctionUnit, FuList, DataType, - PredicateType, CtrlType, NocPktType, CmdType, - ControllerIdType, controller_id, width, height, + PredicateType, CtrlPktType, CtrlSignalType, NocPktType, + CmdType, ControllerIdType, controller_id, width, height, ctrl_mem_size, data_mem_size_global, data_mem_size_per_bank, num_banks_per_cgra, - src_opt, ctrl_waddr, controller2addr_map): + src_ctrl_pkt, ctrl_steps, controller2addr_map): s.num_tiles = width * height CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) DataAddrType = mk_bits(clog2(data_mem_size_global)) - - s.src_opt = [TestSrcRTL(CtrlType, src_opt[i]) - for i in range(s.num_tiles)] - s.ctrl_waddr = [TestSrcRTL(CtrlAddrType, ctrl_waddr[i]) - for i in range(s.num_tiles)] - - s.dut = DUT(DataType, PredicateType, CtrlType, NocPktType, - CmdType, ControllerIdType, controller_id, + s.src_ctrl_pkt = ValRdyTestSrcRTL(CtrlPktType, src_ctrl_pkt) + s.dut = DUT(DataType, PredicateType, CtrlPktType, CtrlSignalType, + NocPktType, CmdType, ControllerIdType, controller_id, width, height, ctrl_mem_size, data_mem_size_global, data_mem_size_per_bank, num_banks_per_cgra, - len(src_opt[0]), len(src_opt[0]), FunctionUnit, FuList, + ctrl_steps, ctrl_steps, FunctionUnit, FuList, controller2addr_map) # Connections + s.src_ctrl_pkt.send //= s.dut.recv_from_cpu_ctrl_pkt + s.dut.send_to_noc.rdy //= 0 s.dut.recv_from_noc.val //= 0 s.dut.recv_from_noc.msg //= NocPktType(0, 0, 0, 0, 0, 0) - for i in range(s.num_tiles): - connect(s.src_opt[i].send, s.dut.recv_wopt[i]) - connect(s.ctrl_waddr[i].send, s.dut.recv_waddr[i]) + for tile_col in range(width): + s.dut.send_data_on_boundary_north[tile_col].rdy //= 0 + s.dut.recv_data_on_boundary_north[tile_col].en //= 0 + s.dut.recv_data_on_boundary_north[tile_col].msg //= DataType() + + s.dut.send_data_on_boundary_south[tile_col].rdy //= 0 + s.dut.recv_data_on_boundary_south[tile_col].en //= 0 + s.dut.recv_data_on_boundary_south[tile_col].msg //= DataType() + + for tile_row in range(height): + s.dut.send_data_on_boundary_west[tile_row].rdy //= 0 + s.dut.recv_data_on_boundary_west[tile_row].en //= 0 + s.dut.recv_data_on_boundary_west[tile_row].msg //= DataType() + + s.dut.send_data_on_boundary_east[tile_row].rdy //= 0 + s.dut.recv_data_on_boundary_east[tile_row].en //= 0 + s.dut.recv_data_on_boundary_east[tile_row].msg //= DataType() def done(s): - done = True - for i in range(s.num_tiles): - if not s.src_opt[i].done(): - done = False - break - return done + return s.src_ctrl_pkt.done() def line_trace(s): return s.dut.line_trace() @@ -85,6 +92,9 @@ def test_homo_2x2(cmdline_opts): num_banks_per_cgra = 2 width = 2 height = 2 + num_terminals = 4 + num_ctrl_actions = 6 + num_ctrl_operations = 64 TileInType = mk_bits(clog2(num_tile_inports + 1)) FuInType = mk_bits(clog2(num_fu_inports + 1)) FuOutType = mk_bits(clog2(num_fu_outports + 1)) @@ -92,15 +102,14 @@ def test_homo_2x2(cmdline_opts): AddrType = mk_bits(addr_nbits) CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) num_tiles = width * height - DUT = CGRAWithCrossbarDataMemRTL + DUT = CgraCrossbarDataMemRingCtrlMemRTL FunctionUnit = FlexibleFuRTL FuList = [MemUnitRTL, AdderRTL] DataType = mk_data(32, 1) PredicateType = mk_predicate(1, 1) - nterminals = 4 CmdType = mk_bits(4) - ControllerIdType = mk_bits(clog2(nterminals)) + ControllerIdType = mk_bits(clog2(num_terminals)) controller_id = 1 controller2addr_map = { 0: [0, 3], @@ -109,15 +118,31 @@ def test_homo_2x2(cmdline_opts): 3: [12, 15], } - CtrlType = mk_separate_ctrl(num_fu_inports, num_fu_outports, - num_tile_inports, num_tile_outports) - NocPktType = mk_ring_multi_cgra_pkt(nrouters = nterminals, + CtrlPktType = \ + mk_ring_across_tiles_pkt(width * height, + num_ctrl_actions, + ctrl_mem_size, + num_ctrl_operations, + num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports) + + CtrlSignalType = \ + mk_separate_ctrl(num_ctrl_operations, + num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports) + + NocPktType = mk_ring_multi_cgra_pkt(nrouters = num_terminals, addr_nbits = addr_nbits, data_nbits = 32, predicate_nbits = 1) pickRegister = [FuInType(x + 1) for x in range(num_fu_inports)] - src_opt = [[ - CtrlType(OPT_INC, b1(0), + src_opt_per_tile = [[ + # src dst vc_id opq cmd_type addr operation predicate + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 0, OPT_INC, b1(0), pickRegister, [TileInType(4), TileInType(3), TileInType(2), TileInType(1), # TODO: make below as TileInType(5) to double check. @@ -125,7 +150,7 @@ def test_homo_2x2(cmdline_opts): [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - CtrlType(OPT_INC, b1(0), + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 1, OPT_INC, b1(0), pickRegister, [TileInType(4), TileInType(3), TileInType(2), TileInType(1), TileInType(0), TileInType(0), TileInType(0), TileInType(0)], @@ -133,7 +158,7 @@ def test_homo_2x2(cmdline_opts): [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - CtrlType(OPT_ADD, b1(0), + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 2, OPT_ADD, b1(0), pickRegister, [TileInType(4), TileInType(3), TileInType(2), TileInType(1), TileInType(0), TileInType(0), TileInType(0), TileInType(0)], @@ -141,7 +166,7 @@ def test_homo_2x2(cmdline_opts): [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - CtrlType(OPT_STR, b1(0), + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 3, OPT_STR, b1(0), pickRegister, [TileInType(4), TileInType(3), TileInType(2), TileInType(1), TileInType(0), TileInType(0), TileInType(0), TileInType(0)], @@ -149,7 +174,7 @@ def test_homo_2x2(cmdline_opts): [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - CtrlType(OPT_ADD, b1(0), + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 4, OPT_ADD, b1(0), pickRegister, [TileInType(4), TileInType(3), TileInType(2), TileInType(1), TileInType(0), TileInType(0), TileInType(0), TileInType(0)], @@ -157,27 +182,38 @@ def test_homo_2x2(cmdline_opts): [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - CtrlType(OPT_ADD, b1(0), + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 5, OPT_ADD, b1(0), + pickRegister, + [TileInType(4), TileInType(3), TileInType(2), TileInType(1), + TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + + [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), + + # This last one is for launching kernel. + CtrlPktType(0, i, 0, 0, CMD_LAUNCH, 0, OPT_ADD, b1(0), pickRegister, [TileInType(4), TileInType(3), TileInType(2), TileInType(1), TileInType(0), TileInType(0), TileInType(0), TileInType(0)], [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]) + ] for i in range(num_tiles)] + + src_ctrl_pkt = [] + for opt_per_tile in src_opt_per_tile: + src_ctrl_pkt.extend(opt_per_tile) - ] for _ in range(num_tiles)] - ctrl_waddr = [[CtrlAddrType(0), CtrlAddrType(1), CtrlAddrType(2), CtrlAddrType(3), - CtrlAddrType(4), CtrlAddrType(5)] for _ in range(num_tiles)] th = TestHarness(DUT, FunctionUnit, FuList, DataType, PredicateType, - CtrlType, NocPktType, CmdType, ControllerIdType, - controller_id, width, height, ctrl_mem_size, - data_mem_size_global, data_mem_size_per_bank, - num_banks_per_cgra, src_opt, ctrl_waddr, - controller2addr_map) + CtrlPktType, CtrlSignalType, NocPktType, CmdType, + ControllerIdType, controller_id, width, height, + ctrl_mem_size, data_mem_size_global, + data_mem_size_per_bank, num_banks_per_cgra, + src_ctrl_pkt, ctrl_mem_size, controller2addr_map) th.elaborate() th.dut.set_metadata(VerilogVerilatorImportPass.vl_Wno_list, ['UNSIGNED', 'UNOPTFLAT', 'WIDTH', 'WIDTHCONCAT', 'ALWCOMBORDER']) - th = config_model_with_cmdline_opts(th, cmdline_opts, duts=['dut']) + th = config_model_with_cmdline_opts(th, cmdline_opts, duts = ['dut']) run_sim(th) diff --git a/cgra/translate/CGRASeparateCrossbarRTL_test.py b/cgra/translate/CGRASeparateCrossbarRTL_test.py deleted file mode 100644 index ad00c6c..0000000 --- a/cgra/translate/CGRASeparateCrossbarRTL_test.py +++ /dev/null @@ -1,148 +0,0 @@ -""" -========================================================================== -CGRASeparateCrossbarRTL_test.py -========================================================================== -Test cases for CGRAs with different configurations. - -Author : Cheng Tan - Date : Nov 29, 2024 -""" - - -from pymtl3 import * -from pymtl3.stdlib.test_utils import (run_sim, - config_model_with_cmdline_opts) -from pymtl3.passes.backends.verilog import (VerilogTranslationPass, - VerilogVerilatorImportPass) -from ..CGRASeparateCrossbarRTL import CGRASeparateCrossbarRTL -from ...fu.flexible.FlexibleFuRTL import FlexibleFuRTL -from ...fu.single.AdderRTL import AdderRTL -from ...fu.single.MemUnitRTL import MemUnitRTL -from ...fu.single.ShifterRTL import ShifterRTL -from ...lib.basic.en_rdy.test_srcs import TestSrcRTL -from ...lib.messages import * -from ...lib.opt_type import * - - -#------------------------------------------------------------------------- -# Test harness -#------------------------------------------------------------------------- - -class TestHarness(Component): - - def construct(s, DUT, FunctionUnit, FuList, DataType, PredicateType, - CtrlType, width, height, ctrl_mem_size, data_mem_size, - src_opt, ctrl_waddr): - - s.num_tiles = width * height - AddrType = mk_bits(clog2(ctrl_mem_size)) - - s.src_opt = [TestSrcRTL(CtrlType, src_opt[i]) - for i in range(s.num_tiles)] - s.ctrl_waddr = [TestSrcRTL(AddrType, ctrl_waddr[i]) - for i in range(s.num_tiles)] - - s.dut = DUT(DataType, PredicateType, CtrlType, width, height, - ctrl_mem_size, data_mem_size, len(src_opt[0]), - len(src_opt[0]), FunctionUnit, FuList) - - for i in range(s.num_tiles): - connect(s.src_opt[i].send, s.dut.recv_wopt[i]) - connect(s.ctrl_waddr[i].send, s.dut.recv_waddr[i]) - - def done(s): - done = True - for i in range(s.num_tiles): - if not s.src_opt[i].done(): - done = False - break - return done - - def line_trace(s): - return s.dut.line_trace() - -def test_homo_2x2(cmdline_opts): - num_tile_inports = 4 - num_tile_outports = 4 - num_fu_inports = 4 - num_fu_outports = 2 - num_routing_outports = num_tile_outports + num_fu_inports - ctrl_mem_size = 6 - data_mem_size = 8 - width = 2 - height = 2 - TileInType = mk_bits(clog2(num_tile_inports + 1)) - FuInType = mk_bits(clog2(num_fu_inports + 1)) - FuOutType = mk_bits(clog2(num_fu_outports + 1)) - AddrType = mk_bits(clog2(ctrl_mem_size)) - num_tiles = width * height - DUT = CGRASeparateCrossbarRTL - FunctionUnit = FlexibleFuRTL - FuList = [MemUnitRTL, AdderRTL] - DataType = mk_data(16, 1) - PredicateType = mk_predicate(1, 1) - CtrlType = mk_separate_ctrl(num_fu_inports, num_fu_outports, - num_tile_inports, num_tile_outports) - pickRegister = [FuInType(x + 1) for x in range(num_fu_inports)] - src_opt = [[ - CtrlType(OPT_INC, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - # TODO: make below as TileInType(5) to double check. - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - CtrlType(OPT_INC, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - - CtrlType(OPT_ADD, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - - CtrlType(OPT_STR, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - - CtrlType(OPT_ADD, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - - CtrlType(OPT_ADD, b1(0), - pickRegister, - [TileInType(4), TileInType(3), TileInType(2), TileInType(1), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]) - - ] for _ in range(num_tiles)] - ctrl_waddr = [[AddrType(0), AddrType(1), AddrType(2), AddrType(3), - AddrType(4), AddrType(5)] for _ in range(num_tiles)] - th = TestHarness(DUT, FunctionUnit, FuList, DataType, PredicateType, - CtrlType, width, height, ctrl_mem_size, data_mem_size, - src_opt, ctrl_waddr) - th.elaborate() - th.dut.set_metadata(VerilogVerilatorImportPass.vl_Wno_list, - ['UNSIGNED', 'UNOPTFLAT', 'WIDTH', 'WIDTHCONCAT', - 'ALWCOMBORDER']) - th = config_model_with_cmdline_opts(th, cmdline_opts, duts=['dut']) - run_sim(th) - diff --git a/cgra/test/CGRAWithCrossbarDataMemRTL_test.py b/cgra/translate/CgraCrossbarDataMemRingCtrlMemRTL_test.py similarity index 56% rename from cgra/test/CGRAWithCrossbarDataMemRTL_test.py rename to cgra/translate/CgraCrossbarDataMemRingCtrlMemRTL_test.py index ea4e7f9..9757073 100644 --- a/cgra/test/CGRAWithCrossbarDataMemRTL_test.py +++ b/cgra/translate/CgraCrossbarDataMemRingCtrlMemRTL_test.py @@ -1,11 +1,12 @@ """ ========================================================================== -CGRAWithCrossbarDataMemRTL_test.py +CgraCrossbarDataMemRingCtrlMemRTL_test.py ========================================================================== -Test cases for CGRA with crossbar-based data memory. +Test cases for CGRA with crossbar-based data memory and ring-based control +memory of each tile. Author : Cheng Tan - Date : Dec 14, 2024 + Date : Dec 22, 2024 """ @@ -14,61 +15,67 @@ config_model_with_cmdline_opts) from pymtl3.passes.backends.verilog import (VerilogTranslationPass, VerilogVerilatorImportPass) -from ..CGRAWithCrossbarDataMemRTL import CGRAWithCrossbarDataMemRTL +from ..CgraCrossbarDataMemRingCtrlMemRTL import CgraCrossbarDataMemRingCtrlMemRTL from ...fu.flexible.FlexibleFuRTL import FlexibleFuRTL from ...fu.single.AdderRTL import AdderRTL from ...fu.single.MemUnitRTL import MemUnitRTL from ...fu.single.ShifterRTL import ShifterRTL from ...lib.messages import * +from ...lib.cmd_type import * from ...lib.opt_type import * from ...lib.basic.en_rdy.test_srcs import TestSrcRTL - +from ...lib.basic.val_rdy.SourceRTL import SourceRTL as ValRdyTestSrcRTL #------------------------------------------------------------------------- # Test harness #------------------------------------------------------------------------- class TestHarness(Component): - def construct(s, DUT, FunctionUnit, FuList, DataType, - PredicateType, CtrlType, NocPktType, CmdType, - ControllerIdType, controller_id, width, height, + PredicateType, CtrlPktType, CtrlSignalType, NocPktType, + CmdType, ControllerIdType, controller_id, width, height, ctrl_mem_size, data_mem_size_global, data_mem_size_per_bank, num_banks_per_cgra, - src_opt, ctrl_waddr, controller2addr_map): + src_ctrl_pkt, ctrl_steps, controller2addr_map): s.num_tiles = width * height CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) DataAddrType = mk_bits(clog2(data_mem_size_global)) - - s.src_opt = [TestSrcRTL(CtrlType, src_opt[i]) - for i in range(s.num_tiles)] - s.ctrl_waddr = [TestSrcRTL(CtrlAddrType, ctrl_waddr[i]) - for i in range(s.num_tiles)] - - s.dut = DUT(DataType, PredicateType, CtrlType, NocPktType, - CmdType, ControllerIdType, controller_id, + s.src_ctrl_pkt = ValRdyTestSrcRTL(CtrlPktType, src_ctrl_pkt) + s.dut = DUT(DataType, PredicateType, CtrlPktType, CtrlSignalType, + NocPktType, CmdType, ControllerIdType, controller_id, width, height, ctrl_mem_size, data_mem_size_global, data_mem_size_per_bank, num_banks_per_cgra, - len(src_opt[0]), len(src_opt[0]), FunctionUnit, FuList, + ctrl_steps, ctrl_steps, FunctionUnit, FuList, controller2addr_map) # Connections + s.src_ctrl_pkt.send //= s.dut.recv_from_cpu_ctrl_pkt + s.dut.send_to_noc.rdy //= 0 s.dut.recv_from_noc.val //= 0 s.dut.recv_from_noc.msg //= NocPktType(0, 0, 0, 0, 0, 0) - for i in range(s.num_tiles): - connect(s.src_opt[i].send, s.dut.recv_wopt[i]) - connect(s.ctrl_waddr[i].send, s.dut.recv_waddr[i]) + for tile_col in range(width): + s.dut.send_data_on_boundary_north[tile_col].rdy //= 0 + s.dut.recv_data_on_boundary_north[tile_col].en //= 0 + s.dut.recv_data_on_boundary_north[tile_col].msg //= DataType() + + s.dut.send_data_on_boundary_south[tile_col].rdy //= 0 + s.dut.recv_data_on_boundary_south[tile_col].en //= 0 + s.dut.recv_data_on_boundary_south[tile_col].msg //= DataType() + + for tile_row in range(height): + s.dut.send_data_on_boundary_west[tile_row].rdy //= 0 + s.dut.recv_data_on_boundary_west[tile_row].en //= 0 + s.dut.recv_data_on_boundary_west[tile_row].msg //= DataType() + + s.dut.send_data_on_boundary_east[tile_row].rdy //= 0 + s.dut.recv_data_on_boundary_east[tile_row].en //= 0 + s.dut.recv_data_on_boundary_east[tile_row].msg //= DataType() def done(s): - done = True - for i in range(s.num_tiles): - if not s.src_opt[i].done(): - done = False - break - return done + return s.src_ctrl_pkt.done() def line_trace(s): return s.dut.line_trace() @@ -85,6 +92,9 @@ def test_homo_2x2(cmdline_opts): num_banks_per_cgra = 2 width = 2 height = 2 + num_terminals = 4 + num_ctrl_actions = 6 + num_ctrl_operations = 64 TileInType = mk_bits(clog2(num_tile_inports + 1)) FuInType = mk_bits(clog2(num_fu_inports + 1)) FuOutType = mk_bits(clog2(num_fu_outports + 1)) @@ -92,15 +102,14 @@ def test_homo_2x2(cmdline_opts): AddrType = mk_bits(addr_nbits) CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) num_tiles = width * height - DUT = CGRAWithCrossbarDataMemRTL + DUT = CgraCrossbarDataMemRingCtrlMemRTL FunctionUnit = FlexibleFuRTL FuList = [MemUnitRTL, AdderRTL] DataType = mk_data(32, 1) PredicateType = mk_predicate(1, 1) - nterminals = 4 CmdType = mk_bits(4) - ControllerIdType = mk_bits(clog2(nterminals)) + ControllerIdType = mk_bits(clog2(num_terminals)) controller_id = 1 controller2addr_map = { 0: [0, 3], @@ -109,15 +118,31 @@ def test_homo_2x2(cmdline_opts): 3: [12, 15], } - CtrlType = mk_separate_ctrl(num_fu_inports, num_fu_outports, - num_tile_inports, num_tile_outports) - NocPktType = mk_ring_multi_cgra_pkt(nrouters = nterminals, + CtrlPktType = \ + mk_ring_across_tiles_pkt(width * height, + num_ctrl_actions, + ctrl_mem_size, + num_ctrl_operations, + num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports) + + CtrlSignalType = \ + mk_separate_ctrl(num_ctrl_operations, + num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports) + + NocPktType = mk_ring_multi_cgra_pkt(nrouters = num_terminals, addr_nbits = addr_nbits, data_nbits = 32, predicate_nbits = 1) pickRegister = [FuInType(x + 1) for x in range(num_fu_inports)] - src_opt = [[ - CtrlType(OPT_INC, b1(0), + src_opt_per_tile = [[ + # src dst vc_id opq cmd_type addr operation predicate + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 0, OPT_INC, b1(0), pickRegister, [TileInType(4), TileInType(3), TileInType(2), TileInType(1), # TODO: make below as TileInType(5) to double check. @@ -125,7 +150,7 @@ def test_homo_2x2(cmdline_opts): [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - CtrlType(OPT_INC, b1(0), + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 1, OPT_INC, b1(0), pickRegister, [TileInType(4), TileInType(3), TileInType(2), TileInType(1), TileInType(0), TileInType(0), TileInType(0), TileInType(0)], @@ -133,7 +158,7 @@ def test_homo_2x2(cmdline_opts): [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - CtrlType(OPT_ADD, b1(0), + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 2, OPT_ADD, b1(0), pickRegister, [TileInType(4), TileInType(3), TileInType(2), TileInType(1), TileInType(0), TileInType(0), TileInType(0), TileInType(0)], @@ -141,7 +166,7 @@ def test_homo_2x2(cmdline_opts): [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - CtrlType(OPT_STR, b1(0), + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 3, OPT_STR, b1(0), pickRegister, [TileInType(4), TileInType(3), TileInType(2), TileInType(1), TileInType(0), TileInType(0), TileInType(0), TileInType(0)], @@ -149,7 +174,7 @@ def test_homo_2x2(cmdline_opts): [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - CtrlType(OPT_ADD, b1(0), + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 4, OPT_ADD, b1(0), pickRegister, [TileInType(4), TileInType(3), TileInType(2), TileInType(1), TileInType(0), TileInType(0), TileInType(0), TileInType(0)], @@ -157,27 +182,38 @@ def test_homo_2x2(cmdline_opts): [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - CtrlType(OPT_ADD, b1(0), + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 5, OPT_ADD, b1(0), + pickRegister, + [TileInType(4), TileInType(3), TileInType(2), TileInType(1), + TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + + [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), + + # This last one is for launching kernel. + CtrlPktType(0, i, 0, 0, CMD_LAUNCH, 0, OPT_ADD, b1(0), pickRegister, [TileInType(4), TileInType(3), TileInType(2), TileInType(1), TileInType(0), TileInType(0), TileInType(0), TileInType(0)], [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]) + ] for i in range(num_tiles)] + + src_ctrl_pkt = [] + for opt_per_tile in src_opt_per_tile: + src_ctrl_pkt.extend(opt_per_tile) - ] for _ in range(num_tiles)] - ctrl_waddr = [[CtrlAddrType(0), CtrlAddrType(1), CtrlAddrType(2), CtrlAddrType(3), - CtrlAddrType(4), CtrlAddrType(5)] for _ in range(num_tiles)] th = TestHarness(DUT, FunctionUnit, FuList, DataType, PredicateType, - CtrlType, NocPktType, CmdType, ControllerIdType, - controller_id, width, height, ctrl_mem_size, - data_mem_size_global, data_mem_size_per_bank, - num_banks_per_cgra, src_opt, ctrl_waddr, - controller2addr_map) + CtrlPktType, CtrlSignalType, NocPktType, CmdType, + ControllerIdType, controller_id, width, height, + ctrl_mem_size, data_mem_size_global, + data_mem_size_per_bank, num_banks_per_cgra, + src_ctrl_pkt, ctrl_mem_size, controller2addr_map) th.elaborate() th.dut.set_metadata(VerilogVerilatorImportPass.vl_Wno_list, ['UNSIGNED', 'UNOPTFLAT', 'WIDTH', 'WIDTHCONCAT', 'ALWCOMBORDER']) - th = config_model_with_cmdline_opts(th, cmdline_opts, duts=['dut']) + th = config_model_with_cmdline_opts(th, cmdline_opts, duts = ['dut']) run_sim(th) diff --git a/controller/ControllerRTL.py b/controller/ControllerRTL.py index 27edf76..68ed0b5 100644 --- a/controller/ControllerRTL.py +++ b/controller/ControllerRTL.py @@ -13,6 +13,7 @@ from ..lib.basic.en_rdy.ifcs import SendIfcRTL, RecvIfcRTL from ..lib.basic.val_rdy.ifcs import SendIfcRTL as ValRdySendIfcRTL from ..lib.basic.val_rdy.ifcs import RecvIfcRTL as ValRdyRecvIfcRTL +from ..lib.basic.val_rdy.queues import NormalQueueRTL from ..noc.ChannelNormalRTL import ChannelNormalRTL from ..noc.PyOCN.pymtl3_net.xbar.XbarBypassQueueRTL import XbarBypassQueueRTL from ..lib.cmd_type import * @@ -20,7 +21,7 @@ class ControllerRTL(Component): - def construct(s, ControllerIdType, CmdType, NocPktType, + def construct(s, ControllerIdType, CmdType, CtrlPktType, NocPktType, CGRADataType, CGRAAddrType, controller_id, controller2addr_map): @@ -29,6 +30,9 @@ def construct(s, ControllerIdType, CmdType, NocPktType, s.recv_from_noc = ValRdyRecvIfcRTL(NocPktType) s.send_to_noc = ValRdySendIfcRTL(NocPktType) + s.recv_from_cpu_ctrl_pkt = ValRdyRecvIfcRTL(CtrlPktType) + s.send_to_ctrl_ring_ctrl_pkt = ValRdySendIfcRTL(CtrlPktType) + # Request from/to master. s.recv_from_master_load_request_pkt = RecvIfcRTL(NocPktType) s.recv_from_master_load_response_pkt = RecvIfcRTL(NocPktType) @@ -60,6 +64,8 @@ def construct(s, ControllerIdType, CmdType, NocPktType, # termination). s.crossbar = XbarBypassQueueRTL(NocPktType, 3, 1) + s.recv_ctrl_pkt_queue = NormalQueueRTL(CtrlPktType) + # # TODO: below ifcs should be connected through another NoC within # # one CGRA, instead of per-tile and performing like a bus. # # Configuration signals to be written into and read from per-tile @@ -104,6 +110,16 @@ def construct(s, ControllerIdType, CmdType, NocPktType, s.send_to_master_store_request_addr_queue.send //= s.send_to_master_store_request_addr s.send_to_master_store_request_data_queue.send //= s.send_to_master_store_request_data + # For control signals delivery from CPU to tiles. + # TODO: https://github.com/tancheng/VectorCGRA/issues/11 -- The request needs + # to go through the crossbar for arbitration as well. The packet targeting local + # tiles can be delivered via thr ring within the CGRA; The packet targetting + # other CGRAs can be delivered via the NoC across CGRAs. Note that the packet + # format can be in a universal fashion to support both data and config. Later + # on, the format can be packet-based or flit-based. + s.recv_from_cpu_ctrl_pkt //= s.recv_ctrl_pkt_queue.recv + s.recv_ctrl_pkt_queue.send //= s.send_to_ctrl_ring_ctrl_pkt + @update def update_received_msg(): kLoadRequestInportIdx = 0 diff --git a/controller/test/ControllerRTL_test.py b/controller/test/ControllerRTL_test.py index 18ebaef..57b78e2 100644 --- a/controller/test/ControllerRTL_test.py +++ b/controller/test/ControllerRTL_test.py @@ -29,8 +29,8 @@ class TestHarness(Component): - def construct(s, ControllerIdType, CmdType, MsgType, AddrType, PktType, - controller_id, + def construct(s, ControllerIdType, CtrlPktType, CmdType, MsgType, + AddrType, PktType, controller_id, from_master_load_request_pkt_msgs, from_master_load_response_pkt_msgs, from_master_store_request_pkt_msgs, @@ -56,8 +56,9 @@ def construct(s, ControllerIdType, CmdType, MsgType, AddrType, PktType, s.src_from_noc_val_rdy = TestValRdySrcRTL(PktType, from_noc_pkts) s.sink_to_noc_val_rdy = TestNetSinkRTL(PktType, expected_to_noc_pkts, cmp_fn = cmp_func) - s.dut = ControllerRTL(ControllerIdType, CmdType, PktType, MsgType, - AddrType, controller_id, controller2addr_map) + s.dut = ControllerRTL(ControllerIdType, CmdType, CtrlPktType, + PktType, MsgType, AddrType, controller_id, + controller2addr_map) # Connections s.src_from_master_load_request_pkt_en_rdy.send //= s.dut.recv_from_master_load_request_pkt @@ -72,6 +73,10 @@ def construct(s, ControllerIdType, CmdType, MsgType, AddrType, PktType, s.src_from_noc_val_rdy.send //= s.dut.recv_from_noc s.dut.send_to_noc //= s.sink_to_noc_val_rdy.recv + s.dut.recv_from_cpu_ctrl_pkt.val //= 0 + s.dut.recv_from_cpu_ctrl_pkt.msg //= CtrlPktType() + s.dut.send_to_ctrl_ring_ctrl_pkt.rdy //= 0 + def done(s): return s.src_from_master_load_request_pkt_en_rdy.done() and \ s.src_from_master_load_response_pkt_en_rdy.done() and \ @@ -137,6 +142,13 @@ def mk_src_pkts( nterminals, lst ): nterminals = 4 CmdType = mk_bits(4) ControllerIdType = mk_bits(clog2(nterminals)) +num_ctrl_actions = 8 +ctrl_mem_size = 16 +num_ctrl_operations = 64 +num_fu_inports = 2 +num_fu_outports = 2 +num_tile_inports = 4 +num_tile_outports = 4 data_mem_size_global = 16 addr_nbits = clog2(data_mem_size_global) AddrType = mk_bits(addr_nbits) @@ -150,6 +162,15 @@ def mk_src_pkts( nterminals, lst ): 3: [12, 15], } +CtrlPktType = mk_ring_across_tiles_pkt(nterminals, + num_ctrl_actions, + ctrl_mem_size, + num_ctrl_operations, + num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports) + Pkt = mk_ring_multi_cgra_pkt(nterminals, addr_nbits = addr_nbits, data_nbits = data_nbits, @@ -212,7 +233,8 @@ def mk_src_pkts( nterminals, lst ): def test_simple(): print("controller2addr_map: ", controller2addr_map) - th = TestHarness(ControllerIdType, CmdType, DataType, + th = TestHarness(ControllerIdType, CtrlPktType, + CmdType, DataType, AddrType, Pkt, controller_id, from_master_load_request_pkts, from_master_load_response_pkts, diff --git a/lib/messages.py b/lib/messages.py index 946d5ab..0b58aaf 100644 --- a/lib/messages.py +++ b/lib/messages.py @@ -130,12 +130,13 @@ def str_func( s ): ) -def mk_separate_ctrl(num_fu_inports = 4, +def mk_separate_ctrl(num_operations = 7, + num_fu_inports = 4, num_fu_outports = 2, num_tile_inports = 5, num_tile_outports = 5, prefix = "CGRAConfig" ): - operation_nbits = 6 + operation_nbits = clog2(num_operations) OperationType = mk_bits(operation_nbits) TileInportsType = mk_bits(clog2(num_tile_inports + 1)) TileOutportsType = mk_bits(clog2(num_tile_outports + 1)) @@ -237,7 +238,6 @@ def str_func(s): namespace = {'__str__': str_func} ) - #========================================================================= # Ring multi-CGRA data/config/cmd packet #========================================================================= @@ -294,6 +294,105 @@ def str_func(s): namespace = {'__str__': str_func} ) +#========================================================================= +# Ring for delivering ctrl signals and commands across tiles +#========================================================================= + +def mk_ring_across_tiles_pkt(nrouters = 4, + ctrl_actions = 8, + ctrl_mem_size = 4, + ctrl_operations = 7, + ctrl_fu_inports = 4, + ctrl_fu_outports = 4, + ctrl_tile_inports = 5, + ctrl_tile_outports = 5, + prefix="RingAcrossTilesPacket"): + + IdType = mk_bits(clog2(nrouters)) + opaque_nbits = 1 + OpqType = mk_bits(opaque_nbits) + CtrlActionType = mk_bits(clog2(ctrl_actions)) + CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) + CtrlOperationType = mk_bits(clog2(ctrl_operations)) + CtrlTileInType = mk_bits(clog2(ctrl_tile_inports + 1)) + CtrlTileOutType = mk_bits(clog2(ctrl_tile_outports + 1)) + num_routing_outports = ctrl_tile_outports + ctrl_fu_inports + CtrlRoutingOutType = mk_bits(clog2(num_routing_outports + 1)) + CtrlFuInType = mk_bits(clog2(ctrl_fu_inports + 1)) + CtrlFuOutType = mk_bits(clog2(ctrl_fu_outports + 1)) + CtrlPredicateType = mk_bits(1) + VcIdType = mk_bits(1) + + new_name = f"{prefix}_{nrouters}_{opaque_nbits}_{ctrl_actions}_" \ + f"{ctrl_mem_size}_{ctrl_operations}_{ctrl_fu_inports}_"\ + f"{ctrl_fu_outports}_{ctrl_tile_inports}_{ctrl_tile_outports}" + + + def str_func(s): + out_str = '(ctrl_operation)' + str(s.ctrl_operation) + out_str += '|(ctrl_fu_in)' + for i in range(ctrl_fu_inports): + if i != 0: + out_str += '-' + out_str += str(int(s.ctrl_fu_in[i])) + + out_str += '|(ctrl_predicate)' + out_str += str(int(s.ctrl_predicate)) + + out_str += '|(ctrl_routing_xbar_out)' + for i in range(num_routing_outports): + if i != 0: + out_str += '-' + out_str += str(int(s.ctrl_routing_xbar_outport[i])) + + out_str += '|(ctrl_fu_xbar_out)' + for i in range(num_routing_outports): + if i != 0: + out_str += '-' + out_str += str(int(s.ctrl_fu_xbar_outport[i])) + + out_str += '|(ctrl_predicate_in)' + for i in range(ctrl_tile_inports): + if i != 0: + out_str += '-' + out_str += str(int(s.ctrl_routing_predicate_in[i])) + + return f"{s.src}>{s.dst}:{s.opaque}:{s.ctrl_action}.{s.ctrl_addr}." \ + f"{out_str}" + + field_dict = {} + field_dict['src'] = IdType + field_dict['dst'] = IdType + field_dict['opaque'] = OpqType + field_dict['vc_id'] = VcIdType + field_dict['ctrl_action'] = CtrlActionType + field_dict['ctrl_addr'] = CtrlAddrType + field_dict['ctrl_operation'] = CtrlOperationType + # TODO: need fix to pair `predicate` with specific operation. + # The 'predicate' indicates whether the current operation is based on + # the partial predication or not. Note that 'predicate' is different + # from the following 'predicate_in', which contributes to the 'predicate' + # at the next cycle. + field_dict['ctrl_predicate'] = CtrlPredicateType + # The fu_in indicates the input register ID (i.e., operands) for the + # operation. + field_dict['ctrl_fu_in'] = [CtrlFuInType for _ in range(ctrl_fu_inports)] + + field_dict['ctrl_routing_xbar_outport'] = [CtrlTileInType for _ in range( + num_routing_outports)] + field_dict['ctrl_fu_xbar_outport'] = [CtrlFuOutType for _ in range( + num_routing_outports)] + # I assume one tile supports single predicate during the entire execution + # time, as it is hard to distinguish predication for different operations + # (we automatically update, i.e., 'or', the predicate stored in the + # predicate register). This should be guaranteed by the compiler. + field_dict['ctrl_routing_predicate_in'] = [CtrlPredicateType for _ in range( + ctrl_tile_inports)] + + return mk_bitstruct(new_name, field_dict, + namespace = {'__str__': str_func} + ) + #========================================================================= # Crossbar (tiles <-> SRAM) packet #========================================================================= diff --git a/mem/ctrl/CtrlMemDynamicRTL.py b/mem/ctrl/CtrlMemDynamicRTL.py new file mode 100644 index 0000000..9a96fa1 --- /dev/null +++ b/mem/ctrl/CtrlMemDynamicRTL.py @@ -0,0 +1,142 @@ +""" +========================================================================== +CtrlMemDynamicRTL.py +========================================================================== +Control memory with dynamic reconfigurability (e.g., receiving control +signals, halt/terminate signals) for each CGRA tile. + +Author : Cheng Tan + Date : Dec 20, 2024 +""" + +from pymtl3 import * +from pymtl3.stdlib.primitive import RegisterFile +from ...lib.basic.en_rdy.ifcs import SendIfcRTL +from ...lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL +from ...lib.basic.val_rdy.queues import NormalQueueRTL +from ...lib.cmd_type import * +from ...lib.opt_type import * + +class CtrlMemDynamicRTL(Component): + + def construct(s, CtrlPktType, CtrlSignalType, ctrl_mem_size, + num_fu_inports, num_fu_outports, num_tile_inports, + num_tile_outports, ctrl_count_per_iter = 4, + total_ctrl_steps = 4): + + # The total_ctrl_steps indicates the number of steps the ctrl + # signals should proceed. For example, if the number of ctrl + # signals is 4 and they need to repeat 5 times, then the total + # number of steps should be 4 * 5 = 20. + # assert( ctrl_mem_size <= total_ctrl_steps ) + + # Constant + CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) + PCType = mk_bits(clog2(ctrl_count_per_iter + 1)) + TimeType = mk_bits(clog2(total_ctrl_steps + 1)) + num_routing_outports = num_tile_outports + num_fu_inports + + # Interface + s.send_ctrl = SendIfcRTL(CtrlSignalType) + s.recv_pkt = ValRdyRecvIfcRTL(CtrlPktType) + + # Component + s.reg_file = RegisterFile(CtrlSignalType, ctrl_mem_size, 1, 1) + # FIXME: valrdy normal queue RTL? + s.recv_pkt_queue = NormalQueueRTL(CtrlPktType) + s.times = Wire(TimeType) + s.start_iterate_ctrl = Wire(b1) + + # Connections + s.send_ctrl.msg //= s.reg_file.rdata[0] + # s.recv_pkt.rdy //= s.recv_pkt_queue.enq_rdy + s.recv_pkt //= s.recv_pkt_queue.recv + + @update + def update_msg(): + + s.recv_pkt_queue.send.rdy @= 0 + s.reg_file.wen[0] @= 0 + s.reg_file.waddr[0] @= s.recv_pkt_queue.send.msg.ctrl_addr + # Initializes the fields of the control signal. + # s.reg_file.wdata[0] @= CtrlSignalType() + s.reg_file.wdata[0].ctrl @= 0 + s.reg_file.wdata[0].predicate @= 0 + for i in range(num_fu_inports): + s.reg_file.wdata[0].fu_in[i] @= 0 + for i in range(num_routing_outports): + s.reg_file.wdata[0].routing_xbar_outport[i] @= 0 + s.reg_file.wdata[0].fu_xbar_outport[i] @= 0 + for i in range(num_tile_inports): + s.reg_file.wdata[0].routing_predicate_in[i] @= 0 + + if s.recv_pkt_queue.send.val & (s.recv_pkt_queue.send.msg.ctrl_action == CMD_CONFIG): + s.reg_file.wen[0] @= 1 # s.recv_pkt_queue.deq_en + s.reg_file.waddr[0] @= s.recv_pkt_queue.send.msg.ctrl_addr + # Fills the fields of the control signal. + s.reg_file.wdata[0].ctrl @= s.recv_pkt_queue.send.msg.ctrl_operation + s.reg_file.wdata[0].predicate @= s.recv_pkt_queue.send.msg.ctrl_predicate + for i in range(num_fu_inports): + s.reg_file.wdata[0].fu_in[i] @= s.recv_pkt_queue.send.msg.ctrl_fu_in[i] + for i in range(num_routing_outports): + s.reg_file.wdata[0].routing_xbar_outport[i] @= s.recv_pkt_queue.send.msg.ctrl_routing_xbar_outport[i] + s.reg_file.wdata[0].fu_xbar_outport[i] @= s.recv_pkt_queue.send.msg.ctrl_fu_xbar_outport[i] + for i in range(num_tile_inports): + s.reg_file.wdata[0].routing_predicate_in[i] @= s.recv_pkt_queue.send.msg.ctrl_routing_predicate_in[i] + + if (s.recv_pkt_queue.send.msg.ctrl_action == CMD_CONFIG) | \ + (s.recv_pkt_queue.send.msg.ctrl_action == CMD_LAUNCH) | \ + (s.recv_pkt_queue.send.msg.ctrl_action == CMD_TERMINATE) | \ + (s.recv_pkt_queue.send.msg.ctrl_action == CMD_PAUSE): + s.recv_pkt_queue.send.rdy @= 1 + # TODO: Extend for the other commands. Maybe another queue to + # handle complicated actions. + # else: + + + @update + def update_send_out_signal(): + s.send_ctrl.en @= 0 + if s.start_iterate_ctrl == b1(1): + if ((total_ctrl_steps > 0) & \ + (s.times == TimeType(total_ctrl_steps))) | \ + (s.reg_file.rdata[0].ctrl == OPT_START): + s.send_ctrl.en @= b1(0) + else: + s.send_ctrl.en @= s.send_ctrl.rdy + if s.recv_pkt_queue.send.val & \ + ((s.recv_pkt_queue.send.msg.ctrl_action == CMD_PAUSE) | \ + (s.recv_pkt_queue.send.msg.ctrl_action == CMD_TERMINATE)): + s.send_ctrl.en @= b1(0) + + @update_ff + def update_whether_we_can_iterate_ctrl(): + if s.recv_pkt_queue.send.val: + # @yo96? data is still there, not released yet? + if s.recv_pkt_queue.send.msg.ctrl_action == CMD_LAUNCH: + s.start_iterate_ctrl <<= 1 + elif s.recv_pkt_queue.send.msg.ctrl_action == CMD_TERMINATE: + s.start_iterate_ctrl <<= 0 + elif s.recv_pkt_queue.send.msg.ctrl_action == CMD_PAUSE: + s.start_iterate_ctrl <<= 0 + # else: + # s.start_iterate_ctrl <<= 1 + + @update_ff + def update_raddr(): + if s.start_iterate_ctrl == b1(1): + if (total_ctrl_steps == 0) | \ + (s.times < TimeType(total_ctrl_steps)): + s.times <<= s.times + TimeType(1) + # Reads the next ctrl signal only when the current one is done. + if s.send_ctrl.rdy: + if zext(s.reg_file.raddr[0] + 1, PCType) == \ + PCType(ctrl_count_per_iter): + s.reg_file.raddr[0] <<= CtrlAddrType(0) + else: + s.reg_file.raddr[0] <<= s.reg_file.raddr[0] + CtrlAddrType(1) + + def line_trace(s): + config_mem_str = "|".join([str(data) for data in s.reg_file.regs]) + return f'{s.recv_pkt.msg} || config_mem: [{config_mem_str}] || out: {s.send_ctrl.msg}' + diff --git a/mem/ctrl/RingMultiCtrlMemDynamicRTL.py b/mem/ctrl/RingMultiCtrlMemDynamicRTL.py new file mode 100644 index 0000000..f01ea06 --- /dev/null +++ b/mem/ctrl/RingMultiCtrlMemDynamicRTL.py @@ -0,0 +1,59 @@ +""" +========================================================================== +RingMultiCtrlMemDynamicRTL.py +========================================================================== +Ring connecting multiple control memories. + +Author : Cheng Tan + Date : Dec 22, 2024 +""" + +from pymtl3 import * +from pymtl3.stdlib.primitive import RegisterFile +from .CtrlMemDynamicRTL import CtrlMemDynamicRTL +from ...lib.basic.en_rdy.ifcs import SendIfcRTL +from ...lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL +from ...lib.opt_type import * +from ...noc.PyOCN.pymtl3_net.ocnlib.ifcs.positions import mk_ring_pos +from ...noc.PyOCN.pymtl3_net.ringnet.RingNetworkRTL import RingNetworkRTL + +class RingMultiCtrlMemDynamicRTL(Component): + def construct(s, CtrlPktType, CtrlSignalType, width, height, + ctrl_mem_size, num_fu_inports, num_fu_outports, + num_tile_inports, num_tile_outports, + ctrl_count_per_iter = 4, total_ctrl_steps = 4): + # Constant + num_terminals = width * height + CtrlRingPos = mk_ring_pos(num_terminals) + s.num_terminals = width * height + + # Interface + s.send_ctrl = [SendIfcRTL(CtrlSignalType) for _ in range(s.num_terminals)] + s.recv_pkt_from_controller = ValRdyRecvIfcRTL(CtrlPktType) + + # Components + s.ctrl_memories = [ + CtrlMemDynamicRTL(CtrlPktType, CtrlSignalType, ctrl_mem_size, + num_fu_inports, num_fu_outports, num_tile_inports, + num_tile_outports, ctrl_count_per_iter, + total_ctrl_steps) for terminal_id in range(s.num_terminals)] + s.ctrl_ring = RingNetworkRTL(CtrlPktType, CtrlRingPos, num_terminals, 0) + + # Connections + for i in range(s.num_terminals): + s.ctrl_ring.send[i] //= s.ctrl_memories[i].recv_pkt + + s.ctrl_ring.recv[0] //= s.recv_pkt_from_controller + for i in range(1, s.num_terminals): + s.ctrl_ring.recv[i].val //= 0 + s.ctrl_ring.recv[i].msg //= CtrlPktType() + + for i in range(s.num_terminals): + s.ctrl_memories[i].send_ctrl //= s.send_ctrl[i] + + def line_trace(s): + res = "||\n".join([(("[ctrl_memory["+str(i)+"]: ") + x.line_trace()) + for (i,x) in enumerate(s.ctrl_memories)]) + res += " ## ctrl_ring: " + s.ctrl_ring.line_trace() + return res + diff --git a/mem/ctrl/test/CtrlMemDynamicRTL_test.py b/mem/ctrl/test/CtrlMemDynamicRTL_test.py new file mode 100644 index 0000000..5fab794 --- /dev/null +++ b/mem/ctrl/test/CtrlMemDynamicRTL_test.py @@ -0,0 +1,137 @@ +""" +========================================================================== +CtrlMemDynamicRTL_test.py +========================================================================== +Test cases for control memory with command-based action handling. + +Author : Cheng Tan + Date : Dec 21, 2024 +""" + +from pymtl3 import * +from ..CtrlMemDynamicRTL import CtrlMemDynamicRTL +from ....fu.single.AdderRTL import AdderRTL +from ....lib.basic.en_rdy.test_sinks import TestSinkRTL +from ....lib.basic.en_rdy.test_srcs import TestSrcRTL +from ....lib.basic.val_rdy.SourceRTL import SourceRTL as ValRdyTestSrcRTL +from ....lib.messages import * +from ....lib.cmd_type import * +from ....lib.opt_type import * + +#------------------------------------------------------------------------- +# Test harness +#------------------------------------------------------------------------- + +class TestHarness( Component ): + + def construct( s, MemUnit, DataType, PredicateType, CtrlPktType, + CtrlSignalType, ctrl_mem_size, data_mem_size, + num_fu_inports, num_fu_outports, num_tile_inports, + num_tile_outports, src0_msgs, src1_msgs, ctrl_pkts, + sink_msgs): + + AddrType = mk_bits(clog2(ctrl_mem_size)) + + s.src_data0 = TestSrcRTL(DataType, src0_msgs) + s.src_data1 = TestSrcRTL(DataType, src1_msgs) + # s.src_waddr = TestSrcRTL(AddrType, ctrl_waddr ) + # s.src_wdata = TestSrcRTL(ConfigType, ctrl_msgs ) + s.src_pkt = ValRdyTestSrcRTL(CtrlPktType, ctrl_pkts) + s.sink_out = TestSinkRTL(DataType, sink_msgs) + + s.alu = AdderRTL(DataType, PredicateType, CtrlSignalType, 2, 2, + data_mem_size ) + s.ctrl_mem = MemUnit(CtrlPktType, CtrlSignalType, ctrl_mem_size, + num_fu_inports, num_fu_outports, num_tile_inports, + num_tile_outports, len(ctrl_pkts), len(ctrl_pkts)) + + s.alu.recv_in_count[0] //= 1 + s.alu.recv_in_count[1] //= 1 + + connect(s.alu.recv_opt, s.ctrl_mem.send_ctrl) + + # connect(s.src_waddr.send, s.ctrl_mem.recv_waddr) + # connect(s.src_wdata.send, s.ctrl_mem.recv_ctrl) + connect(s.src_pkt.send, s.ctrl_mem.recv_pkt) + + connect(s.src_data0.send, s.alu.recv_in[0]) + connect(s.src_data1.send, s.alu.recv_in[1]) + connect(s.alu.send_out[0], s.sink_out.recv) + + def done(s): + return s.src_data0.done() and s.src_data1.done() and \ + s.src_pkt.done() and s.sink_out.done() + + def line_trace(s): + return s.alu.line_trace() + " || " +s.ctrl_mem.line_trace() + +def run_sim(test_harness, max_cycles = 20): + test_harness.elaborate() + test_harness.apply(DefaultPassGroup()) + test_harness.sim_reset() + + # Run simulation + + ncycles = 0 + print() + print("{}:{}".format(ncycles, test_harness.line_trace())) + while not test_harness.done() and ncycles < max_cycles: + test_harness.sim_tick() + ncycles += 1 + print( "{}:{}".format(ncycles, test_harness.line_trace())) + + # Check timeout + + assert ncycles < max_cycles + + test_harness.sim_tick() + test_harness.sim_tick() + test_harness.sim_tick() + +def test_Ctrl(): + MemUnit = CtrlMemDynamicRTL + DataType = mk_data(16, 1) + PredicateType = mk_predicate(1, 1) + ctrl_mem_size = 16 + ctrl_addr_nbits = clog2(ctrl_mem_size) + data_mem_size = 8 + num_fu_inports = 2 + num_fu_outports = 2 + num_tile_inports = 4 + num_tile_outports = 4 + num_terminals = 4 + num_ctrl_actions = 6 + ctrl_action_nbits = clog2(num_ctrl_actions) + num_ctrl_operations = 64 + CtrlPktType = mk_ring_across_tiles_pkt(num_terminals, + num_ctrl_actions, + ctrl_mem_size, + num_ctrl_operations, + num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports) + CtrlSignalType = mk_separate_ctrl(num_ctrl_operations, + num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports) + FuInType = mk_bits(clog2(num_fu_inports + 1)) + pickRegister = [FuInType(x + 1) for x in range(num_fu_inports)] + AddrType = mk_bits(clog2(ctrl_mem_size)) + src_data0 = [DataType(1, 1), DataType(5, 1), DataType(7, 1), DataType(6, 1)] + src_data1 = [DataType(6, 1), DataType(1, 1), DataType(2, 1), DataType(3, 1)] + + src_ctrl_pkt = [CtrlPktType(0, 1, 0, 0, CMD_CONFIG, 0, OPT_ADD, b1(0), pickRegister), + CtrlPktType(0, 1, 0, 0, CMD_CONFIG, 1, OPT_SUB, b1(0), pickRegister), + CtrlPktType(0, 1, 0, 0, CMD_CONFIG, 2, OPT_SUB, b1(0), pickRegister), + CtrlPktType(0, 1, 0, 0, CMD_CONFIG, 3, OPT_ADD, b1(0), pickRegister), + CtrlPktType(0, 1, 0, 0, CMD_LAUNCH, 0, OPT_ADD, b1(0), pickRegister)] + + sink_out = [DataType(7, 1), DataType(4, 1), DataType(5, 1), DataType(9, 1)] + th = TestHarness(MemUnit, DataType, PredicateType, CtrlPktType, CtrlSignalType, + ctrl_mem_size, data_mem_size, num_fu_inports, num_fu_outports, + num_tile_inports, num_tile_outports, src_data0, src_data1, + src_ctrl_pkt, sink_out) + run_sim(th) + diff --git a/mem/ctrl/test/RingCtrlMemDynamicRTL_test.py b/mem/ctrl/test/RingCtrlMemDynamicRTL_test.py new file mode 100644 index 0000000..c6655b0 --- /dev/null +++ b/mem/ctrl/test/RingCtrlMemDynamicRTL_test.py @@ -0,0 +1,147 @@ +""" +========================================================================== +CtrlMemDynamicRTL_test.py +========================================================================== +Test cases for control memory with command-based action handling. + +Author : Cheng Tan + Date : Dec 21, 2024 +""" + +from pymtl3 import * +from ..RingMultiCtrlMemDynamicRTL import RingMultiCtrlMemDynamicRTL +from ....fu.single.AdderRTL import AdderRTL +from ....lib.basic.en_rdy.test_sinks import TestSinkRTL +from ....lib.basic.en_rdy.test_srcs import TestSrcRTL +from ....lib.basic.val_rdy.SourceRTL import SourceRTL as ValRdyTestSrcRTL +from ....lib.messages import * +from ....lib.cmd_type import * +from ....lib.opt_type import * + +#------------------------------------------------------------------------- +# Test harness +#------------------------------------------------------------------------- + +class TestHarness( Component ): + + def construct( s, DUT, DataType, PredicateType, CtrlPktType, + CtrlSignalType, ctrl_mem_size, width, height, + data_mem_size, num_fu_inports, num_fu_outports, + num_tile_inports, num_tile_outports, ctrl_pkts, + sink_msgs): + + s.width = width + s.height = height + s.src_pkt = ValRdyTestSrcRTL(CtrlPktType, ctrl_pkts) + s.sink_out = [TestSinkRTL(CtrlSignalType, sink_msgs[i]) + for i in range(width * height)] + + s.dut = \ + DUT(CtrlPktType, CtrlSignalType, width, height, + ctrl_mem_size, num_fu_inports, num_fu_outports, + num_tile_inports, num_tile_outports, + len(ctrl_pkts), len(ctrl_pkts)) + + connect(s.src_pkt.send, s.dut.recv_pkt_from_controller) + for i in range(width * height): + connect(s.dut.send_ctrl[i], s.sink_out[i].recv) + + def done(s): + if not s.src_pkt.done(): + return False + for i in range(s.width * s.height): + if not s.sink_out[i].done(): + return False + return True + + def line_trace(s): + return s.dut.line_trace() + +def run_sim(test_harness, max_cycles = 40): + test_harness.elaborate() + test_harness.apply(DefaultPassGroup()) + test_harness.sim_reset() + + # Run simulation + + ncycles = 0 + print() + print("{}:{}".format(ncycles, test_harness.line_trace())) + while not test_harness.done() and ncycles < max_cycles: + test_harness.sim_tick() + ncycles += 1 + print("{}:{}".format( ncycles, test_harness.line_trace())) + + # Check timeout + + assert ncycles < max_cycles + + test_harness.sim_tick() + test_harness.sim_tick() + test_harness.sim_tick() + +def test_Ctrl(): + MemUnit = RingMultiCtrlMemDynamicRTL + DataType = mk_data(16, 1) + PredicateType = mk_predicate(1, 1) + ctrl_mem_size = 16 + ctrl_addr_nbits = clog2(ctrl_mem_size) + data_mem_size = 8 + num_fu_inports = 2 + num_fu_outports = 2 + num_tile_inports = 4 + num_tile_outports = 4 + width = 2 + height = 2 + num_terminals = width * height + num_ctrl_actions = 6 + ctrl_action_nbits = clog2(num_ctrl_actions) + num_ctrl_operations = 64 + CtrlPktType = mk_ring_across_tiles_pkt(num_terminals, + num_ctrl_actions, + ctrl_mem_size, + num_ctrl_operations, + num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports) + CtrlSignalType = mk_separate_ctrl(num_ctrl_operations, + num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports) + FuInType = mk_bits(clog2(num_fu_inports + 1)) + pickRegister = [FuInType(x + 1) for x in range(num_fu_inports)] + + src_ctrl_pkt = [ # src dst vc_id opq cmd_type addr operation predicate + CtrlPktType(0, 0, 0, 0, CMD_CONFIG, 0, OPT_ADD, b1(0), pickRegister), + CtrlPktType(0, 1, 0, 0, CMD_CONFIG, 1, OPT_SUB, b1(0), pickRegister), + CtrlPktType(0, 2, 0, 0, CMD_CONFIG, 0, OPT_SUB, b1(0), pickRegister), + CtrlPktType(0, 3, 0, 0, CMD_CONFIG, 1, OPT_ADD, b1(0), pickRegister), + CtrlPktType(0, 3, 0, 0, CMD_CONFIG, 0, OPT_SUB, b1(0), pickRegister), + CtrlPktType(0, 0, 0, 0, CMD_CONFIG, 1, OPT_SUB, b1(0), pickRegister), + CtrlPktType(0, 0, 0, 0, CMD_LAUNCH, 0, OPT_SUB, b1(0), pickRegister), + CtrlPktType(0, 1, 0, 0, CMD_CONFIG, 0, OPT_ADD, b1(0), pickRegister), + CtrlPktType(0, 1, 0, 0, CMD_LAUNCH, 0, OPT_SUB, b1(0), pickRegister), + CtrlPktType(0, 2, 0, 0, CMD_CONFIG, 1, OPT_ADD, b1(0), pickRegister), + CtrlPktType(0, 2, 0, 0, CMD_LAUNCH, 0, OPT_ADD, b1(0), pickRegister), + CtrlPktType(0, 3, 0, 0, CMD_LAUNCH, 0, OPT_ADD, b1(0), pickRegister)] + + sink_out = [ + [CtrlSignalType(OPT_ADD, 0, pickRegister), + CtrlSignalType(OPT_SUB, 0, pickRegister)], + # Ctrl memory 1 first write into address 1, then address 0. + [CtrlSignalType(OPT_ADD, 0, pickRegister), + CtrlSignalType(OPT_SUB, 0, pickRegister)], + + [CtrlSignalType(OPT_SUB, 0, pickRegister), + CtrlSignalType(OPT_ADD, 0, pickRegister)], + + [CtrlSignalType(OPT_SUB, 0, pickRegister), + CtrlSignalType(OPT_ADD, 0, pickRegister)]] + th = TestHarness(MemUnit, DataType, PredicateType, CtrlPktType, CtrlSignalType, + ctrl_mem_size, width, height, data_mem_size, num_fu_inports, + num_fu_outports, num_tile_inports, num_tile_outports, + src_ctrl_pkt, sink_out) + run_sim(th) + diff --git a/scale_out/RingMultiCGRARTL.py b/scale_out/RingMultiCGRARTL.py deleted file mode 100644 index e577283..0000000 --- a/scale_out/RingMultiCGRARTL.py +++ /dev/null @@ -1,71 +0,0 @@ -""" -========================================================================== -RingMultiCGRARTL.py -========================================================================== -Ring connecting multiple CGRAs, each CGRA contains one controller. - -Author : Cheng Tan - Date : Dec 4, 2024 -""" - - -from pymtl3 import * -from pymtl3.stdlib.primitive import RegisterFile -from ..lib.basic.en_rdy.ifcs import SendIfcRTL, RecvIfcRTL -from ..lib.opt_type import * -from ..noc.PyOCN.pymtl3_net.ringnet.RingNetworkRTL import RingNetworkRTL -from ..cgra.CGRAWithCrossbarDataMemRTL import CGRAWithCrossbarDataMemRTL -from ..noc.PyOCN.pymtl3_net.ocnlib.ifcs.positions import mk_ring_pos - - -class RingMultiCGRARTL(Component): - - def construct(s, CGRADataType, PredicateType, CtrlType, NocPktType, - CmdType, num_terminals, width, height, ctrl_mem_size, - data_mem_size_global, data_mem_size_per_bank, - num_banks_per_cgra, num_ctrl, total_steps, - FunctionUnit, FuList, controller2addr_map, - preload_data = None, preload_const = None): - - # Constant - RingPos = mk_ring_pos(num_terminals) - s.num_terminals = num_terminals - s.num_tiles = width * height - CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) - ControllerIdType = mk_bits(clog2(num_terminals)) - - # Interface - # # Request from/to CPU. - # s.recv_from_cpu = RecvIfcRTL(CGRADataType) - # s.send_to_cpu = SendIfcRTL(CGRADataType) - s.recv_waddr = [[RecvIfcRTL(CtrlAddrType) for _ in range(s.num_tiles)] - for _ in range(s.num_terminals)] - s.recv_wopt = [[RecvIfcRTL(CtrlType) for _ in range(s.num_tiles)] - for _ in range(s.num_terminals)] - - # Components - s.cgra = [CGRAWithCrossbarDataMemRTL( - CGRADataType, PredicateType, CtrlType, NocPktType, CmdType, - ControllerIdType, terminal_id, width, height, ctrl_mem_size, - data_mem_size_global, data_mem_size_per_bank, num_banks_per_cgra, - num_ctrl, total_steps, FunctionUnit, FuList, controller2addr_map, - preload_data = None, preload_const = None) - for terminal_id in range(s.num_terminals)] - s.ring = RingNetworkRTL(NocPktType, RingPos, num_terminals, 0) - - # Connections - for i in range(s.num_terminals): - s.ring.send[i] //= s.cgra[i].recv_from_noc - s.ring.recv[i] //= s.cgra[i].send_to_noc - - for j in range(s.num_tiles): - s.recv_waddr[i][j] //= s.cgra[i].recv_waddr[j] - s.recv_wopt[i][j] //= s.cgra[i].recv_wopt[j] - - - def line_trace(s): - res = "||\n".join([(("[cgra["+str(i)+"]: ") + x.line_trace()) - for (i,x) in enumerate(s.cgra)]) - res += " ## ring: " + s.ring.line_trace() - return res - diff --git a/scale_out/RingMultiCgraRingCtrlMemRTL.py b/scale_out/RingMultiCgraRingCtrlMemRTL.py new file mode 100644 index 0000000..9dc243c --- /dev/null +++ b/scale_out/RingMultiCgraRingCtrlMemRTL.py @@ -0,0 +1,106 @@ +""" +========================================================================== +RingMultiCgraRingCtrlMemRTL.py +========================================================================== +Ring connecting multiple CGRAs, each CGRA contains one controller. + +Author : Cheng Tan + Date : Dec 23, 2024 +""" + +from pymtl3 import * +from pymtl3.stdlib.primitive import RegisterFile +from ..cgra.CgraCrossbarDataMemRingCtrlMemRTL import CgraCrossbarDataMemRingCtrlMemRTL +from ..lib.basic.en_rdy.ifcs import SendIfcRTL, RecvIfcRTL +from ..lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL +from ..lib.opt_type import * +from ..lib.util.common import * +from ..noc.PyOCN.pymtl3_net.ocnlib.ifcs.positions import mk_ring_pos +from ..noc.PyOCN.pymtl3_net.ringnet.RingNetworkRTL import RingNetworkRTL + +class RingMultiCgraRingCtrlMemRTL(Component): + def construct(s, CGRADataType, PredicateType, CtrlPktType, + CtrlSignalType, NocPktType, CmdType, cgra_rows, + cgra_columns, tile_rows, tile_columns, ctrl_mem_size, + data_mem_size_global, data_mem_size_per_bank, + num_banks_per_cgra, num_ctrl, total_steps, FunctionUnit, + FuList, controller2addr_map, preload_data = None, + preload_const = None): + + # Constant + s.num_terminals = cgra_rows * cgra_columns + RingPos = mk_ring_pos(s.num_terminals) + s.num_tiles = tile_rows * tile_columns + CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) + ControllerIdType = mk_bits(clog2(s.num_terminals)) + + # Interface + # Request from/to CPU. + s.recv_from_cpu_ctrl_pkt = ValRdyRecvIfcRTL(CtrlPktType) + + # Components + s.cgra = [CgraCrossbarDataMemRingCtrlMemRTL( + CGRADataType, PredicateType, CtrlPktType, CtrlSignalType, + NocPktType, CmdType, ControllerIdType, terminal_id, + tile_columns, tile_rows, ctrl_mem_size, data_mem_size_global, + data_mem_size_per_bank, num_banks_per_cgra, num_ctrl, + total_steps, FunctionUnit, FuList, controller2addr_map, + preload_data = None, preload_const = None) + for terminal_id in range(s.num_terminals)] + s.ring = RingNetworkRTL(NocPktType, RingPos, s.num_terminals, 0) + + # Connections + s.recv_from_cpu_ctrl_pkt //= s.cgra[0].recv_from_cpu_ctrl_pkt + for i in range(s.num_terminals): + s.ring.send[i] //= s.cgra[i].recv_from_noc + s.ring.recv[i] //= s.cgra[i].send_to_noc + + for i in range(1, s.num_terminals): + s.cgra[i].recv_from_cpu_ctrl_pkt.val //= 0 + s.cgra[i].recv_from_cpu_ctrl_pkt.msg //= CtrlPktType() + + # Connects the tiles on the boundary of each two ajacent CGRAs. + for cgra_row in range(cgra_rows): + for cgra_col in range(cgra_columns): + if cgra_row != 0: + for tile_col in range(tile_columns): + s.cgra[cgra_row * cgra_columns + cgra_col].send_data_on_boundary_south[tile_col] //= \ + s.cgra[(cgra_row - 1) * cgra_columns + cgra_col].recv_data_on_boundary_north[tile_col] + s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_south[tile_col] //= \ + s.cgra[(cgra_row - 1) * cgra_columns + cgra_col].send_data_on_boundary_north[tile_col] + else: + for tile_col in range(tile_columns): + s.cgra[cgra_row * cgra_columns + cgra_col].send_data_on_boundary_south[tile_col].rdy //= 0 + s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_south[tile_col].en //= 0 + s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_south[tile_col].msg //= CGRADataType() + + if cgra_row == cgra_rows - 1: + for tile_col in range(tile_columns): + s.cgra[cgra_row * cgra_columns + cgra_col].send_data_on_boundary_north[tile_col].rdy //= 0 + s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_north[tile_col].en //= 0 + s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_north[tile_col].msg //= CGRADataType() + + if cgra_col != 0: + for tile_row in range(tile_rows): + s.cgra[cgra_row * cgra_columns + cgra_col].send_data_on_boundary_west[tile_row] //= \ + s.cgra[cgra_row * cgra_columns + cgra_col - 1].recv_data_on_boundary_east[tile_row] + s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_west[tile_row] //= \ + s.cgra[cgra_row * cgra_columns + cgra_col - 1].send_data_on_boundary_east[tile_row] + else: + for tile_row in range(tile_rows): + s.cgra[cgra_row * cgra_columns + cgra_col].send_data_on_boundary_west[tile_row].rdy //= 0 + s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_west[tile_row].en //= 0 + s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_west[tile_row].msg //= CGRADataType() + + if cgra_col == cgra_columns - 1: + for tile_row in range(tile_rows): + s.cgra[cgra_row * cgra_columns + cgra_col].send_data_on_boundary_east[tile_row].rdy //= 0 + s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_east[tile_row].en //= 0 + s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_east[tile_row].msg //= CGRADataType() + + def line_trace(s): + res = "||\n".join([(("[cgra["+str(i)+"]: ") + x.line_trace()) + for (i,x) in enumerate(s.cgra)]) + res += " ## ring: " + s.ring.line_trace() + return res + diff --git a/scale_out/translate/RingMultiCGRARTL_test.py b/scale_out/test/RingMultiCgraRingCtrlMemRTL_test.py similarity index 57% rename from scale_out/translate/RingMultiCGRARTL_test.py rename to scale_out/test/RingMultiCgraRingCtrlMemRTL_test.py index a73a2b9..ea2a223 100644 --- a/scale_out/translate/RingMultiCGRARTL_test.py +++ b/scale_out/test/RingMultiCgraRingCtrlMemRTL_test.py @@ -1,11 +1,11 @@ """ ========================================================================== -CGRAWithControllerRTL_test.py +RingMultiCgraRingCtrlMemRTL_test.py ========================================================================== Test cases for CGRA with controller. Author : Cheng Tan - Date : Dec 4, 2024 + Date : Dec 23, 2024 """ @@ -14,43 +14,45 @@ config_model_with_cmdline_opts) from pymtl3.passes.backends.verilog import (VerilogTranslationPass, VerilogVerilatorImportPass) -from ..RingMultiCGRARTL import RingMultiCGRARTL +from ..RingMultiCgraRingCtrlMemRTL import RingMultiCgraRingCtrlMemRTL from ...fu.flexible.FlexibleFuRTL import FlexibleFuRTL from ...fu.single.AdderRTL import AdderRTL from ...fu.single.MemUnitRTL import MemUnitRTL from ...fu.single.ShifterRTL import ShifterRTL from ...lib.messages import * from ...lib.opt_type import * +from ...lib.cmd_type import * from ...lib.basic.en_rdy.test_srcs import TestSrcRTL - +from ...lib.basic.val_rdy.SourceRTL import SourceRTL as ValRdyTestSrcRTL #------------------------------------------------------------------------- # Test harness #------------------------------------------------------------------------- class TestHarness(Component): - def construct(s, DUT, FunctionUnit, FuList, DataType, PredicateType, - CtrlType, NocPktType, CmdType, num_terminals, width, height, - ctrl_mem_size, data_mem_size_global, - data_mem_size_per_bank, num_banks_per_cgra, src_opt, - ctrl_waddr, controller2addr_map): + CtrlPktType, CtrlSignalType, NocPktType, CmdType, + cgra_rows, cgra_columns, width, height, ctrl_mem_size, + data_mem_size_global, data_mem_size_per_bank, + num_banks_per_cgra, src_ctrl_pkt, ctrl_steps, + controller2addr_map): - s.num_terminals = num_terminals + s.num_terminals = cgra_rows * cgra_columns s.num_tiles = width * height - CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) - - s.src_opt = [[TestSrcRTL(CtrlType, src_opt[j]) - for j in range(s.num_tiles)] - for i in range(s.num_terminals)] - s.ctrl_waddr = [[TestSrcRTL(CtrlAddrType, ctrl_waddr[j]) - for j in range(s.num_tiles)] - for i in range(s.num_terminals)] - - s.dut = DUT(DataType, PredicateType, CtrlType, NocPktType, CmdType, - num_terminals, width, height, ctrl_mem_size, - data_mem_size_global, data_mem_size_per_bank, - num_banks_per_cgra, len(src_opt[0]), len(src_opt[0]), + # CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) + + # s.src_opt = [[TestSrcRTL(CtrlType, src_opt[j]) + # for j in range(s.num_tiles)] + # for i in range(s.num_terminals)] + s.src_ctrl_pkt = ValRdyTestSrcRTL(CtrlPktType, src_ctrl_pkt) + # s.ctrl_waddr = [[TestSrcRTL(CtrlAddrType, ctrl_waddr[j]) + # for j in range(s.num_tiles)] + # for i in range(s.num_terminals)] + + s.dut = DUT(DataType, PredicateType, CtrlPktType, CtrlSignalType, + NocPktType, CmdType, cgra_rows, cgra_columns, height, width, + ctrl_mem_size, data_mem_size_global, data_mem_size_per_bank, + num_banks_per_cgra, ctrl_steps, ctrl_steps, FunctionUnit, FuList, controller2addr_map) # Connections @@ -64,17 +66,19 @@ def construct(s, DUT, FunctionUnit, FuList, DataType, PredicateType, # s.dut.recv_towards_controller.msg //= DataType(0, 0) # s.dut.send_from_controller.rdy //= 0 - for i in range(num_terminals): - for j in range(s.num_tiles): - connect(s.src_opt[i][j].send, s.dut.recv_wopt[i][j]) - connect(s.ctrl_waddr[i][j].send, s.dut.recv_waddr[i][j]) + # for i in range(num_terminals): + # for j in range(s.num_tiles): + # connect(s.src_opt[i][j].send, s.dut.recv_wopt[i][j]) + # connect(s.ctrl_waddr[i][j].send, s.dut.recv_waddr[i][j]) + s.src_ctrl_pkt.send //= s.dut.recv_from_cpu_ctrl_pkt def done(s): - for i in range(s.num_terminals): - for j in range(s.num_tiles): - if not s.src_opt[i][j].done(): - return False - return True + return s.src_ctrl_pkt.done() + # for i in range(s.num_terminals): + # for j in range(s.num_tiles): + # if not s.src_opt[i][j].done(): + # return False + # return True def line_trace(s): return s.dut.line_trace() @@ -89,18 +93,22 @@ def test_homo_2x2(cmdline_opts): data_mem_size_global = 32 data_mem_size_per_bank = 4 num_banks_per_cgra = 2 - num_terminals = 4 + cgra_rows = 2 + cgra_columns = 2 + num_terminals = cgra_rows * cgra_columns width = 2 height = 2 + num_ctrl_actions = 6 + num_ctrl_operations = 64 TileInType = mk_bits(clog2(num_tile_inports + 1)) FuInType = mk_bits(clog2(num_fu_inports + 1)) FuOutType = mk_bits(clog2(num_fu_outports + 1)) ctrl_addr_nbits = clog2(ctrl_mem_size) - CtrlAddrType = mk_bits(ctrl_addr_nbits) + # CtrlAddrType = mk_bits(ctrl_addr_nbits) data_addr_nbits = clog2(data_mem_size_global) DataAddrType = mk_bits(clog2(data_mem_size_global)) num_tiles = width * height - DUT = RingMultiCGRARTL + DUT = RingMultiCgraRingCtrlMemRTL FunctionUnit = FlexibleFuRTL FuList = [MemUnitRTL, AdderRTL] DataType = mk_data(32, 1) @@ -112,15 +120,29 @@ def test_homo_2x2(cmdline_opts): 2: [16, 23], 3: [24, 31], } - CtrlType = mk_separate_ctrl(num_fu_inports, num_fu_outports, - num_tile_inports, num_tile_outports) + CtrlPktType = \ + mk_ring_across_tiles_pkt(width * height, + num_ctrl_actions, + ctrl_mem_size, + num_ctrl_operations, + num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports) + CtrlSignalType = \ + mk_separate_ctrl(num_ctrl_operations, + num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports) NocPktType = mk_ring_multi_cgra_pkt(nrouters = num_terminals, addr_nbits = data_addr_nbits, data_nbits = 32, predicate_nbits = 1) pickRegister = [FuInType(x + 1) for x in range(num_fu_inports)] - src_opt = [[ - CtrlType(OPT_INC, b1(0), + src_opt_per_tile = [[ + # src dst vc_id opq cmd_type addr operation predicate + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 0, OPT_INC, b1(0), pickRegister, [TileInType(4), TileInType(3), TileInType(2), TileInType(1), # TODO: make below as TileInType(5) to double check. @@ -128,7 +150,15 @@ def test_homo_2x2(cmdline_opts): [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - CtrlType(OPT_INC, b1(0), + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 1, OPT_INC, b1(0), + pickRegister, + [TileInType(4), TileInType(3), TileInType(2), TileInType(1), + TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + + [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), + + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 2, OPT_ADD, b1(0), pickRegister, [TileInType(4), TileInType(3), TileInType(2), TileInType(1), TileInType(0), TileInType(0), TileInType(0), TileInType(0)], @@ -136,7 +166,7 @@ def test_homo_2x2(cmdline_opts): [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - CtrlType(OPT_ADD, b1(0), + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 3, OPT_STR, b1(0), pickRegister, [TileInType(4), TileInType(3), TileInType(2), TileInType(1), TileInType(0), TileInType(0), TileInType(0), TileInType(0)], @@ -144,7 +174,7 @@ def test_homo_2x2(cmdline_opts): [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - CtrlType(OPT_STR, b1(0), + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 4, OPT_ADD, b1(0), pickRegister, [TileInType(4), TileInType(3), TileInType(2), TileInType(1), TileInType(0), TileInType(0), TileInType(0), TileInType(0)], @@ -152,7 +182,7 @@ def test_homo_2x2(cmdline_opts): [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - CtrlType(OPT_ADD, b1(0), + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 5, OPT_ADD, b1(0), pickRegister, [TileInType(4), TileInType(3), TileInType(2), TileInType(1), TileInType(0), TileInType(0), TileInType(0), TileInType(0)], @@ -160,25 +190,29 @@ def test_homo_2x2(cmdline_opts): [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - CtrlType(OPT_ADD, b1(0), + # This last one is for launching kernel. + CtrlPktType(0, i, 0, 0, CMD_LAUNCH, 0, OPT_ADD, b1(0), pickRegister, [TileInType(4), TileInType(3), TileInType(2), TileInType(1), TileInType(0), TileInType(0), TileInType(0), TileInType(0)], [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]) + ] for i in range(num_tiles)] + + src_ctrl_pkt = [] + for opt_per_tile in src_opt_per_tile: + src_ctrl_pkt.extend(opt_per_tile) - ] for _ in range(num_tiles)] - ctrl_waddr = [[CtrlAddrType(0), CtrlAddrType(1), CtrlAddrType(2), CtrlAddrType(3), - CtrlAddrType(4), CtrlAddrType(5)] for _ in range(num_tiles)] - th = TestHarness(DUT, FunctionUnit, FuList, DataType, PredicateType, - CtrlType, NocPktType, CmdType, num_terminals, width, height, - ctrl_mem_size, data_mem_size_global, data_mem_size_per_bank, - num_banks_per_cgra, src_opt, ctrl_waddr, controller2addr_map) + th = TestHarness(DUT, FunctionUnit, FuList, DataType, PredicateType, CtrlPktType, + CtrlSignalType, NocPktType, CmdType, cgra_rows, cgra_columns, + width, height, ctrl_mem_size, data_mem_size_global, + data_mem_size_per_bank, num_banks_per_cgra, src_ctrl_pkt, + ctrl_mem_size, controller2addr_map) th.elaborate() th.dut.set_metadata(VerilogVerilatorImportPass.vl_Wno_list, ['UNSIGNED', 'UNOPTFLAT', 'WIDTH', 'WIDTHCONCAT', 'ALWCOMBORDER']) - th = config_model_with_cmdline_opts(th, cmdline_opts, duts=['dut']) + th = config_model_with_cmdline_opts(th, cmdline_opts, duts = ['dut']) run_sim(th) diff --git a/scale_out/test/RingMultiCGRARTL_test.py b/scale_out/translate/RingMultiCgraRingCtrlMemRTL_test.py similarity index 57% rename from scale_out/test/RingMultiCGRARTL_test.py rename to scale_out/translate/RingMultiCgraRingCtrlMemRTL_test.py index a73a2b9..ea2a223 100644 --- a/scale_out/test/RingMultiCGRARTL_test.py +++ b/scale_out/translate/RingMultiCgraRingCtrlMemRTL_test.py @@ -1,11 +1,11 @@ """ ========================================================================== -CGRAWithControllerRTL_test.py +RingMultiCgraRingCtrlMemRTL_test.py ========================================================================== Test cases for CGRA with controller. Author : Cheng Tan - Date : Dec 4, 2024 + Date : Dec 23, 2024 """ @@ -14,43 +14,45 @@ config_model_with_cmdline_opts) from pymtl3.passes.backends.verilog import (VerilogTranslationPass, VerilogVerilatorImportPass) -from ..RingMultiCGRARTL import RingMultiCGRARTL +from ..RingMultiCgraRingCtrlMemRTL import RingMultiCgraRingCtrlMemRTL from ...fu.flexible.FlexibleFuRTL import FlexibleFuRTL from ...fu.single.AdderRTL import AdderRTL from ...fu.single.MemUnitRTL import MemUnitRTL from ...fu.single.ShifterRTL import ShifterRTL from ...lib.messages import * from ...lib.opt_type import * +from ...lib.cmd_type import * from ...lib.basic.en_rdy.test_srcs import TestSrcRTL - +from ...lib.basic.val_rdy.SourceRTL import SourceRTL as ValRdyTestSrcRTL #------------------------------------------------------------------------- # Test harness #------------------------------------------------------------------------- class TestHarness(Component): - def construct(s, DUT, FunctionUnit, FuList, DataType, PredicateType, - CtrlType, NocPktType, CmdType, num_terminals, width, height, - ctrl_mem_size, data_mem_size_global, - data_mem_size_per_bank, num_banks_per_cgra, src_opt, - ctrl_waddr, controller2addr_map): + CtrlPktType, CtrlSignalType, NocPktType, CmdType, + cgra_rows, cgra_columns, width, height, ctrl_mem_size, + data_mem_size_global, data_mem_size_per_bank, + num_banks_per_cgra, src_ctrl_pkt, ctrl_steps, + controller2addr_map): - s.num_terminals = num_terminals + s.num_terminals = cgra_rows * cgra_columns s.num_tiles = width * height - CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) - - s.src_opt = [[TestSrcRTL(CtrlType, src_opt[j]) - for j in range(s.num_tiles)] - for i in range(s.num_terminals)] - s.ctrl_waddr = [[TestSrcRTL(CtrlAddrType, ctrl_waddr[j]) - for j in range(s.num_tiles)] - for i in range(s.num_terminals)] - - s.dut = DUT(DataType, PredicateType, CtrlType, NocPktType, CmdType, - num_terminals, width, height, ctrl_mem_size, - data_mem_size_global, data_mem_size_per_bank, - num_banks_per_cgra, len(src_opt[0]), len(src_opt[0]), + # CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) + + # s.src_opt = [[TestSrcRTL(CtrlType, src_opt[j]) + # for j in range(s.num_tiles)] + # for i in range(s.num_terminals)] + s.src_ctrl_pkt = ValRdyTestSrcRTL(CtrlPktType, src_ctrl_pkt) + # s.ctrl_waddr = [[TestSrcRTL(CtrlAddrType, ctrl_waddr[j]) + # for j in range(s.num_tiles)] + # for i in range(s.num_terminals)] + + s.dut = DUT(DataType, PredicateType, CtrlPktType, CtrlSignalType, + NocPktType, CmdType, cgra_rows, cgra_columns, height, width, + ctrl_mem_size, data_mem_size_global, data_mem_size_per_bank, + num_banks_per_cgra, ctrl_steps, ctrl_steps, FunctionUnit, FuList, controller2addr_map) # Connections @@ -64,17 +66,19 @@ def construct(s, DUT, FunctionUnit, FuList, DataType, PredicateType, # s.dut.recv_towards_controller.msg //= DataType(0, 0) # s.dut.send_from_controller.rdy //= 0 - for i in range(num_terminals): - for j in range(s.num_tiles): - connect(s.src_opt[i][j].send, s.dut.recv_wopt[i][j]) - connect(s.ctrl_waddr[i][j].send, s.dut.recv_waddr[i][j]) + # for i in range(num_terminals): + # for j in range(s.num_tiles): + # connect(s.src_opt[i][j].send, s.dut.recv_wopt[i][j]) + # connect(s.ctrl_waddr[i][j].send, s.dut.recv_waddr[i][j]) + s.src_ctrl_pkt.send //= s.dut.recv_from_cpu_ctrl_pkt def done(s): - for i in range(s.num_terminals): - for j in range(s.num_tiles): - if not s.src_opt[i][j].done(): - return False - return True + return s.src_ctrl_pkt.done() + # for i in range(s.num_terminals): + # for j in range(s.num_tiles): + # if not s.src_opt[i][j].done(): + # return False + # return True def line_trace(s): return s.dut.line_trace() @@ -89,18 +93,22 @@ def test_homo_2x2(cmdline_opts): data_mem_size_global = 32 data_mem_size_per_bank = 4 num_banks_per_cgra = 2 - num_terminals = 4 + cgra_rows = 2 + cgra_columns = 2 + num_terminals = cgra_rows * cgra_columns width = 2 height = 2 + num_ctrl_actions = 6 + num_ctrl_operations = 64 TileInType = mk_bits(clog2(num_tile_inports + 1)) FuInType = mk_bits(clog2(num_fu_inports + 1)) FuOutType = mk_bits(clog2(num_fu_outports + 1)) ctrl_addr_nbits = clog2(ctrl_mem_size) - CtrlAddrType = mk_bits(ctrl_addr_nbits) + # CtrlAddrType = mk_bits(ctrl_addr_nbits) data_addr_nbits = clog2(data_mem_size_global) DataAddrType = mk_bits(clog2(data_mem_size_global)) num_tiles = width * height - DUT = RingMultiCGRARTL + DUT = RingMultiCgraRingCtrlMemRTL FunctionUnit = FlexibleFuRTL FuList = [MemUnitRTL, AdderRTL] DataType = mk_data(32, 1) @@ -112,15 +120,29 @@ def test_homo_2x2(cmdline_opts): 2: [16, 23], 3: [24, 31], } - CtrlType = mk_separate_ctrl(num_fu_inports, num_fu_outports, - num_tile_inports, num_tile_outports) + CtrlPktType = \ + mk_ring_across_tiles_pkt(width * height, + num_ctrl_actions, + ctrl_mem_size, + num_ctrl_operations, + num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports) + CtrlSignalType = \ + mk_separate_ctrl(num_ctrl_operations, + num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports) NocPktType = mk_ring_multi_cgra_pkt(nrouters = num_terminals, addr_nbits = data_addr_nbits, data_nbits = 32, predicate_nbits = 1) pickRegister = [FuInType(x + 1) for x in range(num_fu_inports)] - src_opt = [[ - CtrlType(OPT_INC, b1(0), + src_opt_per_tile = [[ + # src dst vc_id opq cmd_type addr operation predicate + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 0, OPT_INC, b1(0), pickRegister, [TileInType(4), TileInType(3), TileInType(2), TileInType(1), # TODO: make below as TileInType(5) to double check. @@ -128,7 +150,15 @@ def test_homo_2x2(cmdline_opts): [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - CtrlType(OPT_INC, b1(0), + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 1, OPT_INC, b1(0), + pickRegister, + [TileInType(4), TileInType(3), TileInType(2), TileInType(1), + TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + + [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), + + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 2, OPT_ADD, b1(0), pickRegister, [TileInType(4), TileInType(3), TileInType(2), TileInType(1), TileInType(0), TileInType(0), TileInType(0), TileInType(0)], @@ -136,7 +166,7 @@ def test_homo_2x2(cmdline_opts): [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - CtrlType(OPT_ADD, b1(0), + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 3, OPT_STR, b1(0), pickRegister, [TileInType(4), TileInType(3), TileInType(2), TileInType(1), TileInType(0), TileInType(0), TileInType(0), TileInType(0)], @@ -144,7 +174,7 @@ def test_homo_2x2(cmdline_opts): [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - CtrlType(OPT_STR, b1(0), + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 4, OPT_ADD, b1(0), pickRegister, [TileInType(4), TileInType(3), TileInType(2), TileInType(1), TileInType(0), TileInType(0), TileInType(0), TileInType(0)], @@ -152,7 +182,7 @@ def test_homo_2x2(cmdline_opts): [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - CtrlType(OPT_ADD, b1(0), + CtrlPktType(0, i, 0, 0, CMD_CONFIG, 5, OPT_ADD, b1(0), pickRegister, [TileInType(4), TileInType(3), TileInType(2), TileInType(1), TileInType(0), TileInType(0), TileInType(0), TileInType(0)], @@ -160,25 +190,29 @@ def test_homo_2x2(cmdline_opts): [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]), - CtrlType(OPT_ADD, b1(0), + # This last one is for launching kernel. + CtrlPktType(0, i, 0, 0, CMD_LAUNCH, 0, OPT_ADD, b1(0), pickRegister, [TileInType(4), TileInType(3), TileInType(2), TileInType(1), TileInType(0), TileInType(0), TileInType(0), TileInType(0)], [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(1), FuOutType(1), FuOutType(1), FuOutType(1)]) + ] for i in range(num_tiles)] + + src_ctrl_pkt = [] + for opt_per_tile in src_opt_per_tile: + src_ctrl_pkt.extend(opt_per_tile) - ] for _ in range(num_tiles)] - ctrl_waddr = [[CtrlAddrType(0), CtrlAddrType(1), CtrlAddrType(2), CtrlAddrType(3), - CtrlAddrType(4), CtrlAddrType(5)] for _ in range(num_tiles)] - th = TestHarness(DUT, FunctionUnit, FuList, DataType, PredicateType, - CtrlType, NocPktType, CmdType, num_terminals, width, height, - ctrl_mem_size, data_mem_size_global, data_mem_size_per_bank, - num_banks_per_cgra, src_opt, ctrl_waddr, controller2addr_map) + th = TestHarness(DUT, FunctionUnit, FuList, DataType, PredicateType, CtrlPktType, + CtrlSignalType, NocPktType, CmdType, cgra_rows, cgra_columns, + width, height, ctrl_mem_size, data_mem_size_global, + data_mem_size_per_bank, num_banks_per_cgra, src_ctrl_pkt, + ctrl_mem_size, controller2addr_map) th.elaborate() th.dut.set_metadata(VerilogVerilatorImportPass.vl_Wno_list, ['UNSIGNED', 'UNOPTFLAT', 'WIDTH', 'WIDTHCONCAT', 'ALWCOMBORDER']) - th = config_model_with_cmdline_opts(th, cmdline_opts, duts=['dut']) + th = config_model_with_cmdline_opts(th, cmdline_opts, duts = ['dut']) run_sim(th) diff --git a/cgra/CGRAMemBottomRTL.py b/systolic/CgraMemBottomRTL.py similarity index 96% rename from cgra/CGRAMemBottomRTL.py rename to systolic/CgraMemBottomRTL.py index 92c484f..6b22c79 100644 --- a/cgra/CGRAMemBottomRTL.py +++ b/systolic/CgraMemBottomRTL.py @@ -1,6 +1,6 @@ """ ========================================================================= -CGRAMemBottomRTL.py +CgraMemBottomRTL.py ========================================================================= The scrachpad memory is connected to the bottom (first row) tiles. @@ -21,8 +21,7 @@ from ..noc.CrossbarRTL import CrossbarRTL from ..tile.TileRTL import TileRTL -class CGRAMemBottomRTL(Component): - +class CgraMemBottomRTL(Component): def construct(s, DataType, PredicateType, CtrlType, width, height, ctrl_mem_size, data_mem_size, num_ctrl, total_steps, FunctionUnit, FuList, preload_data = None, diff --git a/cgra/CGRAMemRightAndBottomRTL.py b/systolic/CgraMemRightAndBottomRTL.py similarity index 96% rename from cgra/CGRAMemRightAndBottomRTL.py rename to systolic/CgraMemRightAndBottomRTL.py index 1436276..f96550f 100644 --- a/cgra/CGRAMemRightAndBottomRTL.py +++ b/systolic/CgraMemRightAndBottomRTL.py @@ -1,6 +1,6 @@ """ ========================================================================= -CGRAMemRightAndBottomRTL.py +CgraMemRightAndBottomRTL.py ========================================================================= Two scrachpad memories are connected to the bottom (first row) and the last column (except the one on the first row) tiles. For example, in a @@ -24,9 +24,7 @@ from ..noc.CrossbarRTL import CrossbarRTL from ..tile.TileRTL import TileRTL - -class CGRAMemRightAndBottomRTL(Component): - +class CgraMemRightAndBottomRTL(Component): def construct(s, DataType, PredicateType, CtrlType, width, height, ctrl_mem_size, data_mem_size, num_ctrl, total_steps, FunctionUnit, FuList, preload_data = None, diff --git a/cgra/translate/CGRAMemBottomRTL_matmul_2x2_test.py b/systolic/translate/CgraMemBottomRTL_matmul_2x2_test.py similarity index 97% rename from cgra/translate/CGRAMemBottomRTL_matmul_2x2_test.py rename to systolic/translate/CgraMemBottomRTL_matmul_2x2_test.py index f1d510e..69ffc0f 100644 --- a/cgra/translate/CGRAMemBottomRTL_matmul_2x2_test.py +++ b/systolic/translate/CgraMemBottomRTL_matmul_2x2_test.py @@ -1,6 +1,6 @@ """ ========================================================================== -CGRAMemBottomRTL_matmul_2x2_test.py +CgraMemBottomRTL_matmul_2x2_test.py ========================================================================== Translation for 3x2 CGRA. The provided test is only used for a 2x2 matmul. @@ -13,7 +13,7 @@ from pymtl3.passes.backends.verilog import VerilogTranslationPass from pymtl3.stdlib.test_utils import (run_sim, config_model_with_cmdline_opts) -from ..CGRAMemBottomRTL import CGRAMemBottomRTL +from ..CgraMemBottomRTL import CgraMemBottomRTL from ...fu.flexible.FlexibleFuRTL import FlexibleFuRTL from ...fu.single.AdderRTL import AdderRTL from ...fu.single.BranchRTL import BranchRTL @@ -30,7 +30,6 @@ from ...lib.messages import * from ...lib.opt_type import * - #------------------------------------------------------------------------- # Test harness #------------------------------------------------------------------------- @@ -38,7 +37,6 @@ kMaxCycles = 20 class TestHarness(Component): - def construct(s, DUT, FunctionUnit, fu_list, DataType, PredicateType, CtrlType, width, height, ctrl_mem_size, data_mem_size, src_opt, ctrl_waddr, preload_data, preload_const, @@ -110,13 +108,11 @@ def test_CGRA_systolic(cmdline_opts): AddrType = mk_bits(clog2(ctrl_mem_size)) num_tiles = width * height num_fu_in = 4 - DUT = CGRAMemBottomRTL + DUT = CgraMemBottomRTL FunctionUnit = FlexibleFuRTL FuList = [SeqMulAdderRTL, AdderRTL, MulRTL, LogicRTL, ShifterRTL, PhiRTL, CompRTL, BranchRTL, MemUnitRTL] DataType = mk_data(32, 1) PredicateType = mk_predicate(1, 1) - # FuList = [ SeqMulAdderRTL, AdderRTL, MulRTL, LogicRTL, ShifterRTL, PhiRTL, CompRTL, BranchRTL, MemUnitRTL ] - # DataType = mk_data(16, 1) CtrlType = mk_ctrl(num_fu_in, num_xbar_inports, num_xbar_outports) FuInType = mk_bits(clog2( num_fu_in + 1)) pickRegister = [FuInType(x + 1) for x in range(num_fu_in)] @@ -301,7 +297,7 @@ def test_CGRA_systolic(cmdline_opts): th.elaborate() th.dut.set_metadata(VerilogTranslationPass.explicit_module_name, - f'CGRARTL') + f'CgraMemBottomRTL') # th.dut.set_metadata( VerilogVerilatorImportPass.vl_Wno_list, # ['UNSIGNED', 'UNOPTFLAT', 'WIDTH', 'WIDTHCONCAT', # 'ALWCOMBORDER'] ) diff --git a/cgra/translate/CGRAMemRightAndBottomRTL_matmul_2x2_test.py b/systolic/translate/CgraMemRightAndBottomRTL_matmul_2x2_test.py similarity index 97% rename from cgra/translate/CGRAMemRightAndBottomRTL_matmul_2x2_test.py rename to systolic/translate/CgraMemRightAndBottomRTL_matmul_2x2_test.py index 7ad2000..02dfb58 100644 --- a/cgra/translate/CGRAMemRightAndBottomRTL_matmul_2x2_test.py +++ b/systolic/translate/CgraMemRightAndBottomRTL_matmul_2x2_test.py @@ -1,6 +1,6 @@ """ ========================================================================== -CGRARightAndBottomRTL_matmul_2x2_test.py +CgraRightAndBottomRTL_matmul_2x2_test.py ========================================================================== Translation for 3x3 CGRA. The provided test is only used for a 2x2 matmul. @@ -13,7 +13,7 @@ from pymtl3.passes.backends.verilog import VerilogTranslationPass from pymtl3.stdlib.test_utils import (run_sim, config_model_with_cmdline_opts) -from ..CGRAMemRightAndBottomRTL import CGRAMemRightAndBottomRTL +from ..CgraMemRightAndBottomRTL import CgraMemRightAndBottomRTL from ...fu.flexible.FlexibleFuRTL import FlexibleFuRTL from ...fu.single.AdderRTL import AdderRTL from ...fu.single.BranchRTL import BranchRTL @@ -30,7 +30,6 @@ from ...lib.messages import * from ...lib.opt_type import * - #------------------------------------------------------------------------- # Test harness #------------------------------------------------------------------------- @@ -38,7 +37,6 @@ kMaxCycles = 12 class TestHarness(Component): - def construct(s, DUT, FunctionUnit, fu_list, DataType, PredicateType, CtrlType, width, height, ctrl_mem_size, data_mem_size, src_opt, ctrl_waddr, preload_data, preload_const, @@ -59,12 +57,6 @@ def construct(s, DUT, FunctionUnit, fu_list, DataType, PredicateType, kMaxCycles, FunctionUnit, fu_list, preload_data, preload_const) - # s.sink_out = [TestSinkRTL(DataType, sink_out[i]) - # for i in range(height - 1)] - - # for i in range(height - 1): - # connect(s.dut.send_data[i], s.sink_out[i].recv) - for i in range(s.num_tiles): connect(s.src_opt[i].send, s.dut.recv_wopt[i]) connect(s.ctrl_waddr[i].send, s.dut.recv_waddr[i]) @@ -134,13 +126,11 @@ def test_CGRA_systolic(cmdline_opts): AddrType = mk_bits(clog2(ctrl_mem_size)) num_tiles = width * height num_fu_in = 4 - DUT = CGRAMemRightAndBottomRTL + DUT = CgraMemRightAndBottomRTL FunctionUnit = FlexibleFuRTL FuList = [SeqMulAdderRTL, AdderRTL, MulRTL, LogicRTL, ShifterRTL, PhiRTL, CompRTL, BranchRTL, MemUnitRTL] DataType = mk_data(32, 1) PredicateType = mk_predicate(1, 1) - # FuList = [ SeqMulAdderRTL, AdderRTL, MulRTL, LogicRTL, ShifterRTL, PhiRTL, CompRTL, BranchRTL, MemUnitRTL ] - # DataType = mk_data(16, 1) CtrlType = mk_ctrl(num_fu_in, num_xbar_inports, num_xbar_outports) FuInType = mk_bits(clog2( num_fu_in + 1)) pickRegister = [FuInType(x + 1) for x in range(num_fu_in)] @@ -403,7 +393,7 @@ def test_CGRA_systolic(cmdline_opts): th.elaborate() th.dut.set_metadata(VerilogTranslationPass.explicit_module_name, - f'CGRAMemRightAndBottomRTL') + f'CgraMemRightAndBottomRTL') # th.dut.set_metadata( VerilogVerilatorImportPass.vl_Wno_list, # ['UNSIGNED', 'UNOPTFLAT', 'WIDTH', 'WIDTHCONCAT', # 'ALWCOMBORDER'] ) diff --git a/systolic/translate/__init__.py b/systolic/translate/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tile/TileSeparateCrossbarRTL.py b/tile/TileSeparateCrossbarRTL.py index 4dff881..0a6250d 100644 --- a/tile/TileSeparateCrossbarRTL.py +++ b/tile/TileSeparateCrossbarRTL.py @@ -25,8 +25,9 @@ from ..fu.single.MemUnitRTL import MemUnitRTL from ..fu.single.MulRTL import MulRTL from ..lib.basic.en_rdy.ifcs import SendIfcRTL, RecvIfcRTL +from ..lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL from ..mem.const.ConstQueueRTL import ConstQueueRTL -from ..mem.ctrl.CtrlMemRTL import CtrlMemRTL +from ..mem.ctrl.CtrlMemDynamicRTL import CtrlMemDynamicRTL from ..noc.CrossbarSeparateRTL import CrossbarSeparateRTL from ..noc.ChannelNormalRTL import ChannelNormalRTL from ..noc.LinkOrRTL import LinkOrRTL @@ -35,11 +36,10 @@ class TileSeparateCrossbarRTL(Component): - def construct(s, DataType, PredicateType, CtrlType, - ctrl_mem_size, data_mem_size, num_ctrl, - total_steps, num_fu_inports, num_fu_outports, - num_tile_inports, num_tile_outports, - Fu = FlexibleFuRTL, + def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, + ctrl_mem_size, data_mem_size, num_ctrl, total_steps, + num_fu_inports, num_fu_outports, num_tile_inports, + num_tile_outports, Fu = FlexibleFuRTL, FuList = [PhiRTL, AdderRTL, CompRTL, MulRTL, BranchRTL, MemUnitRTL], const_list = None): @@ -60,8 +60,9 @@ def construct(s, DataType, PredicateType, CtrlType, num_tile_outports)] # Ctrl. - s.recv_waddr = RecvIfcRTL(CtrlAddrType) - s.recv_wopt = RecvIfcRTL(CtrlType) + # s.recv_waddr = RecvIfcRTL(CtrlAddrType) + # s.recv_wopt = RecvIfcRTL(CtrlSignalType) + s.recv_ctrl_pkt = ValRdyRecvIfcRTL(CtrlPktType) # Data. s.to_mem_raddr = SendIfcRTL(DataAddrType) @@ -70,18 +71,20 @@ def construct(s, DataType, PredicateType, CtrlType, s.to_mem_wdata = SendIfcRTL(DataType) # Components. - s.element = FlexibleFuRTL(DataType, PredicateType, CtrlType, + s.element = FlexibleFuRTL(DataType, PredicateType, CtrlSignalType, num_fu_inports, num_fu_outports, data_mem_size, FuList) s.const_queue = ConstQueueRTL(DataType, const_list if const_list != None else [DataType(0)]) - s.routing_crossbar = CrossbarSeparateRTL(DataType, PredicateType, CtrlType, - num_routing_xbar_inports, - num_routing_xbar_outports) - s.fu_crossbar = CrossbarSeparateRTL(DataType, PredicateType, CtrlType, - num_fu_xbar_inports, - num_fu_xbar_outports) - s.ctrl_mem = CtrlMemRTL(CtrlType, ctrl_mem_size, num_ctrl, - total_steps) + s.routing_crossbar = CrossbarSeparateRTL(DataType, PredicateType, CtrlSignalType, + num_routing_xbar_inports, + num_routing_xbar_outports) + s.fu_crossbar = CrossbarSeparateRTL(DataType, PredicateType, CtrlSignalType, + num_fu_xbar_inports, + num_fu_xbar_outports) + s.ctrl_mem = CtrlMemDynamicRTL(CtrlPktType, CtrlSignalType, ctrl_mem_size, + num_fu_inports, num_fu_outports, + num_tile_inports, num_tile_outports, + num_ctrl, total_steps) # The `tile_out_channel` indicates the outport channels that are # connected to the next tiles. s.tile_out_channel = [ChannelNormalRTL(DataType) for _ in range( @@ -105,8 +108,9 @@ def construct(s, DataType, PredicateType, CtrlType, # Connections. # Ctrl. - s.ctrl_mem.recv_waddr //= s.recv_waddr - s.ctrl_mem.recv_ctrl //= s.recv_wopt + # s.ctrl_mem.recv_waddr //= s.recv_waddr + # s.ctrl_mem.recv_ctrl //= s.recv_wopt + s.ctrl_mem.recv_pkt //= s.recv_ctrl_pkt # Constant queue. s.element.recv_const //= s.const_queue.send_const diff --git a/tile/test/TileSeparateCrossbarRTL_test.py b/tile/test/TileSeparateCrossbarRTL_test.py index 75dfeb2..6ee012b 100644 --- a/tile/test/TileSeparateCrossbarRTL_test.py +++ b/tile/test/TileSeparateCrossbarRTL_test.py @@ -24,11 +24,12 @@ from ...fu.flexible.FlexibleFuRTL import FlexibleFuRTL from ...lib.basic.en_rdy.test_sinks import TestSinkRTL from ...lib.basic.en_rdy.test_srcs import TestSrcRTL +from ...lib.basic.val_rdy.SourceRTL import SourceRTL as ValRdyTestSrcRTL from ...lib.messages import * +from ...lib.cmd_type import * from ...lib.opt_type import * from ...mem.ctrl.CtrlMemRTL import CtrlMemRTL - #------------------------------------------------------------------------- # Test harness #------------------------------------------------------------------------- @@ -36,32 +37,29 @@ class TestHarness(Component): def construct(s, DUT, FunctionUnit, FuList, DataType, PredicateType, - CtrlType, ctrl_mem_size, data_mem_size, - num_fu_inports, num_fu_outports, - src_data, src_opt, opt_waddr, sink_out): + CtrlPktType, CtrlSignalType, ctrl_mem_size, data_mem_size, + num_fu_inports, num_fu_outports, num_tile_inports, + num_tile_outports, src_data, src_ctrl_pkt, sink_out): - AddrType = mk_bits(clog2(ctrl_mem_size)) + s.num_tile_inports = num_tile_inports + s.num_tile_outports = num_tile_outports - # s.src_predicate = TestSrcRTL( b1, src_predicate ) - s.src_opt = TestSrcRTL(CtrlType, src_opt) - s.opt_waddr = TestSrcRTL(AddrType, opt_waddr) + s.src_ctrl_pkt = ValRdyTestSrcRTL(CtrlPktType, src_ctrl_pkt) s.src_data = [TestSrcRTL(DataType, src_data[i]) - for i in range(4)]#num_tile_inports)] + for i in range(num_tile_inports)] s.sink_out = [TestSinkRTL(DataType, sink_out[i]) - for i in range(4)]#num_tile_outports)] + for i in range(num_tile_outports)] - s.dut = DUT(DataType, PredicateType, CtrlType, - ctrl_mem_size, data_mem_size, len(src_opt), - len(src_opt), num_fu_inports, num_fu_outports, - 4, 4, FunctionUnit, FuList) + s.dut = DUT(DataType, PredicateType, CtrlPktType, CtrlSignalType, + ctrl_mem_size, data_mem_size, 3, 3, # 3 opts + num_fu_inports, num_fu_outports, num_tile_inports, + num_tile_outports, FunctionUnit, FuList) - # connect(s.src_predicate.send, s.dut.reg_predicate) - connect(s.src_opt.send, s.dut.recv_wopt) - connect(s.opt_waddr.send, s.dut.recv_waddr) + connect(s.src_ctrl_pkt.send, s.dut.recv_ctrl_pkt) - for i in range(4):# num_tile_inports): + for i in range(num_tile_inports): connect(s.src_data[i].send, s.dut.recv_data[i]) - for i in range(4):#num_tile_outports ): + for i in range(num_tile_outports): connect(s.dut.send_data[i], s.sink_out[i].recv) if MemUnitRTL in FuList: @@ -72,12 +70,15 @@ def construct(s, DUT, FunctionUnit, FuList, DataType, PredicateType, s.dut.to_mem_wdata.rdy //= 0 def done(s): - done = True - for i in range(4): # s.num_tile_outports ): - if not s.sink_out[i].done(): # and not s.src_data[i].done(): - done = False - break - return done + for i in range(s.num_tile_inports): + if not s.src_data[i].done(): + return False + + for i in range(s.num_tile_outports): + if not s.sink_out[i].done(): + return False + + return True def line_trace(s): return s.dut.line_trace() @@ -90,11 +91,10 @@ def test_tile_alu(cmdline_opts): num_routing_outports = num_fu_inports + num_tile_outports ctrl_mem_size = 3 data_mem_size = 8 - # number of inputs of FU is fixed inside the tile - # num_fu_in = 4 - # num_fu_out = 2 + num_terminals = 4 + num_ctrl_actions = 6 + num_ctrl_operations = 64 TileInType = mk_bits(clog2(num_tile_inports + 1)) - AddrType = mk_bits(clog2(ctrl_mem_size)) FuInType = mk_bits(clog2(num_fu_inports + 1)) FuOutType = mk_bits(clog2(num_fu_outports + 1)) pickRegister0 = [FuInType(0) for x in range(num_fu_inports)] @@ -104,35 +104,53 @@ def test_tile_alu(cmdline_opts): FuList = [AdderRTL, MulRTL, MemUnitRTL] DataType = mk_data(16, 1) PredicateType = mk_predicate(1, 1) - CtrlType = mk_separate_ctrl(num_fu_inports, num_fu_outports, - num_tile_inports, num_tile_outports) - opt_waddr = [AddrType(0), AddrType(1), AddrType(2)] - src_opt = [CtrlType(OPT_NAH, b1(0), pickRegister0, - # routing_xbar_output - [TileInType(0), TileInType(0), TileInType(0), TileInType(0), - TileInType(4), TileInType(3), TileInType(0), TileInType(0)], - # fu_xbar_output - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)]), - CtrlType(OPT_ADD, b1(0), pickRegister1, - # routing_xbar_output - [TileInType(0), TileInType(0), TileInType(0), TileInType(0), - TileInType(4), TileInType(1), TileInType(0), TileInType(0)], - # fu_xbar_output - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(1), - FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)]), - CtrlType(OPT_SUB, b1(0), pickRegister1, - # routing_xbar_output - [TileInType(0), TileInType(0), TileInType(0), TileInType(0), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - # fu_xbar_output - [FuOutType(1), FuOutType(0), FuOutType(0), FuOutType(1), - FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)])] - src_data = [[DataType(3, 1)], # DataType( 3, 1)], - [], # DataType(3, 1), DataType( 4, 1)], - [DataType(4, 1)], # DataType( 5, 1)], + CtrlPktType = \ + mk_ring_across_tiles_pkt(num_terminals, + num_ctrl_actions, + ctrl_mem_size, + num_ctrl_operations, + num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports) + CtrlSignalType = \ + mk_separate_ctrl(num_ctrl_operations, num_fu_inports, + num_fu_outports, num_tile_inports, + num_tile_outports) + src_ctrl_pkt = [ + # src dst vc_id opq cmd_type addr operation predicate + CtrlPktType(0, 0, 0, 0, CMD_CONFIG, 0, OPT_NAH, b1(0), pickRegister0, + # routing_xbar_output + [TileInType(0), TileInType(0), TileInType(0), TileInType(0), + TileInType(4), TileInType(3), TileInType(0), TileInType(0)], + # fu_xbar_output + [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)]), + CtrlPktType(0, 0, 0, 0, CMD_CONFIG, 1, OPT_ADD, b1(0), pickRegister1, + # routing_xbar_output + [TileInType(0), TileInType(0), TileInType(0), TileInType(0), + TileInType(4), TileInType(1), TileInType(0), TileInType(0)], + # fu_xbar_output + [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(1), + FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)]), + CtrlPktType(0, 0, 0, 0, CMD_CONFIG, 2, OPT_SUB, b1(0), pickRegister1, + # routing_xbar_output + [TileInType(0), TileInType(0), TileInType(0), TileInType(0), + TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + # fu_xbar_output + [FuOutType(1), FuOutType(0), FuOutType(0), FuOutType(1), + FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)]), + CtrlPktType(0, 0, 0, 0, CMD_LAUNCH, 0, OPT_ADD, b1(0), pickRegister1, + # routing_xbar_output + [TileInType(0), TileInType(0), TileInType(0), TileInType(0), + TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + # fu_xbar_output + [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)])] + src_data = [[DataType(3, 1)], + [], + [DataType(4, 1)], [DataType(5, 1), DataType(7, 1)]] - # src_predicate = [b1(0), b1(0), b1(0) ] src_const = [DataType(5, 1), DataType(0, 0), DataType(7, 1)] sink_out = [ # 7 - 3 = 4. @@ -140,16 +158,17 @@ def test_tile_alu(cmdline_opts): [], [], # 5 + 4 = 9; 7 - 3 = 4. - [DataType(9, 1), DataType( 4, 1)]] + [DataType(9, 1), DataType(4, 1)]] th = TestHarness(DUT, FunctionUnit, FuList, DataType, PredicateType, - CtrlType, ctrl_mem_size, data_mem_size, - num_fu_inports, num_fu_outports, - src_data, src_opt, opt_waddr, sink_out) + CtrlPktType, CtrlSignalType, ctrl_mem_size, + data_mem_size, num_fu_inports, num_fu_outports, + num_tile_inports, num_tile_outports, src_data, + src_ctrl_pkt, sink_out) th.elaborate() th.dut.set_metadata(VerilogVerilatorImportPass.vl_Wno_list, ['UNSIGNED', 'UNOPTFLAT', 'WIDTH', 'WIDTHCONCAT', 'ALWCOMBORDER']) - th = config_model_with_cmdline_opts(th, cmdline_opts, duts=['dut']) + th = config_model_with_cmdline_opts(th, cmdline_opts, duts = ['dut']) run_sim(th)