Skip to content

Commit

Permalink
Merge pull request #62 from tancheng/mesh_multi_cgra
Browse files Browse the repository at this point in the history
[feature] Model multi-cgra with mesh topology/noc
  • Loading branch information
tancheng authored Jan 10, 2025
2 parents 7007b8a + 305b9f0 commit cbd277b
Show file tree
Hide file tree
Showing 19 changed files with 607 additions and 126 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ jobs:
mkdir -p build && cd build
source ${HOME}/venv/bin/activate
# Simulation across all tests.
pytest .. -v
pytest .. -v --tb=short
# Tile translation.
pytest ../tile/test/TileRTL_test.py -xvs --test-verilog --dump-vtb --dump-vcd
# CGRA template translation.
Expand All @@ -75,4 +75,6 @@ jobs:
# separate crossbars (for tiles and FUs), crossbar-based data memory (for
# multi-bank), and controller.
pytest ../scale_out/test/RingMultiCgraRTL_test.py -xvs --test-verilog --dump-vtb --dump-vcd
# Multi-cgra with mesh topology.
pytest ../scale_out/test/MeshMultiCgraRTL_test.py -xvs --test-verilog --dump-vtb --dump-vcd
23 changes: 13 additions & 10 deletions cgra/CgraRTL.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,20 +24,21 @@
class CgraRTL(Component):

def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType,
NocPktType, CmdType, ControllerIdType, controller_id,
width, height, ctrl_mem_size, data_mem_size_global,
NocPktType, CmdType, ControllerIdType, multi_cgra_rows,
multi_cgra_columns, controller_id, width, height,
ctrl_mem_size, data_mem_size_global,
data_mem_size_per_bank, num_banks_per_cgra, num_ctrl,
total_steps, FunctionUnit, FuList, topology,
controller2addr_map, preload_data = None,
total_steps, FunctionUnit, FuList, cgra_topology,
controller2addr_map, idTo2d_map, preload_data = None,
preload_const = None):

# Other topology can simply modify the tiles connections, or
# leverage the template for modeling.
assert(topology == "Mesh" or topology == "KingMesh")
assert(cgra_topology == "Mesh" or cgra_topology == "KingMesh")
s.num_mesh_ports = 4
if topology == "Mesh":
if cgra_topology == "Mesh":
s.num_mesh_ports = 4
elif topology == "KingMesh":
elif cgra_topology == "KingMesh":
s.num_mesh_ports = 8

s.num_tiles = width * height
Expand Down Expand Up @@ -81,8 +82,10 @@ def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType,
preload_data)
s.controller = ControllerRTL(ControllerIdType, CmdType, CtrlPktType,
NocPktType, DataType, DataAddrType,
controller_id, controller2addr_map)
s.ctrl_ring = RingNetworkRTL(CtrlPktType, CtrlRingPos, s.num_tiles, 0)
multi_cgra_rows, multi_cgra_columns,
controller_id, controller2addr_map,
idTo2d_map)
s.ctrl_ring = RingNetworkRTL(CtrlPktType, CtrlRingPos, s.num_tiles, 1)

# Connections
# Connects data memory with controller.
Expand Down Expand Up @@ -123,7 +126,7 @@ def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType,
if i % width < width - 1:
s.tile[i].send_data[PORT_EAST] //= s.tile[i+1].recv_data[PORT_WEST]

if topology == "KingMesh":
if cgra_topology == "KingMesh":
if i % width > 0 and i // width < height - 1:
s.tile[i].send_data[PORT_NORTHWEST] //= s.tile[i+width-1].recv_data[PORT_SOUTHEAST]
s.tile[i+width-1].send_data[PORT_SOUTHEAST] //= s.tile[i].recv_data[PORT_NORTHWEST]
Expand Down
12 changes: 7 additions & 5 deletions cgra/CgraTemplateRTL.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,13 @@
class CgraTemplateRTL(Component):

def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType,
NocPktType, CmdType, ControllerIdType, controller_id,
NocPktType, CmdType, ControllerIdType, multi_cgra_rows,
multi_cgra_columns, controller_id,
ctrl_mem_size, data_mem_size_global,
data_mem_size_per_bank, num_banks_per_cgra, num_ctrl,
total_steps, FunctionUnit, FuList, TileList, LinkList,
dataSPM, controller2addr_map, preload_data = None,
preload_const = None):
dataSPM, controller2addr_map, idTo2d_map,
preload_data = None, preload_const = None):

s.num_mesh_ports = 8
s.num_tiles = len(TileList)
Expand Down Expand Up @@ -77,8 +78,9 @@ def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType,
preload_data)
s.controller = ControllerRTL(ControllerIdType, CmdType, CtrlPktType,
NocPktType, DataType, DataAddrType,
controller_id, controller2addr_map)
s.ctrl_ring = RingNetworkRTL(CtrlPktType, CtrlRingPos, s.num_tiles, 0)
multi_cgra_rows, multi_cgra_columns,
controller_id, controller2addr_map, idTo2d_map)
s.ctrl_ring = RingNetworkRTL(CtrlPktType, CtrlRingPos, s.num_tiles, 1)

# Connections
# Connects data memory with controller.
Expand Down
34 changes: 23 additions & 11 deletions cgra/test/CgraRTL_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,16 +43,20 @@ def construct(s, DUT, FunctionUnit, FuList, DataType, PredicateType,
ControllerIdType, controller_id, width, height,
ctrl_mem_size, data_mem_size_global,
data_mem_size_per_bank, num_banks_per_cgra,
src_ctrl_pkt, ctrl_steps, topology, controller2addr_map):
src_ctrl_pkt, ctrl_steps, topology, controller2addr_map,
idTo2d_map):

s.num_tiles = width * height
s.src_ctrl_pkt = TestSrcRTL(CtrlPktType, src_ctrl_pkt)
s.dut = DUT(DataType, PredicateType, CtrlPktType, CtrlSignalType,
NocPktType, CmdType, ControllerIdType, controller_id,
width, height, ctrl_mem_size, data_mem_size_global,
data_mem_size_per_bank, num_banks_per_cgra,
ctrl_steps, ctrl_steps, FunctionUnit, FuList,
topology, controller2addr_map)
NocPktType, CmdType, ControllerIdType,
# Multi-CGRA dimensions, passed as (rows, columns) = (1, 4). Assumes 4
# CGRA terminals in total, though this test is for a single CGRA.
1, 4,
controller_id, width, height, ctrl_mem_size,
data_mem_size_global, data_mem_size_per_bank,
num_banks_per_cgra, ctrl_steps, ctrl_steps, FunctionUnit,
FuList, topology, controller2addr_map, idTo2d_map)

# Connections
s.src_ctrl_pkt.send //= s.dut.recv_from_cpu_ctrl_pkt
Expand Down Expand Up @@ -127,6 +131,13 @@ def init_param(topology, FuList = [MemUnitRTL, AdderRTL]):
2: [8, 11],
3: [12, 15],
}

idTo2d_map = {
0: [0, 0],
1: [1, 0],
2: [2, 0],
3: [3, 0],
}

CtrlPktType = \
mk_ring_across_tiles_pkt(width * height,
Expand All @@ -144,10 +155,11 @@ def init_param(topology, FuList = [MemUnitRTL, AdderRTL]):
num_tile_inports,
num_tile_outports)

NocPktType = mk_ring_multi_cgra_pkt(nrouters = num_terminals,
addr_nbits = addr_nbits,
data_nbits = 32,
predicate_nbits = 1)
NocPktType = mk_multi_cgra_noc_pkt(ncols = num_terminals,
nrows = 1,
addr_nbits = addr_nbits,
data_nbits = 32,
predicate_nbits = 1)
pick_register = [FuInType(x + 1) for x in range(num_fu_inports)]
tile_in_code = [TileInType(max(4 - x, 0)) for x in range(num_routing_outports)]
fu_out_code = [FuOutType(x % 2) for x in range(num_routing_outports)]
Expand Down Expand Up @@ -191,7 +203,7 @@ def init_param(topology, FuList = [MemUnitRTL, AdderRTL]):
ctrl_mem_size, data_mem_size_global,
data_mem_size_per_bank, num_banks_per_cgra,
src_ctrl_pkt, ctrl_mem_size, topology,
controller2addr_map)
controller2addr_map, idTo2d_map)
return th

def test_homogeneous_2x2(cmdline_opts):
Expand Down
30 changes: 21 additions & 9 deletions cgra/test/CgraTemplateRTL_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,17 +58,21 @@ def construct(s, DUT, FunctionUnit, FuList, DataType, PredicateType,
ControllerIdType, controller_id, ctrl_mem_size,
data_mem_size_global, data_mem_size_per_bank,
num_banks_per_cgra, src_ctrl_pkt, ctrl_steps, TileList,
LinkList, dataSPM, controller2addr_map):
LinkList, dataSPM, controller2addr_map, idTo2d_map):

s.num_tiles = len(TileList)
s.src_ctrl_pkt = TestSrcRTL(CtrlPktType, src_ctrl_pkt)

s.dut = DUT(DataType, PredicateType, CtrlPktType, CtrlSignalType,
NocPktType, CmdType, ControllerIdType, controller_id,
ctrl_mem_size, data_mem_size_global,
NocPktType, CmdType, ControllerIdType,
# Multi-CGRA dimensions, passed as (rows, columns) = (1, 4). Assumes 4
# CGRA terminals in total, though this test is for a single CGRA.
1, 4,
controller_id, ctrl_mem_size, data_mem_size_global,
data_mem_size_per_bank, num_banks_per_cgra,
ctrl_steps, ctrl_steps, FunctionUnit, FuList,
TileList, LinkList, dataSPM, controller2addr_map)
TileList, LinkList, dataSPM, controller2addr_map,
idTo2d_map)

# Connections
s.src_ctrl_pkt.send //= s.dut.recv_from_cpu_ctrl_pkt
Expand Down Expand Up @@ -205,6 +209,13 @@ def test_cgra_universal(cmdline_opts, paramCGRA = None):
3: [12, 15],
}

idTo2d_map = {
0: [0, 0],
1: [1, 0],
2: [2, 0],
3: [3, 0],
}

CtrlPktType = \
mk_ring_across_tiles_pkt(width * height,
num_ctrl_actions,
Expand All @@ -221,10 +232,11 @@ def test_cgra_universal(cmdline_opts, paramCGRA = None):
num_tile_inports,
num_tile_outports)

NocPktType = mk_ring_multi_cgra_pkt(nrouters = num_terminals,
addr_nbits = addr_nbits,
data_nbits = 32,
predicate_nbits = 1)
NocPktType = mk_multi_cgra_noc_pkt(ncols = num_terminals,
nrows = 1,
addr_nbits = addr_nbits,
data_nbits = 32,
predicate_nbits = 1)
pick_register = [FuInType(x + 1) for x in range(num_fu_inports)]
tile_in_code = [TileInType(max(4 - x, 0)) for x in range(num_routing_outports)]
fu_out_code = [FuOutType(x % 2) for x in range(num_routing_outports)]
Expand Down Expand Up @@ -376,7 +388,7 @@ def handleReshape( t_tiles ):
ctrl_mem_size, data_mem_size_global,
data_mem_size_per_bank, num_banks_per_cgra,
src_ctrl_pkt, ctrl_mem_size, tiles, links, dataSPM,
controller2addr_map)
controller2addr_map, idTo2d_map)

th.elaborate()
th.dut.set_metadata(VerilogTranslationPass.explicit_module_name,
Expand Down
59 changes: 49 additions & 10 deletions controller/ControllerRTL.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,15 @@
class ControllerRTL(Component):

def construct(s, ControllerIdType, CmdType, CtrlPktType, NocPktType,
CGRADataType, CGRAAddrType, controller_id,
controller2addr_map):
CGRADataType, CGRAAddrType, multi_cgra_rows,
multi_cgra_columns, controller_id, controller2addr_map,
idTo2d_map):

assert(multi_cgra_columns >= multi_cgra_rows)

# Used for calculating the x/y position.
XType = mk_bits(max(clog2(multi_cgra_columns), 1))
YType = mk_bits(max(clog2(multi_cgra_rows), 1))

# Interface
# Request from/to other CGRA via NoC.
Expand Down Expand Up @@ -57,7 +64,7 @@ def construct(s, ControllerIdType, CmdType, CtrlPktType, NocPktType,
# s.send_to_other_cmd_queue = ChannelRTL(CmdType, latency = 1, num_entries = 2)

# Crossbar with 3 inports (load and store requests towards remote
# memory, and load response from master) and 1 outport (only
# memory, and load response from local memory) and 1 outport (only
# allow one request to be sent out per cycle).
# TODO: Include other cmd requests (e.g., dynamic rescheduling,
# termination).
Expand Down Expand Up @@ -97,6 +104,14 @@ def construct(s, ControllerIdType, CmdType, CtrlPktType, NocPktType,

s.addr2controller_lut[addr_base] //= ControllerIdType(src_controller_id)

# Constructs the idTo2d lut.
s.idTo2d_x_lut= [Wire(XType) for _ in range(multi_cgra_columns * multi_cgra_rows)]
s.idTo2d_y_lut= [Wire(YType) for _ in range(multi_cgra_columns * multi_cgra_rows)]
for cgra_id in idTo2d_map:
xy = idTo2d_map[cgra_id]
s.idTo2d_x_lut[cgra_id] //= XType(xy[0])
s.idTo2d_y_lut[cgra_id] //= YType(xy[1])

# Connections
# Requests towards others, 1 cycle delay to improve timing.
s.recv_from_tile_load_request_pkt_queue.recv //= s.recv_from_tile_load_request_pkt
Expand Down Expand Up @@ -125,47 +140,66 @@ def update_received_msg():
kLoadResponseInportIdx = 1
kStoreRequestInportIdx = 2

# For the load request from master.
# For the load request from local tiles.
s.crossbar.recv[kLoadRequestInportIdx].val @= s.recv_from_tile_load_request_pkt_queue.send.val
s.recv_from_tile_load_request_pkt_queue.send.rdy @= s.crossbar.recv[kLoadRequestInportIdx].rdy
s.crossbar.recv[kLoadRequestInportIdx].msg @= \
NocPktType(controller_id,
0,
s.idTo2d_x_lut[controller_id], # src_x
s.idTo2d_y_lut[controller_id], # src_y
0, # dst_x
0, # dst_y
0,
0,
CMD_LOAD_REQUEST,
s.recv_from_tile_load_request_pkt_queue.send.msg.addr,
0,
1)
1,
0)


# For the store request from master.

# For the store request from local tiles.
s.crossbar.recv[kStoreRequestInportIdx].val @= s.recv_from_tile_store_request_pkt_queue.send.val
s.recv_from_tile_store_request_pkt_queue.send.rdy @= s.crossbar.recv[kStoreRequestInportIdx].rdy
s.crossbar.recv[kStoreRequestInportIdx].msg @= \
NocPktType(controller_id,
0,
s.idTo2d_x_lut[controller_id], # src_x
s.idTo2d_y_lut[controller_id], # src_y
0, # dst_x
0, # dst_y
0,
0,
CMD_STORE_REQUEST,
s.recv_from_tile_store_request_pkt_queue.send.msg.addr,
s.recv_from_tile_store_request_pkt_queue.send.msg.data,
s.recv_from_tile_store_request_pkt_queue.send.msg.predicate)
s.recv_from_tile_store_request_pkt_queue.send.msg.predicate,
0)


# For the load response (i.e., the data towards other) from master.
# For the load response (i.e., the data sent towards other CGRAs) from local memory.
s.crossbar.recv[kLoadResponseInportIdx].val @= \
s.recv_from_tile_load_response_pkt_queue.send.val
s.recv_from_tile_load_response_pkt_queue.send.rdy @= s.crossbar.recv[kLoadResponseInportIdx].rdy
s.crossbar.recv[kLoadResponseInportIdx].msg @= \
NocPktType(controller_id,
0,
s.idTo2d_x_lut[controller_id], # src_x
s.idTo2d_y_lut[controller_id], # src_y
0, # dst_x
0, # dst_y
0,
0,
CMD_LOAD_RESPONSE,
# Retrieves the load (from NoC) address from the message.
# The addr information is embedded in the message.
s.recv_from_tile_load_response_pkt_queue.send.msg.addr,
s.recv_from_tile_load_response_pkt_queue.send.msg.data,
s.recv_from_tile_load_response_pkt_queue.send.msg.predicate)
s.recv_from_tile_load_response_pkt_queue.send.msg.predicate,
0)

# TODO: For the other cmd types.


Expand Down Expand Up @@ -224,12 +258,17 @@ def update_sending_to_noc_msg():
s.send_to_noc.msg @= \
NocPktType(s.crossbar.send[0].msg.src,
addr_dst_id,
s.crossbar.send[0].msg.src_x,
s.crossbar.send[0].msg.src_y,
s.idTo2d_x_lut[addr_dst_id],
s.idTo2d_y_lut[addr_dst_id],
s.crossbar.send[0].msg.opaque,
s.crossbar.send[0].msg.vc_id,
s.crossbar.send[0].msg.cmd,
s.crossbar.send[0].msg.addr,
s.crossbar.send[0].msg.data,
s.crossbar.send[0].msg.predicate)
s.crossbar.send[0].msg.predicate,
s.crossbar.send[0].msg.payload)

def line_trace(s):
send_to_ctrl_ring_ctrl_pkt_str = "send_to_ctrl_ring_ctrl_pkt: " + str(s.send_to_ctrl_ring_ctrl_pkt.msg)
Expand Down
Loading

0 comments on commit cbd277b

Please sign in to comment.