Skip to content

Commit

Permalink
controller: split mg action in table 39 and 40 to fit kernel netlink …
Browse files Browse the repository at this point in the history
…buffer size

Introduce the capability to split the multicast group OpenFlow actions
created in the consider_mc_group routine across multiple buffers when a
single buffer would exceed the netlink buffer size limit.

Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2232152
Signed-off-by: Lorenzo Bianconi <[email protected]>
Acked-by: Mark Michelson <[email protected]>
Acked-by: Ales Musil <[email protected]>
Signed-off-by: Numan Siddique <[email protected]>
  • Loading branch information
LorenzoBianconi authored and numansiddique committed Nov 3, 2023
1 parent cb90eb3 commit 325c7b2
Show file tree
Hide file tree
Showing 5 changed files with 319 additions and 78 deletions.
226 changes: 148 additions & 78 deletions controller/physical.c
Original file line number Diff line number Diff line change
Expand Up @@ -1969,6 +1969,57 @@ local_output_pb(int64_t tunnel_key, struct ofpbuf *ofpacts)
put_resubmit(OFTABLE_CHECK_LOOPBACK, ofpacts);
}

/* Threshold compared against ofpacts->size in mc_ofctrl_add_flow(): once the
 * accumulated multicast group actions reach this size they are flushed into
 * a flow of their own instead of growing further. */
#define MC_OFPACTS_MAX_MSG_SIZE 8192
/* Initial value of the per-group flow index.  mc_ofctrl_add_flow() treats a
 * flow index equal to this value as "first buffer" and, for it, neither
 * matches on MFF_REG6 nor raises the flow priority. */
#define MC_BUF_START_ID 0x9000

/* Flushes the multicast group actions accumulated in 'ofpacts' into a flow
 * in table 'stage' of 'flow_table', if it is time to do so.
 *
 * Nothing is installed while 'index' is before the last port of 'mc' and
 * 'ofpacts' is still smaller than MC_OFPACTS_MAX_MSG_SIZE; the caller simply
 * keeps appending actions.  Otherwise a flow is added with 'match' and
 * 'ofpacts':
 *
 *   - For every buffer but the first one (*pflow_index differs from
 *     MC_BUF_START_ID) 'match' additionally matches MFF_REG6 against the
 *     current flow index and the priority is 'prio' + 10.
 *
 *   - On the last port the actions in 'ofpacts_last' are appended.
 *
 *   - On any other port an ACTION_OPCODE_MG_SPLIT_BUF controller action is
 *     appended, carrying the next flow index, the multicast group tunnel key
 *     and 'stage', and the flow index is advanced.
 *
 * After the flow is added 'ofpacts' is emptied and re-seeded with an action
 * that zeroes MFF_REG6, and the (possibly advanced) flow index is stored back
 * in '*pflow_index'. */
static void
mc_ofctrl_add_flow(const struct sbrec_multicast_group *mc,
                   struct match *match, struct ofpbuf *ofpacts,
                   struct ofpbuf *ofpacts_last, uint8_t stage,
                   size_t index, uint32_t *pflow_index,
                   uint16_t prio, struct ovn_desired_flow_table *flow_table)
{
    const size_t last_index = mc->n_ports - 1;

    /* Stay below the maximum netlink message size that ovs-vswitchd uses to
     * push the configuration to the kernel: only flush once the buffer is
     * large enough or there are no more ports to process. */
    if (index < last_index && ofpacts->size < MC_OFPACTS_MAX_MSG_SIZE) {
        return;
    }

    uint32_t buf_id = *pflow_index;
    uint16_t flow_prio = prio;

    if (buf_id != MC_BUF_START_ID) {
        /* Continuation buffer: select it through MFF_REG6. */
        match_set_reg(match, MFF_REG6 - MFF_REG0, buf_id);
        flow_prio += 10;
    }

    if (index != last_index) {
        /* A multicast group larger than MC_OFPACTS_MAX_MSG_SIZE is split so
         * that it does not exceed the MAX_ACTIONS_BUFSIZE netlink buffer
         * size supported by the kernel.  A controller action terminates each
         * partial buffer so that ovs does not squash all the action buffers
         * back together. */
        size_t ctrl_ofs = encode_start_controller_op(
            ACTION_OPCODE_MG_SPLIT_BUF, false, NX_CTLR_NO_METER, ofpacts);

        buf_id++;
        ovs_be32 next_buf_id = htonl(buf_id);
        ovs_be32 mc_key = htonl(mc->tunnel_key);

        ofpbuf_put(ofpacts, &next_buf_id, sizeof next_buf_id);
        ofpbuf_put(ofpacts, &mc_key, sizeof mc_key);
        ofpbuf_put(ofpacts, &stage, sizeof stage);
        encode_finish_controller_op(ctrl_ofs, ofpacts);
    } else {
        /* Last port of the group: close the chain with the trailer. */
        ofpbuf_put(ofpacts, ofpacts_last->data, ofpacts_last->size);
    }

    ofctrl_add_flow(flow_table, stage, flow_prio, mc->header_.uuid.parts[0],
                    match, ofpacts, &mc->header_.uuid);

    /* Start the next buffer from scratch, resetting MFF_REG6 first. */
    ofpbuf_clear(ofpacts);
    put_load(0, MFF_REG6, 0, 32, ofpacts);

    *pflow_index = buf_id;
}

static void
consider_mc_group(struct ovsdb_idl_index *sbrec_port_binding_by_name,
enum mf_field_id mff_ovn_geneve,
Expand All @@ -1990,9 +2041,6 @@ consider_mc_group(struct ovsdb_idl_index *sbrec_port_binding_by_name,
struct sset remote_chassis = SSET_INITIALIZER(&remote_chassis);
struct sset vtep_chassis = SSET_INITIALIZER(&vtep_chassis);

struct match match;
match_outport_dp_and_port_keys(&match, dp_key, mc->tunnel_key);

/* Go through all of the ports in the multicast group:
*
* - For remote ports, add the chassis to 'remote_chassis' or
Expand All @@ -2014,9 +2062,20 @@ consider_mc_group(struct ovsdb_idl_index *sbrec_port_binding_by_name,
* the redirect port was added.
*/
struct ofpbuf ofpacts, remote_ofpacts, remote_ofpacts_ramp;
struct ofpbuf ofpacts_last, ofpacts_ramp_last;
ofpbuf_init(&ofpacts, 0);
ofpbuf_init(&remote_ofpacts, 0);
ofpbuf_init(&remote_ofpacts_ramp, 0);
ofpbuf_init(&ofpacts_last, 0);
ofpbuf_init(&ofpacts_ramp_last, 0);

bool local_ports = false, remote_ports = false, remote_ramp_ports = false;

/* local port loop. */
uint32_t flow_index = MC_BUF_START_ID;
put_load(0, MFF_REG6, 0, 32, &ofpacts);
put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32, &ofpacts_last);

for (size_t i = 0; i < mc->n_ports; i++) {
struct sbrec_port_binding *port = mc->ports[i];

Expand All @@ -2040,19 +2099,15 @@ consider_mc_group(struct ovsdb_idl_index *sbrec_port_binding_by_name,
if (ldp->is_transit_switch) {
local_output_pb(port->tunnel_key, &ofpacts);
} else {
local_output_pb(port->tunnel_key, &remote_ofpacts);
local_output_pb(port->tunnel_key, &remote_ofpacts_ramp);
remote_ramp_ports = true;
remote_ports = true;
}
} if (!strcmp(port->type, "remote")) {
if (port->chassis) {
put_load(port->tunnel_key, MFF_LOG_OUTPORT, 0, 32,
&remote_ofpacts);
tunnel_to_chassis(mff_ovn_geneve, port->chassis->name,
chassis_tunnels, mc->datapath,
port->tunnel_key, &remote_ofpacts);
remote_ports = true;
}
} else if (!strcmp(port->type, "localport")) {
local_output_pb(port->tunnel_key, &remote_ofpacts);
remote_ports = true;
} else if ((port->chassis == chassis
|| is_additional_chassis(port, chassis))
&& (local_binding_get_primary_pb(local_bindings, lport_name)
Expand Down Expand Up @@ -2095,86 +2150,101 @@ consider_mc_group(struct ovsdb_idl_index *sbrec_port_binding_by_name,
}
}
}

local_ports |= (ofpacts.size > 0);
if (!local_ports) {
continue;
}

struct match match;
match_outport_dp_and_port_keys(&match, dp_key, mc->tunnel_key);
mc_ofctrl_add_flow(mc, &match, &ofpacts, &ofpacts_last,
OFTABLE_LOCAL_OUTPUT, i, &flow_index, 100,
flow_table);
}

/* Table 40, priority 100.
* =======================
*
* Handle output to the local logical ports in the multicast group, if
* any. */
bool local_ports = ofpacts.size > 0;
if (local_ports) {
/* Following delivery to local logical ports, restore the multicast
* group as the logical output port. */
put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32, &ofpacts);
/* remote port loop. */
ofpbuf_clear(&ofpacts_last);
if (remote_ports) {
put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32, &ofpacts_last);
}

ofctrl_add_flow(flow_table, OFTABLE_LOCAL_OUTPUT, 100,
mc->header_.uuid.parts[0],
&match, &ofpacts, &mc->header_.uuid);
fanout_to_chassis(mff_ovn_geneve, &remote_chassis, chassis_tunnels,
mc->datapath, mc->tunnel_key, false, &ofpacts_last);
fanout_to_chassis(mff_ovn_geneve, &vtep_chassis, chassis_tunnels,
mc->datapath, mc->tunnel_key, true, &ofpacts_last);

remote_ports |= (ofpacts_last.size > 0);
if (remote_ports && local_ports) {
put_resubmit(OFTABLE_LOCAL_OUTPUT, &ofpacts_last);
}

/* Table 39, priority 100.
* =======================
*
* Handle output to the remote chassis in the multicast group, if
* any. */
if (!sset_is_empty(&remote_chassis) ||
!sset_is_empty(&vtep_chassis) || remote_ofpacts.size > 0) {
if (remote_ofpacts.size > 0) {
/* Following delivery to logical patch ports, restore the
* multicast group as the logical output port. */
put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32,
&remote_ofpacts);

if (get_vtep_port(local_datapaths, mc->datapath->tunnel_key)) {
struct match match_ramp;
match_set_reg_masked(&match, MFF_LOG_FLAGS - MFF_REG0, 0,
MLF_RCV_FROM_RAMP);

put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32,
&remote_ofpacts_ramp);

/* MCAST traffic which was originally received from RAMP_SWITCH
* is not allowed to be re-sent to remote_chassis.
* Process "patch" port binding for routing in
* OFTABLE_REMOTE_OUTPUT and resubmit packets to
* OFTABLE_LOCAL_OUTPUT for local delivery. */

match_outport_dp_and_port_keys(&match_ramp, dp_key,
mc->tunnel_key);

/* Match packets coming from RAMP_SWITCH and allowed for
* loopback processing (including routing). */
match_set_reg_masked(&match_ramp, MFF_LOG_FLAGS - MFF_REG0,
MLF_RCV_FROM_RAMP | MLF_ALLOW_LOOPBACK,
MLF_RCV_FROM_RAMP | MLF_ALLOW_LOOPBACK);

put_resubmit(OFTABLE_LOCAL_OUTPUT, &remote_ofpacts_ramp);

ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 120,
mc->header_.uuid.parts[0], &match_ramp,
&remote_ofpacts_ramp, &mc->header_.uuid);
bool has_vtep = get_vtep_port(local_datapaths, mc->datapath->tunnel_key);
uint32_t reverse_ramp_flow_index = MC_BUF_START_ID;
flow_index = MC_BUF_START_ID;

put_load(0, MFF_REG6, 0, 32, &remote_ofpacts);
put_load(0, MFF_REG6, 0, 32, &remote_ofpacts_ramp);

put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32, &ofpacts_ramp_last);
put_resubmit(OFTABLE_LOCAL_OUTPUT, &ofpacts_ramp_last);

for (size_t i = 0; remote_ports && i < mc->n_ports; i++) {
struct sbrec_port_binding *port = mc->ports[i];

if (port->datapath != mc->datapath) {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
VLOG_WARN_RL(&rl, UUID_FMT": multicast group contains ports "
"in wrong datapath",
UUID_ARGS(&mc->header_.uuid));
continue;
}

if (!strcmp(port->type, "patch")) {
if (!ldp->is_transit_switch) {
local_output_pb(port->tunnel_key, &remote_ofpacts);
local_output_pb(port->tunnel_key, &remote_ofpacts_ramp);
}
} if (!strcmp(port->type, "remote")) {
if (port->chassis) {
put_load(port->tunnel_key, MFF_LOG_OUTPORT, 0, 32,
&remote_ofpacts);
tunnel_to_chassis(mff_ovn_geneve, port->chassis->name,
chassis_tunnels, mc->datapath,
port->tunnel_key, &remote_ofpacts);
}
} else if (!strcmp(port->type, "localport")) {
local_output_pb(port->tunnel_key, &remote_ofpacts);
}

fanout_to_chassis(mff_ovn_geneve, &remote_chassis, chassis_tunnels,
mc->datapath, mc->tunnel_key, false,
&remote_ofpacts);
fanout_to_chassis(mff_ovn_geneve, &vtep_chassis, chassis_tunnels,
mc->datapath, mc->tunnel_key, true,
&remote_ofpacts);
struct match match;
match_outport_dp_and_port_keys(&match, dp_key, mc->tunnel_key);
if (has_vtep) {
match_set_reg_masked(&match, MFF_LOG_FLAGS - MFF_REG0, 0,
MLF_RCV_FROM_RAMP);
}
mc_ofctrl_add_flow(mc, &match, &remote_ofpacts, &ofpacts_last,
OFTABLE_REMOTE_OUTPUT, i, &flow_index, 100,
flow_table);

if (remote_ofpacts.size) {
if (local_ports) {
put_resubmit(OFTABLE_LOCAL_OUTPUT, &remote_ofpacts);
}
ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 100,
mc->header_.uuid.parts[0],
&match, &remote_ofpacts, &mc->header_.uuid);
if (!remote_ramp_ports || !has_vtep) {
continue;
}

struct match match_ramp;
match_outport_dp_and_port_keys(&match_ramp, dp_key, mc->tunnel_key);
match_set_reg_masked(&match_ramp, MFF_LOG_FLAGS - MFF_REG0,
MLF_RCV_FROM_RAMP | MLF_ALLOW_LOOPBACK,
MLF_RCV_FROM_RAMP | MLF_ALLOW_LOOPBACK);
mc_ofctrl_add_flow(mc, &match_ramp, &remote_ofpacts_ramp,
&ofpacts_ramp_last, OFTABLE_REMOTE_OUTPUT, i,
&reverse_ramp_flow_index, 120, flow_table);
}

ofpbuf_uninit(&ofpacts);
ofpbuf_uninit(&remote_ofpacts);
ofpbuf_uninit(&ofpacts_last);
ofpbuf_uninit(&ofpacts_ramp_last);
ofpbuf_uninit(&remote_ofpacts_ramp);
sset_destroy(&remote_chassis);
sset_destroy(&vtep_chassis);
Expand Down
66 changes: 66 additions & 0 deletions controller/pinctrl.c
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,10 @@ static void send_mac_binding_buffered_pkts(struct rconn *swconn)

static void pinctrl_rarp_activation_strategy_handler(const struct match *md);

static void pinctrl_mg_split_buff_handler(
struct rconn *swconn, struct dp_packet *pkt,
const struct match *md, struct ofpbuf *userdata);

static void init_activated_ports(void);
static void destroy_activated_ports(void);
static void wait_activated_ports(void);
Expand Down Expand Up @@ -3283,6 +3287,11 @@ process_packet_in(struct rconn *swconn, const struct ofp_header *msg)
ovs_mutex_unlock(&pinctrl_mutex);
break;

case ACTION_OPCODE_MG_SPLIT_BUF:
pinctrl_mg_split_buff_handler(swconn, &packet, &pin.flow_metadata,
&userdata);
break;

default:
VLOG_WARN_RL(&rl, "unrecognized packet-in opcode %"PRIu32,
ntohl(ah->opcode));
Expand Down Expand Up @@ -8154,6 +8163,63 @@ pinctrl_rarp_activation_strategy_handler(const struct match *md)
notify_pinctrl_main();
}

static void
pinctrl_mg_split_buff_handler(struct rconn *swconn, struct dp_packet *pkt,
const struct match *md, struct ofpbuf *userdata)
{
ovs_be32 *index = ofpbuf_try_pull(userdata, sizeof *index);
if (!index) {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
VLOG_WARN_RL(&rl, "%s: missing index field", __func__);
return;
}

ovs_be32 *mg = ofpbuf_try_pull(userdata, sizeof *mg);
if (!mg) {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
VLOG_WARN_RL(&rl, "%s: missing multicast group field", __func__);
return;
}

uint8_t *table_id = ofpbuf_try_pull(userdata, sizeof *table_id);
if (!table_id) {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
VLOG_WARN_RL(&rl, "%s: missing table_id field", __func__);
return;
}

struct ofpbuf ofpacts;
ofpbuf_init(&ofpacts, 4096);
reload_metadata(&ofpacts, md);

/* reload pkt_mark field */
const struct mf_field *pkt_mark_field = mf_from_id(MFF_PKT_MARK);
union mf_value pkt_mark_value;
mf_get_value(pkt_mark_field, &md->flow, &pkt_mark_value);
ofpact_put_set_field(&ofpacts, pkt_mark_field, &pkt_mark_value, NULL);

put_load(ntohl(*index), MFF_REG6, 0, 32, &ofpacts);
put_load(ntohl(*mg), MFF_LOG_OUTPORT, 0, 32, &ofpacts);

struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&ofpacts);
resubmit->in_port = OFPP_CONTROLLER;
resubmit->table_id = *table_id;

struct ofputil_packet_out po = {
.packet = dp_packet_data(pkt),
.packet_len = dp_packet_size(pkt),
.buffer_id = UINT32_MAX,
.ofpacts = ofpacts.data,
.ofpacts_len = ofpacts.size,
};
match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
enum ofp_version version = rconn_get_version(swconn);
enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
queue_msg(swconn, ofputil_encode_packet_out(&po, proto));

ofpbuf_uninit(&ofpacts);
}

static struct hmap put_fdbs;

/* MAC learning (fdb) related functions. Runs within the main
Expand Down
3 changes: 3 additions & 0 deletions include/ovn/actions.h
Original file line number Diff line number Diff line change
Expand Up @@ -747,6 +747,9 @@ enum action_opcode {

/* activation_strategy_rarp() */
ACTION_OPCODE_ACTIVATION_STRATEGY_RARP,

/* multicast group split buffer action. */
ACTION_OPCODE_MG_SPLIT_BUF,
};

/* Header. */
Expand Down
Loading

0 comments on commit 325c7b2

Please sign in to comment.