Skip to content

Commit

Permalink
Add DASH HA session API design. (#532)
Browse files Browse the repository at this point in the history
This change is made to add HA session API HLD and P4 changes for SAI API generation.

To describe how the HA session API works, the HLD in this change contains:

- The key fundamental components that are used to form the HA set topology and their SAI API design.
- How we could implement HA in the DASH behavior model and how each component works with the others.
- The life of the packet for inline flow sync.
- The SAI API call sequence of several typical HA related work flows.

The change also contains the P4 code that generates the SAI APIs.
  • Loading branch information
r12f authored Apr 4, 2024
1 parent f2200ff commit 31b94c8
Show file tree
Hide file tree
Showing 30 changed files with 913 additions and 10 deletions.
6 changes: 6 additions & 0 deletions .wordlist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ Accton
ACK
Ack
ack
ack'ed
acl
ACL
ACLs
Expand Down Expand Up @@ -304,6 +305,7 @@ IxLoad
ixload
IxNetwork
IxNetworkWeb
Jiang
Jinja
jitter
journaled
Expand Down Expand Up @@ -372,6 +374,7 @@ NonSynStateful
NorthBound
Novus
NPL
NPU
NPUS
NSG
NSGs
Expand Down Expand Up @@ -471,11 +474,13 @@ README
READMEs
README's
reconvergence
RECV
RedirectRuleResimulatedUf
redis
renderer
repo
repos
REQ
resimulated
resimulation
responder
Expand Down Expand Up @@ -534,6 +539,7 @@ SmartAppliances
SmartNIC
SmartNic
SmartNICs
SmartSwitch
SmartSwitches
snappi
SNAT
Expand Down
2 changes: 1 addition & 1 deletion dash-pipeline/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ sai-submodule:
# P4 Source code compile TARGETS
######################################

P4_SRC=$(wildcard bmv2/*.p4)
# GNU make's wildcard does not treat ** as a recursive glob (it behaves like a
# single *), so bmv2/**/*.p4 alone would match only files one directory deep and
# miss the top-level bmv2/*.p4 sources. List both levels explicitly.
P4_SRC=$(wildcard bmv2/*.p4 bmv2/*/*.p4)
P4_MAIN=bmv2/dash_pipeline.p4
P4_OUTDIR=bmv2/dash_pipeline.bmv2
P4_ARTIFACTS=$(P4_OUTDIR)/dash_pipeline.json $(P4_OUTDIR)/dash_pipeline_p4rt.txt
Expand Down
8 changes: 8 additions & 0 deletions dash-pipeline/bmv2/dash_arch_specific.p4
Original file line number Diff line number Diff line change
Expand Up @@ -82,4 +82,12 @@

#endif // TARGET_DPDK_PNA

//
// Utility macros
//

// Stringification helpers.
// - PP_STR_RAW(x) turns its argument into a string literal exactly as written.
// - PP_STR(x) goes through one extra macro level, so x is macro-expanded (including
//   any ## token pasting inside it) before it is stringified.
#define PP_STR_RAW(x) #x
#define PP_STR(x) PP_STR_RAW(x)

#endif // __DASH_TARGET_SPECIFIC__
72 changes: 70 additions & 2 deletions dash-pipeline/bmv2/dash_metadata.p4
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@

#include "dash_headers.p4"

// Sizes passed to DEFINE_COUNTER / DEFINE_HIT_COUNTER for the per-ENI and
// per-HA-set counters respectively.
#define MAX_ENI 64
#define MAX_HA_SET 1

enum bit<32> dash_routing_actions_t {
NONE = 0,
STATIC_ENCAP = (1 << 0),
Expand All @@ -14,7 +17,21 @@ enum bit<16> dash_direction_t {
INVALID = 0,
OUTBOUND = 1,
INBOUND = 2
}
};

// Origin of a packet; this is the type of metadata_t.packet_source.
enum bit<8> dash_packet_source_t {
EXTERNAL = 0, // Packets from external sources.
DPAPP = 1, // Packets from data plane app.
PEER = 2 // Packets from the paired DPU.
};

// Kind of packet being processed; this is the type of metadata_t.packet_type.
enum bit<8> dash_packet_type_t {
REGULAR = 0, // Regular packets from external sources.
FLOW_SYNC_REQ = 1, // Flow sync request packet.
FLOW_SYNC_ACK = 2, // Flow sync ack packet.
DP_PROBE_REQ = 3, // Data plane probe request packet.
DP_PROBE_ACK = 4 // Data plane probe ack packet.
};

// Pipeline stages:
enum bit<16> dash_pipeline_stage_t {
Expand All @@ -30,7 +47,7 @@ enum bit<16> dash_pipeline_stage_t {

// Common stages
ROUTING_ACTION_APPLY = 300
}
};

struct conntrack_data_t {
bool allow_in;
Expand Down Expand Up @@ -73,7 +90,55 @@ struct overlay_rewrite_data_t {
IPv6Address dip_mask;
}

// HA roles. This is the type of ha_data_t.ha_role.
enum bit<8> dash_ha_role_t {
DEAD = 0,
ACTIVE = 1,
STANDBY = 2,
STANDALONE = 3,
SWITCHING_TO_ACTIVE = 4
};

// Flow sync state. This is the type of ha_data_t.flow_sync_state.
enum bit<8> dash_ha_flow_sync_state_t {
FLOW_MISS = 0, // Flow not created yet
FLOW_CREATED = 1, // Flow is created but not synced yet, or is waiting for the sync ack
FLOW_SYNCED = 2, // Flow has been synced to its peer
FLOW_PENDING_DELETE = 3, // Flow is pending deletion, waiting for ack
FLOW_PENDING_RESIMULATION = 4 // Flow is marked as pending resimulation
};

// HA flow sync operations: the kind of flow change carried by a flow sync request.
enum bit<8> dash_ha_flow_sync_op_t {
FLOW_CREATE = 0, // New flow creation.
FLOW_UPDATE = 1, // Flow resimulation or any other reason causing existing flow to be updated.
FLOW_DELETE = 2 // Flow deletion.
};

// Per-packet HA context, embedded in metadata_t as the `ha` field.
struct ha_data_t {
// HA scope settings
// ha_scope_id is the lookup key of the ha_scope table; ha_set_id and ha_role are
// written by the set_ha_scope_attr action.
bit<16> ha_scope_id;
bit<16> ha_set_id;
dash_ha_role_t ha_role;

// HA set settings
// The peer_ip* and dp_channel_* fields are written by the set_ha_set_attr action.
// NOTE(review): no visible code writes local_ip_is_v6 / local_ip - confirm.
bit<1> local_ip_is_v6;
IPv4ORv6Address local_ip;
bit<1> peer_ip_is_v6;
IPv4ORv6Address peer_ip;
bit<16> dp_channel_dst_port;
bit<16> dp_channel_src_port_min;
bit<16> dp_channel_src_port_max;

// HA packet/flow state
dash_ha_flow_sync_state_t flow_sync_state;
}

struct metadata_t {
// Packet type
dash_packet_source_t packet_source; // TODO: Parse packet source in parser.
dash_packet_type_t packet_type; // TODO: Parse packet type in parser.

// Lookup context
dash_direction_t direction;
EthernetAddress eni_addr;
Expand Down Expand Up @@ -109,6 +174,9 @@ struct metadata_t {
bool is_fast_path_icmp_flow_redirection_packet;
bit<1> fast_path_icmp_flow_redirection_disabled;

// HA
ha_data_t ha;

// Stage transition control
dash_pipeline_stage_t target_stage;

Expand Down
31 changes: 26 additions & 5 deletions dash-pipeline/bmv2/dash_pipeline.p4
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,11 @@
#include "dash_conntrack.p4"
#include "stages/direction_lookup.p4"
#include "stages/eni_lookup.p4"
#include "stages/ha.p4"
#include "stages/routing_action_apply.p4"
#include "stages/metering_update.p4"
#include "underlay.p4"

#define MAX_ENI 64

control dash_ingress(
inout headers_t hdr
, inout metadata_t meta
Expand Down Expand Up @@ -92,12 +91,19 @@ control dash_ingress(
meta.stage4_dash_acl_group_id = ## prefix ##_stage4_dash_acl_group_id; \
meta.stage5_dash_acl_group_id = ## prefix ##_stage5_dash_acl_group_id;

DEFINE_COUNTER(eni_lb_fast_path_icmp_in_counter, MAX_ENI, name="lb_fast_path_icmp_in", attr_type="stats", action_names="set_eni_attrs")
DEFINE_COUNTER(eni_rx_counter, MAX_ENI, name="rx", attr_type="stats", action_names="set_eni_attrs", order=0)
DEFINE_COUNTER(eni_tx_counter, MAX_ENI, name="tx", attr_type="stats", action_names="set_eni_attrs", order=0)
DEFINE_COUNTER(eni_outbound_rx_counter, MAX_ENI, name="outbound_rx", attr_type="stats", action_names="set_eni_attrs", order=0)
DEFINE_COUNTER(eni_outbound_tx_counter, MAX_ENI, name="outbound_tx", attr_type="stats", action_names="set_eni_attrs", order=0)
DEFINE_COUNTER(eni_inbound_rx_counter, MAX_ENI, name="inbound_rx", attr_type="stats", action_names="set_eni_attrs", order=0)
DEFINE_COUNTER(eni_inbound_tx_counter, MAX_ENI, name="inbound_tx", attr_type="stats", action_names="set_eni_attrs", order=0)
DEFINE_COUNTER(eni_lb_fast_path_icmp_in_counter, MAX_ENI, name="lb_fast_path_icmp_in", attr_type="stats", action_names="set_eni_attrs", order=0)

action set_eni_attrs(bit<32> cps,
bit<32> pps,
bit<32> flows,
bit<1> admin_state,
@SaiVal[type="sai_object_id_t"] bit<16> ha_scope_id,
@SaiVal[type="sai_ip_address_t"] IPv4Address vm_underlay_dip,
@SaiVal[type="sai_uint32_t"] bit<24> vm_vni,
@SaiVal[type="sai_object_id_t"] bit<16> vnet_id,
Expand Down Expand Up @@ -144,7 +150,8 @@ control dash_ingress(
}
meta.meter_policy_id = v4_meter_policy_id;
}


meta.ha.ha_scope_id = ha_scope_id;
meta.fast_path_icmp_flow_redirection_disabled = disable_fast_path_icmp_flow_redirection;
}

Expand Down Expand Up @@ -302,17 +309,23 @@ control dash_ingress(
if (meta.eni_data.admin_state == 0) {
deny();
}


UPDATE_COUNTER(eni_rx_counter, meta.eni_id);
if (meta.is_fast_path_icmp_flow_redirection_packet) {
UPDATE_COUNTER(eni_lb_fast_path_icmp_in_counter, meta.eni_id);
}

ha_stage.apply(hdr, meta);

acl_group.apply();

if (meta.direction == dash_direction_t.OUTBOUND) {
UPDATE_COUNTER(eni_outbound_rx_counter, meta.eni_id);

meta.target_stage = dash_pipeline_stage_t.OUTBOUND_ROUTING;
outbound.apply(hdr, meta);
} else if (meta.direction == dash_direction_t.INBOUND) {
UPDATE_COUNTER(eni_inbound_rx_counter, meta.eni_id);
inbound.apply(hdr, meta);
}

Expand Down Expand Up @@ -340,6 +353,14 @@ control dash_ingress(

if (meta.dropped) {
drop_action();
} else {
UPDATE_COUNTER(eni_tx_counter, meta.eni_id);

if (meta.direction == dash_direction_t.OUTBOUND) {
UPDATE_COUNTER(eni_outbound_tx_counter, meta.eni_id);
} else if (meta.direction == dash_direction_t.INBOUND) {
UPDATE_COUNTER(eni_inbound_tx_counter, meta.eni_id);
}
}
}
}
Expand Down
133 changes: 133 additions & 0 deletions dash-pipeline/bmv2/stages/ha.p4
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
#ifndef _DASH_STAGE_HA_P4_
#define _DASH_STAGE_HA_P4_

// HA (high availability) stage.
//
// Declares the ENI-level and HA-set-level counters (all with attr_type="stats") and
// the ha_scope / ha_set tables whose actions copy HA settings into meta.ha.
// The HA state machine itself is not implemented yet (see the TODO in apply).
control ha_stage(inout headers_t hdr,
                 inout metadata_t meta)
{
    //
    // ENI-level flow operation counters:
    //
    DEFINE_HIT_COUNTER(flow_created_counter, MAX_ENI, name="flow_created", attr_type="stats", action_names="set_eni_attrs", order=1)
    DEFINE_HIT_COUNTER(flow_create_failed_counter, MAX_ENI, name="flow_create_failed", attr_type="stats", action_names="set_eni_attrs", order=1)
    DEFINE_HIT_COUNTER(flow_updated_counter, MAX_ENI, name="flow_updated", attr_type="stats", action_names="set_eni_attrs", order=1)
    DEFINE_HIT_COUNTER(flow_update_failed_counter, MAX_ENI, name="flow_update_failed", attr_type="stats", action_names="set_eni_attrs", order=1)
    DEFINE_HIT_COUNTER(flow_deleted_counter, MAX_ENI, name="flow_deleted", attr_type="stats", action_names="set_eni_attrs", order=1)
    DEFINE_HIT_COUNTER(flow_delete_failed_counter, MAX_ENI, name="flow_delete_failed", attr_type="stats", action_names="set_eni_attrs", order=1)
    DEFINE_HIT_COUNTER(flow_aged_counter, MAX_ENI, name="flow_aged", attr_type="stats", action_names="set_eni_attrs", order=1)

    //
    // ENI-level flow sync packet counters:
    //
    DEFINE_COUNTER(inline_sync_packet_rx_counter, MAX_ENI, name="inline_sync_packet_rx", attr_type="stats", action_names="set_eni_attrs", order=2)
    DEFINE_COUNTER(inline_sync_packet_tx_counter, MAX_ENI, name="inline_sync_packet_tx", attr_type="stats", action_names="set_eni_attrs", order=2)
    DEFINE_COUNTER(timed_sync_packet_rx_counter, MAX_ENI, name="timed_sync_packet_rx", attr_type="stats", action_names="set_eni_attrs", order=2)
    DEFINE_COUNTER(timed_sync_packet_tx_counter, MAX_ENI, name="timed_sync_packet_tx", attr_type="stats", action_names="set_eni_attrs", order=2)

    //
    // ENI-level flow sync request counters:
    // - Depending on the implementation, flow sync requests could be batched, hence they
    //   need to be tracked separately from the sync packets above.
    // - The counters are defined as the combination of the following:
    //   - 3 flow sync operations: create, update, delete.
    //   - 2 ways of sync: inline sync and timed sync.
    //   - Request result: succeeded, failed (unexpected) and ignored (expected and ok to
    //     ignore, e.g., more packets arrive before the flow sync is acked).
    // - counter_name is pasted into both the P4 identifier and, via PP_STR, the name
    //   string of every counter.
    // - Every counter needs a distinct name: the *_req_ignored counters must not reuse
    //   the *_req_failed name.
    //
    #define DEFINE_ENI_FLOW_SYNC_COUNTERS(counter_name) \
        DEFINE_HIT_COUNTER(inline_ ## counter_name ## _req_sent_counter, MAX_ENI, name=PP_STR(inline_ ## counter_name ## _req_sent), attr_type="stats", action_names="set_eni_attrs", order=2) \
        DEFINE_HIT_COUNTER(inline_ ## counter_name ## _req_recv_counter, MAX_ENI, name=PP_STR(inline_ ## counter_name ## _req_recv), attr_type="stats", action_names="set_eni_attrs", order=2) \
        DEFINE_HIT_COUNTER(inline_ ## counter_name ## _req_failed_counter, MAX_ENI, name=PP_STR(inline_ ## counter_name ## _req_failed), attr_type="stats", action_names="set_eni_attrs", order=2) \
        DEFINE_HIT_COUNTER(inline_ ## counter_name ## _req_ignored_counter, MAX_ENI, name=PP_STR(inline_ ## counter_name ## _req_ignored), attr_type="stats", action_names="set_eni_attrs", order=2) \
        DEFINE_HIT_COUNTER(inline_ ## counter_name ## _ack_recv_counter, MAX_ENI, name=PP_STR(inline_ ## counter_name ## _ack_recv), attr_type="stats", action_names="set_eni_attrs", order=2) \
        DEFINE_HIT_COUNTER(inline_ ## counter_name ## _ack_failed_counter, MAX_ENI, name=PP_STR(inline_ ## counter_name ## _ack_failed_recv), attr_type="stats", action_names="set_eni_attrs", order=2) \
        DEFINE_HIT_COUNTER(inline_ ## counter_name ## _ack_ignored_counter, MAX_ENI, name=PP_STR(inline_ ## counter_name ## _ack_ignored_recv), attr_type="stats", action_names="set_eni_attrs", order=2) \
        \
        DEFINE_HIT_COUNTER(timed_ ## counter_name ## _req_sent_counter, MAX_ENI, name=PP_STR(timed_ ## counter_name ## _req_sent), attr_type="stats", action_names="set_eni_attrs", order=2) \
        DEFINE_HIT_COUNTER(timed_ ## counter_name ## _req_recv_counter, MAX_ENI, name=PP_STR(timed_ ## counter_name ## _req_recv), attr_type="stats", action_names="set_eni_attrs", order=2) \
        DEFINE_HIT_COUNTER(timed_ ## counter_name ## _req_failed_counter, MAX_ENI, name=PP_STR(timed_ ## counter_name ## _req_failed), attr_type="stats", action_names="set_eni_attrs", order=2) \
        DEFINE_HIT_COUNTER(timed_ ## counter_name ## _req_ignored_counter, MAX_ENI, name=PP_STR(timed_ ## counter_name ## _req_ignored), attr_type="stats", action_names="set_eni_attrs", order=2) \
        DEFINE_HIT_COUNTER(timed_ ## counter_name ## _ack_recv_counter, MAX_ENI, name=PP_STR(timed_ ## counter_name ## _ack_recv), attr_type="stats", action_names="set_eni_attrs", order=2) \
        DEFINE_HIT_COUNTER(timed_ ## counter_name ## _ack_failed_counter, MAX_ENI, name=PP_STR(timed_ ## counter_name ## _ack_failed_recv), attr_type="stats", action_names="set_eni_attrs", order=2) \
        DEFINE_HIT_COUNTER(timed_ ## counter_name ## _ack_ignored_counter, MAX_ENI, name=PP_STR(timed_ ## counter_name ## _ack_ignored_recv), attr_type="stats", action_names="set_eni_attrs", order=2)

    DEFINE_ENI_FLOW_SYNC_COUNTERS(flow_create)
    DEFINE_ENI_FLOW_SYNC_COUNTERS(flow_update)
    DEFINE_ENI_FLOW_SYNC_COUNTERS(flow_delete)

    //
    // HA scope:
    //
    // Copies the HA set id and HA role of the matched scope into meta.ha.
    // flow_version is annotated read-only and is not used by the action body.
    action set_ha_scope_attr(
        @SaiVal[type="sai_object_id_t"] bit<16> ha_set_id,
        @SaiVal[type="sai_dash_ha_role_t"] dash_ha_role_t dash_ha_role,
        @SaiVal[isreadonly="true"] bit<32> flow_version
    ) {
        meta.ha.ha_set_id = ha_set_id;
        meta.ha.ha_role = dash_ha_role;
    }

    // Keyed by the HA scope id.
    @SaiTable[api = "dash_ha", order=1, isobject="true"]
    table ha_scope {
        key = {
            meta.ha.ha_scope_id : exact;
        }
        actions = {
            set_ha_scope_attr;
        }
    }

    //
    // HA set:
    //
    DEFINE_COUNTER(dp_probe_req_rx, MAX_HA_SET, name="dp_probe_req_rx", attr_type="stats", action_names="set_ha_set_attr")
    DEFINE_COUNTER(dp_probe_req_tx, MAX_HA_SET, name="dp_probe_req_tx", attr_type="stats", action_names="set_ha_set_attr")
    DEFINE_COUNTER(dp_probe_ack_rx, MAX_HA_SET, name="dp_probe_ack_rx", attr_type="stats", action_names="set_ha_set_attr")
    DEFINE_COUNTER(dp_probe_ack_tx, MAX_HA_SET, name="dp_probe_ack_tx", attr_type="stats", action_names="set_ha_set_attr")
    DEFINE_HIT_COUNTER(dp_probe_failed, MAX_HA_SET, name="dp_probe_failed", attr_type="stats", action_names="set_ha_set_attr")

    // Copies the peer address and data plane channel ports of the matched HA set
    // into meta.ha. The probe interval, probe fail threshold and the read-only
    // dp_channel_is_alive parameters are not used by the action body.
    // NOTE(review): local_ip_is_v6 / local_ip are accepted but never stored in
    // meta.ha.local_ip_is_v6 / meta.ha.local_ip - confirm whether this is intentional.
    action set_ha_set_attr(
        bit<1> local_ip_is_v6,
        @SaiVal[type="sai_ip_address_t"] IPv4ORv6Address local_ip,
        bit<1> peer_ip_is_v6,
        @SaiVal[type="sai_ip_address_t"] IPv4ORv6Address peer_ip,
        bit<16> dp_channel_dst_port,
        bit<16> dp_channel_min_src_port,
        bit<16> dp_channel_max_src_port,
        bit<32> dp_channel_probe_interval_ms,
        bit<32> dp_channel_probe_fail_threshold,
        @SaiVal[isreadonly="true"] bit<1> dp_channel_is_alive
    ) {
        meta.ha.peer_ip_is_v6 = peer_ip_is_v6;
        meta.ha.peer_ip = peer_ip;

        meta.ha.dp_channel_dst_port = dp_channel_dst_port;
        meta.ha.dp_channel_src_port_min = dp_channel_min_src_port;
        meta.ha.dp_channel_src_port_max = dp_channel_max_src_port;
    }

    // Keyed by the HA set id that set_ha_scope_attr stored in meta.ha.
    @SaiTable[api = "dash_ha", order=0, isobject="true"]
    table ha_set {
        key = {
            meta.ha.ha_set_id : exact @SaiVal[type="sai_object_id_t"];
        }
        actions = {
            set_ha_set_attr;
        }
    }

    apply {
        // If HA scope id is not set, then HA is not enabled.
        if (meta.ha.ha_scope_id == 0) {
            return;
        }
        ha_scope.apply();

        // If HA set id is not set, then HA is not enabled.
        if (meta.ha.ha_set_id == 0) {
            return;
        }
        ha_set.apply();

        // TODO: HA state machine handling.
    }
}

#endif /* _DASH_STAGE_HA_P4_ */
Loading

0 comments on commit 31b94c8

Please sign in to comment.