diff --git a/NEWS b/NEWS
index 72542a2463..d39e093933 100644
--- a/NEWS
+++ b/NEWS
@@ -17,6 +17,8 @@ Post v24.09.0
     ECMP-nexthop.
     By default ovn-controller continuously sends ARP/ND packets for
     ECMP-nexthop.
+  - Introduce ovn-controller ECMP_nexthop monitor in order to flush stale ct
+    entries when related ecmp routes are removed by the CMS.
 
 OVN v24.09.0 - 13 Sep 2024
 --------------------------
diff --git a/controller/automake.mk b/controller/automake.mk
index bb0bf2d336..766e363829 100644
--- a/controller/automake.mk
+++ b/controller/automake.mk
@@ -51,7 +51,9 @@ controller_ovn_controller_SOURCES = \
 	controller/ct-zone.h \
 	controller/ct-zone.c \
 	controller/ovn-dns.c \
-	controller/ovn-dns.h
+	controller/ovn-dns.h \
+	controller/ecmp-next-hop-monitor.h \
+	controller/ecmp-next-hop-monitor.c
 
 controller_ovn_controller_LDADD = lib/libovn.la $(OVS_LIBDIR)/libopenvswitch.la
 man_MANS += controller/ovn-controller.8
diff --git a/controller/ecmp-next-hop-monitor.c b/controller/ecmp-next-hop-monitor.c
new file mode 100644
index 0000000000..bafe9750f8
--- /dev/null
+++ b/controller/ecmp-next-hop-monitor.c
@@ -0,0 +1,220 @@
+/* Copyright (c) 2024, Red Hat, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include "ct-zone.h"
+#include "lib/ovn-util.h"
+#include "lib/simap.h"
+#include "openvswitch/hmap.h"
+#include "openvswitch/ofp-ct.h"
+#include "openvswitch/rconn.h"
+#include "openvswitch/vlog.h"
+#include "ovn/logical-fields.h"
+#include "ovn-sb-idl.h"
+#include "controller/ecmp-next-hop-monitor.h"
+
+VLOG_DEFINE_THIS_MODULE(ecmp_next_hop_monitor);
+
+/* ECMP next hops tracked across ecmp_nexthop_monitor_run() calls, keyed by
+ * (nexthop, mac, zone_id). */
+static struct hmap ecmp_nexthop;
+
+struct ecmp_nexthop_data {
+    struct hmap_node hmap_node;
+    uint16_t zone_id;   /* Conntrack zone used when flushing. */
+    char *nexthop;      /* Owned copy of the next hop string. */
+    char *mac;          /* Owned copy of the MAC address string. */
+};
+
+/* Initializes the file-level map of tracked ECMP next hops. */
+void
+ecmp_nexthop_init(void)
+{
+    hmap_init(&ecmp_nexthop);
+}
+
+/* Frees 'e' together with the strings it owns.  Does not unlink 'e' from
+ * any hmap; callers do that first. */
+static void
+ecmp_nexthop_erase_entry(struct ecmp_nexthop_data *e)
+{
+    free(e->nexthop);
+    free(e->mac);
+    free(e);
+}
+
+/* Pops and frees every entry of 'map', then destroys 'map'. */
+static void
+ecmp_nexthop_destroy_map(struct hmap *map)
+{
+    struct ecmp_nexthop_data *e;
+    HMAP_FOR_EACH_POP (e, hmap_node, map) {
+        ecmp_nexthop_erase_entry(e);
+    }
+    hmap_destroy(map);
+}
+
+/* Frees the file-level map of tracked ECMP next hops. */
+void
+ecmp_nexthop_destroy(void)
+{
+    ecmp_nexthop_destroy_map(&ecmp_nexthop);
+}
+
+/* Returns the hash of the (nexthop, mac, zone_id) key.  Used by both
+ * insertion and lookup so the two cannot drift apart. */
+static uint32_t
+ecmp_nexthop_hash(const char *nexthop, const char *mac, uint16_t zone_id)
+{
+    uint32_t hash = hash_string(nexthop, 0);
+    hash = hash_add(hash, hash_string(mac, 0));
+    return hash_add(hash, zone_id);
+}
+
+/* Allocates an entry holding copies of 'nexthop' and 'mac' plus 'zone_id',
+ * inserts it into 'map' and returns it.  Does not check for duplicates. */
+static struct ecmp_nexthop_data *
+ecmp_nexthop_alloc_entry(const char *nexthop, const char *mac,
+                         const uint16_t zone_id, struct hmap *map)
+{
+    struct ecmp_nexthop_data *e = xmalloc(sizeof *e);
+    e->nexthop = xstrdup(nexthop);
+    e->mac = xstrdup(mac);
+    e->zone_id = zone_id;
+    hmap_insert(map, &e->hmap_node,
+                ecmp_nexthop_hash(nexthop, mac, zone_id));
+
+    return e;
+}
+
+/* Returns the entry of 'map' matching 'nexthop', 'mac' and 'zone_id', or
+ * NULL if there is none. */
+static struct ecmp_nexthop_data *
+ecmp_nexthop_find_entry(const char *nexthop, const char *mac,
+                        const uint16_t zone_id, struct hmap *map)
+{
+    uint32_t hash = ecmp_nexthop_hash(nexthop, mac, zone_id);
+
+    struct ecmp_nexthop_data *e;
+    HMAP_FOR_EACH_WITH_HASH (e, hmap_node, hash, map) {
+        if (!strcmp(e->nexthop, nexthop) &&
+            !strcmp(e->mac, mac) && e->zone_id == zone_id) {
+            return e;
+        }
+    }
+    return NULL;
+}
+
+/* Appends to 'msgs' the message built by ofp_ct_match_encode() for
+ * 'zone_id', matching conntrack entries whose ct_label.ecmp_reply_eth
+ * equals 'mac'.  Queues nothing if 'mac' is not a valid Ethernet address. */
+static void
+ecmp_nexthop_monitor_flush_ct_entry(const struct rconn *swconn,
+                                    const char *mac, uint16_t zone_id,
+                                    struct ovs_list *msgs)
+{
+    struct eth_addr ea;
+    if (!ovs_scan(mac, ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(ea))) {
+        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+        VLOG_DBG_RL(&rl, "zone %"PRIu16": not flushing ct entries, "
+                    "invalid next hop MAC \"%s\"", zone_id, mac);
+        return;
+    }
+
+    ovs_u128 mask = {
+        /* ct_label.ecmp_reply_eth BITS[32-79] */
+        .u64.hi = OVN_CT_ECMP_ETH_HIGH,
+        .u64.lo = OVN_CT_ECMP_ETH_LOW,
+    };
+
+    /* The first two MAC bytes land in the high 64-bit word, the last four
+     * in the upper half of the low word. */
+    ovs_be32 lo = get_unaligned_be32((void *)&ea.be16[1]);
+    ovs_u128 nexthop = {
+        .u64.hi = ntohs(ea.be16[0]),
+        .u64.lo = (uint64_t) ntohl(lo) << 32,
+    };
+
+    struct ofp_ct_match match = {
+        .labels = nexthop,
+        .labels_mask = mask,
+    };
+    struct ofpbuf *msg = ofp_ct_match_encode(&match, &zone_id,
+                                             rconn_get_version(swconn));
+    ovs_list_push_back(msgs, &msg->list_node);
+}
+
+/* Syncs the tracked ECMP next hops with 'enh_table'.
+ *
+ * Each row whose port's datapath has a "<name>_dnat" entry in
+ * 'current_ct_zones' is tracked as (nexthop, mac, zone).  For every tracked
+ * entry that is no longer in 'enh_table', a conntrack flush message encoded
+ * for the OpenFlow version of 'swconn' is appended to 'msgs' and the entry
+ * is dropped. */
+void
+ecmp_nexthop_monitor_run(const struct sbrec_ecmp_nexthop_table *enh_table,
+                         const struct shash *current_ct_zones,
+                         const struct rconn *swconn, struct ovs_list *msgs)
+{
+    /* Next hops present in the SB table right now. */
+    struct hmap sb_ecmp_nexthop = HMAP_INITIALIZER(&sb_ecmp_nexthop);
+
+    const struct sbrec_ecmp_nexthop *sbrec_ecmp_nexthop;
+    SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH (sbrec_ecmp_nexthop, enh_table) {
+        const struct sbrec_port_binding *pb = sbrec_ecmp_nexthop->port;
+        if (!pb) {
+            continue;
+        }
+
+        const char *dp_name = smap_get(&pb->datapath->external_ids, "name");
+        if (!dp_name) {
+            continue;
+        }
+
+        char *name = xasprintf("%s_dnat", dp_name);
+        struct ct_zone *ct_zone = shash_find_data(current_ct_zones, name);
+        free(name);
+
+        if (!ct_zone) {
+            continue;
+        }
+
+        if (!ecmp_nexthop_find_entry(sbrec_ecmp_nexthop->nexthop,
+                                     sbrec_ecmp_nexthop->mac, ct_zone->zone,
+                                     &ecmp_nexthop)) {
+            ecmp_nexthop_alloc_entry(sbrec_ecmp_nexthop->nexthop,
+                                     sbrec_ecmp_nexthop->mac,
+                                     ct_zone->zone, &ecmp_nexthop);
+        }
+        ecmp_nexthop_alloc_entry(sbrec_ecmp_nexthop->nexthop,
+                                 sbrec_ecmp_nexthop->mac, ct_zone->zone,
+                                 &sb_ecmp_nexthop);
+    }
+
+    /* Tracked entries missing from the current SB contents are stale:
+     * flush their conntrack entries and stop tracking them. */
+    struct ecmp_nexthop_data *e;
+    HMAP_FOR_EACH_SAFE (e, hmap_node, &ecmp_nexthop) {
+        if (!ecmp_nexthop_find_entry(e->nexthop, e->mac, e->zone_id,
+                                     &sb_ecmp_nexthop)) {
+            ecmp_nexthop_monitor_flush_ct_entry(swconn, e->mac,
+                                                e->zone_id, msgs);
+            hmap_remove(&ecmp_nexthop, &e->hmap_node);
+            ecmp_nexthop_erase_entry(e);
+        }
+    }
+
+    ecmp_nexthop_destroy_map(&sb_ecmp_nexthop);
+}
diff --git a/controller/ecmp-next-hop-monitor.h b/controller/ecmp-next-hop-monitor.h
new file mode 100644
index 0000000000..ee8278e3bb
--- /dev/null
+++ b/controller/ecmp-next-hop-monitor.h
@@ -0,0 +1,35 @@
+/* Copyright (c) 2024, Red Hat, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef OVN_ECMP_NEXT_HOP_MONITOR_H
+#define OVN_ECMP_NEXT_HOP_MONITOR_H
+
+struct ovs_list;
+struct rconn;
+struct sbrec_ecmp_nexthop_table;
+struct shash;
+
+/* Call ecmp_nexthop_init() once before the first ecmp_nexthop_monitor_run()
+ * and ecmp_nexthop_destroy() to release the tracked state. */
+void ecmp_nexthop_init(void);
+void ecmp_nexthop_destroy(void);
+
+/* Appends to 'msgs' conntrack flush messages for ECMP next hops that were
+ * tracked by an earlier call but are no longer present in 'enh_table'. */
+void ecmp_nexthop_monitor_run(const struct sbrec_ecmp_nexthop_table *enh_table,
+                              const struct shash *current_ct_zones,
+                              const struct rconn *swconn,
+                              struct ovs_list *msgs);
+#endif /* OVN_ECMP_NEXT_HOP_MONITOR_H */
diff --git a/controller/ofctrl.c b/controller/ofctrl.c
index b681bc3681..4c7c1b0c9e 100644
--- a/controller/ofctrl.c
+++ b/controller/ofctrl.c
@@ -54,6 +54,7 @@
 #include "vswitch-idl.h"
 #include "ovn-sb-idl.h"
 #include "ct-zone.h"
+#include "ecmp-next-hop-monitor.h"
 
 VLOG_DEFINE_THIS_MODULE(ofctrl);
 
@@ -425,6 +426,7 @@ ofctrl_init(struct ovn_extend_table *group_table,
     tx_counter = rconn_packet_counter_create();
     hmap_init(&installed_lflows);
     hmap_init(&installed_pflows);
+    ecmp_nexthop_init();
     ovs_list_init(&flow_updates);
     ovn_init_symtab(&symtab);
     groups = group_table;
@@ -877,6 +879,7 @@ ofctrl_destroy(void)
     expr_symtab_destroy(&symtab);
     shash_destroy(&symtab);
     ofctrl_meter_bands_destroy();
+    ecmp_nexthop_destroy();
 }
 
 uint64_t
@@ -2662,8 +2665,10 @@ void
 ofctrl_put(struct ovn_desired_flow_table *lflow_table,
            struct ovn_desired_flow_table *pflow_table,
            struct shash *pending_ct_zones,
+           struct shash *current_ct_zones,
            struct hmap *pending_lb_tuples,
            struct ovsdb_idl_index *sbrec_meter_by_name,
+           const struct sbrec_ecmp_nexthop_table *enh_table,
            uint64_t req_cfg,
            bool lflows_changed,
            bool pflows_changed)
@@ -2704,6 +2709,8 @@ ofctrl_put(struct ovn_desired_flow_table *lflow_table,
     /* OpenFlow messages to send to the switch to bring it up-to-date. */
     struct ovs_list msgs = OVS_LIST_INITIALIZER(&msgs);
 
+    ecmp_nexthop_monitor_run(enh_table, current_ct_zones, swconn, &msgs);
+
     /* Iterate through ct zones that need to be flushed. */
     struct shash_node *iter;
     SHASH_FOR_EACH(iter, pending_ct_zones) {
diff --git a/controller/ofctrl.h b/controller/ofctrl.h
index 129e3b6ad5..5735cd553b 100644
--- a/controller/ofctrl.h
+++ b/controller/ofctrl.h
@@ -31,6 +31,7 @@ struct ofpbuf;
 struct ovsrec_bridge;
 struct ovsrec_open_vswitch_table;
 struct sbrec_meter_table;
+struct sbrec_ecmp_nexthop_table;
 struct shash;
 
 struct ovn_desired_flow_table {
@@ -57,8 +58,10 @@ enum mf_field_id ofctrl_get_mf_field_id(void);
 void ofctrl_put(struct ovn_desired_flow_table *lflow_table,
                 struct ovn_desired_flow_table *pflow_table,
                 struct shash *pending_ct_zones,
+                struct shash *current_ct_zones,
                 struct hmap *pending_lb_tuples,
                 struct ovsdb_idl_index *sbrec_meter_by_name,
+                const struct sbrec_ecmp_nexthop_table *enh_table,
                 uint64_t nb_cfg,
                 bool lflow_changed,
                 bool pflow_changed);
diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index ec8dd24c70..1dd69f7cf8 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -5827,8 +5827,11 @@ main(int argc, char *argv[])
                         ofctrl_put(&lflow_output_data->flow_table,
                                    &pflow_output_data->flow_table,
                                    &ct_zones_data->ctx.pending,
+                                   &ct_zones_data->ctx.current,
                                    &lb_data->removed_tuples,
                                    sbrec_meter_by_name,
+                                   sbrec_ecmp_nexthop_table_get(
+                                        ovnsb_idl_loop.idl),
                                    ofctrl_seqno_get_req_cfg(),
                                    engine_node_changed(&en_lflow_output),
                                    engine_node_changed(&en_pflow_output));
diff --git a/include/ovn/logical-fields.h b/include/ovn/logical-fields.h
index 70c6b93c41..dc964f1c77 100644
--- a/include/ovn/logical-fields.h
+++ b/include/ovn/logical-fields.h
@@ -213,6 +213,11 @@ const struct ovn_field *ovn_field_from_name(const char *name);
 #define OVN_CT_ECMP_ETH_1ST_BIT 32
 #define OVN_CT_ECMP_ETH_END_BIT 79
 
+/* Masks selecting ct_label.ecmp_reply_eth in the low and high 64-bit halves
+ * of the 128-bit label. */
+#define OVN_CT_ECMP_ETH_LOW (~0ULL << OVN_CT_ECMP_ETH_1ST_BIT)
+#define OVN_CT_ECMP_ETH_HIGH ((1ULL << (OVN_CT_ECMP_ETH_END_BIT - 63)) - 1)
+
 #define OVN_CT_STR(LABEL_VALUE) OVS_STRINGIZE(LABEL_VALUE)
 #define OVN_CT_MASKED_STR(LABEL_VALUE) \
OVS_STRINGIZE(LABEL_VALUE) "/" OVS_STRINGIZE(LABEL_VALUE) diff --git a/tests/system-ovn.at b/tests/system-ovn.at index 19ec1eb8df..feae37e8a1 100644 --- a/tests/system-ovn.at +++ b/tests/system-ovn.at @@ -14721,3 +14721,529 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d /connection dropped.*/d"]) AT_CLEANUP ]) + +OVN_FOR_EACH_NORTHD([ +AT_SETUP([ECMP Flush CT entries - IPv4]) +AT_KEYWORDS([ecmp]) +ovn_start +OVS_TRAFFIC_VSWITCHD_START() + +ADD_BR([br-int]) +ADD_BR([br-ext]) +ADD_BR([br-ecmp]) + +ovs-ofctl add-flow br-ext action=normal +ovs-ofctl add-flow br-ecmp action=normal +# Set external-ids in br-int needed for ovn-controller +ovs-vsctl \ + -- set Open_vSwitch . external-ids:system-id=hv1 \ + -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \ + -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \ + -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \ + -- set bridge br-int fail-mode=secure other-config:disable-in-band=true + +# Start ovn-controller +start_daemon ovn-controller +ovs-vsctl set Open_vSwitch . 
external-ids:arp-max-timeout-sec=1 + +check ovn-nbctl lr-add R1 +check ovn-nbctl set logical_router R1 options:chassis=hv1 +check ovn-nbctl lr-add R2 +check ovn-nbctl set logical_router R2 options:chassis=hv1 + +check ovn-nbctl ls-add sw0 +check ovn-nbctl ls-add sw1 +check ovn-nbctl ls-add public + +check ovn-nbctl lrp-add R1 rp-sw0 00:00:01:01:02:03 192.168.1.1/24 +check ovn-nbctl lrp-add R1 rp-public1 00:00:02:01:02:03 172.16.1.1/24 + +check ovn-nbctl lrp-add R2 rp-sw1 00:00:03:01:02:03 192.168.2.1/24 +check ovn-nbctl lrp-add R2 rp-public2 00:00:04:01:02:03 172.16.1.5/24 + +check ovn-nbctl lsp-add sw0 sw0-rp -- set Logical_Switch_Port sw0-rp \ + type=router options:router-port=rp-sw0 \ + -- lsp-set-addresses sw0-rp router + +check ovn-nbctl lsp-add sw1 sw1-rp -- set Logical_Switch_Port sw1-rp \ + type=router options:router-port=rp-sw1 \ + -- lsp-set-addresses sw1-rp router + +check ovn-nbctl lsp-add public public-rp1 -- set Logical_Switch_Port public-rp1 \ + type=router options:router-port=rp-public1 \ + -- lsp-set-addresses public-rp1 router + +check ovn-nbctl lsp-add public public-rp2 -- set Logical_Switch_Port public-rp2 \ + type=router options:router-port=rp-public2 \ + -- lsp-set-addresses public-rp2 router + +ADD_NAMESPACES(alice) +ADD_VETH(alice, alice, br-int, "192.168.1.2/24", "f0:00:00:01:02:03", \ + "192.168.1.1") +check ovn-nbctl lsp-add sw0 alice \ + -- lsp-set-addresses alice "f0:00:00:01:02:03 192.168.1.2" + +ADD_NAMESPACES(peter) +ADD_VETH(peter, peter, br-int, "192.168.2.2/24", "f0:00:02:01:02:03", \ + "192.168.2.1") +check ovn-nbctl lsp-add sw1 peter \ + -- lsp-set-addresses peter "f0:00:02:01:02:03 192.168.2.2" + +check ovs-vsctl set Open_vSwitch . 
external-ids:ovn-bridge-mappings=phynet:br-ext +check ovn-nbctl lsp-add public public1 \ + -- lsp-set-addresses public1 unknown \ + -- lsp-set-type public1 localnet \ + -- lsp-set-options public1 network_name=phynet + +ADD_NAMESPACES(ecmp-path0) +ADD_VETH(ecmp-p01, ecmp-path0, br-ext, "172.16.1.2/24", "f0:00:00:01:02:04", "172.16.1.1") +ADD_VETH(ecmp-p02, ecmp-path0, br-ecmp, "172.16.2.2/24", "f0:00:00:01:03:04") + +ADD_NAMESPACES(ecmp-path1) +ADD_VETH(ecmp-p11, ecmp-path1, br-ext, "172.16.1.3/24", "f0:00:00:01:02:05", "172.16.1.1") +ADD_VETH(ecmp-p12, ecmp-path1, br-ecmp, "172.16.2.3/24", "f0:00:00:01:03:05") + +ADD_NAMESPACES(bob) +ADD_VETH(bob, bob, br-ecmp, "172.16.2.10/24", "f0:00:00:01:02:06", "172.16.2.2") + +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 172.16.2.0/24 172.16.1.2 +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 172.16.2.0/24 172.16.1.3 + +wait_for_ports_up +check ovn-nbctl --wait=hv sync +NETNS_DAEMONIZE([alice], [nc -l -k 80], [alice.pid]) +NETNS_DAEMONIZE([peter], [nc -l -k 80], [peter.pid]) + +NS_CHECK_EXEC([bob], [ping -q -c 3 -i 0.3 -w 2 192.168.1.2 | FORMAT_PING], \ +[0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) +NS_CHECK_EXEC([bob], [nc -z 192.168.1.2 80], [0]) + +wait_row_count ECMP_Nexthop 2 +wait_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='172.16.1.2' +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.3' + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \ +sed -e 's/zone=[[0-9]]*/zone=/' | +sed -e 's/mark=[[0-9]]*/mark=/' | sort], [0], [dnl +icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=,type=0,code=0),zone=,mark=,labels=0xf0000001020400000000 +tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=,dport=),reply=(src=192.168.1.2,dst=172.16.2.10,sport=,dport=),zone=,mark=,labels=0xf0000001020400000000,protoinfo=(state=) +]) + +# Change bob default IP address +NS_CHECK_EXEC([bob], [ip route 
del 0.0.0.0/0 via 172.16.2.2])
+NS_CHECK_EXEC([bob], [ip route add 0.0.0.0/0 via 172.16.2.3])
+
+NS_CHECK_EXEC([bob], [ping -q -c 3 -i 0.3 -w 2 192.168.1.2 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+NS_CHECK_EXEC([bob], [nc -z 192.168.1.2 80], [0])
+
+wait_row_count ECMP_Nexthop 2
+check_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='172.16.1.2'
+check_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.3'
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \
+sed -e 's/zone=[[0-9]]*/zone=/' |
+sed -e 's/mark=[[0-9]]*/mark=/' | sort], [0], [dnl
+icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=,type=0,code=0),zone=,mark=,labels=0xf0000001020400000000
+icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=,type=0,code=0),zone=,mark=,labels=0xf0000001020500000000
+tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=,dport=),reply=(src=192.168.1.2,dst=172.16.2.10,sport=,dport=),zone=,mark=,labels=0xf0000001020400000000,protoinfo=(state=)
+tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=,dport=),reply=(src=192.168.1.2,dst=172.16.2.10,sport=,dport=),zone=,mark=,labels=0xf0000001020500000000,protoinfo=(state=)
+])
+
+# Remove first ECMP route
+check ovn-nbctl lr-route-del R1 172.16.2.0/24 172.16.1.2
+check ovn-nbctl --wait=hv sync
+wait_row_count ECMP_Nexthop 1
+check_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.3'
+
+ovn-sbctl list ECMP_Nexthop > ecmp-nh
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \
+sed -e 's/zone=[[0-9]]*/zone=/' |
+sed -e 's/mark=[[0-9]]*/mark=/' | sort], [0], [dnl
+icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=,type=0,code=0),zone=,mark=,labels=0xf0000001020500000000
+tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=,dport=),reply=(src=192.168.1.2,dst=172.16.2.10,sport=,dport=),zone=,mark=,labels=0xf0000001020500000000,protoinfo=(state=) +]) + +# Add the route back and verify we do not flush if we have multiple next-hops with the same mac address +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 172.16.2.0/24 172.16.1.2 +wait_row_count ECMP_Nexthop 2 +wait_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='172.16.1.2' +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.3' + +NS_CHECK_EXEC([ecmp-path0], [ip link set dev ecmp-p01 address f0:00:00:01:02:05]) +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.2' + +# Change bob default IP address +NS_CHECK_EXEC([bob], [ip route del 0.0.0.0/0 via 172.16.2.3]) +NS_CHECK_EXEC([bob], [ip route add 0.0.0.0/0 via 172.16.2.2]) + +NS_CHECK_EXEC([bob], [ping -q -c 3 -i 0.3 -w 2 192.168.1.2 | FORMAT_PING], \ +[0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) +NS_CHECK_EXEC([bob], [nc -z 192.168.1.2 80], [0]) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \ +sed -e 's/zone=[[0-9]]*/zone=/' | +sed -e 's/mark=[[0-9]]*/mark=/' | sort], [0], [dnl +icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=,type=0,code=0),zone=,mark=,labels=0xf0000001020500000000 +tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=,dport=),reply=(src=192.168.1.2,dst=172.16.2.10,sport=,dport=),zone=,mark=,labels=0xf0000001020500000000,protoinfo=(state=) +]) + +# Remove first ECMP route +check ovn-nbctl lr-route-del R1 172.16.2.0/24 172.16.1.2 +check ovn-nbctl --wait=hv sync +wait_row_count ECMP_Nexthop 1 + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \ +sed -e 's/zone=[[0-9]]*/zone=/' | +sed -e 's/mark=[[0-9]]*/mark=/' | sort], [0], [dnl +]) + +# Remove second ECMP route +check ovn-nbctl lr-route-del R1 +check ovn-nbctl --wait=hv sync +wait_row_count 
ECMP_Nexthop 0 + +NS_CHECK_EXEC([ecmp-path0], [ip link set dev ecmp-p01 address f0:00:00:01:02:06]) + +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 172.16.2.0/24 172.16.1.2 +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 172.16.2.0/24 172.16.1.3 + +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R2 172.16.2.0/24 172.16.1.2 +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R2 172.16.2.0/24 172.16.1.3 + +check ovn-nbctl --wait=hv sync +wait_row_count ECMP_Nexthop 4 + +NS_CHECK_EXEC([ecmp-path0], [ip route add 192.168.2.2/32 via 172.16.1.5]) +NS_CHECK_EXEC([ecmp-path1], [ip route add 192.168.2.2/32 via 172.16.1.5]) + +NS_CHECK_EXEC([bob], [ping -q -c 3 -i 0.3 -w 2 192.168.1.2 | FORMAT_PING], \ +[0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) +NS_CHECK_EXEC([bob], [nc -z 192.168.1.2 80], [0]) + +NS_CHECK_EXEC([bob], [ping -q -c 3 -i 0.3 -w 2 192.168.2.2 | FORMAT_PING], \ +[0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) +NS_CHECK_EXEC([bob], [nc -z 192.168.2.2 80], [0]) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \ +sed -e 's/zone=[[0-9]]*/zone=/' | +sed -e 's/mark=[[0-9]]*/mark=/' | sort], [0], [dnl +icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=,type=0,code=0),zone=,mark=,labels=0xf0000001020600000000 +icmp,orig=(src=172.16.2.10,dst=192.168.2.2,id=,type=8,code=0),reply=(src=192.168.2.2,dst=172.16.2.10,id=,type=0,code=0),zone=,mark=,labels=0xf0000001020600000000 +tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=,dport=),reply=(src=192.168.1.2,dst=172.16.2.10,sport=,dport=),zone=,mark=,labels=0xf0000001020600000000,protoinfo=(state=) +tcp,orig=(src=172.16.2.10,dst=192.168.2.2,sport=,dport=),reply=(src=192.168.2.2,dst=172.16.2.10,sport=,dport=),zone=,mark=,labels=0xf0000001020600000000,protoinfo=(state=) +]) + +check ovn-nbctl lr-route-del R1 +check ovn-nbctl --wait=hv sync +wait_row_count ECMP_Nexthop 2 
+wait_column 'f0:00:00:01:02:06' ECMP_Nexthop mac nexthop='172.16.1.2' +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.3' + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \ +sed -e 's/zone=[[0-9]]*/zone=/' | +sed -e 's/mark=[[0-9]]*/mark=/' | sort], [0], [dnl +icmp,orig=(src=172.16.2.10,dst=192.168.2.2,id=,type=8,code=0),reply=(src=192.168.2.2,dst=172.16.2.10,id=,type=0,code=0),zone=,mark=,labels=0xf0000001020600000000 +tcp,orig=(src=172.16.2.10,dst=192.168.2.2,sport=,dport=),reply=(src=192.168.2.2,dst=172.16.2.10,sport=,dport=),zone=,mark=,labels=0xf0000001020600000000,protoinfo=(state=) +]) + +check ovn-nbctl lr-route-del R2 +check ovn-nbctl --wait=hv sync +wait_row_count ECMP_Nexthop 0 +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \ +sed -e 's/zone=[[0-9]]*/zone=/' | +sed -e 's/mark=[[0-9]]*/mark=/' | sort], [0], [dnl +]) + +OVS_APP_EXIT_AND_WAIT([ovn-controller]) + +as ovn-sb +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) + +as ovn-nb +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) + +as northd +OVS_APP_EXIT_AND_WAIT([ovn-northd]) + +as +OVS_TRAFFIC_VSWITCHD_STOP(["/.*error receiving.*/d +/.*terminating with signal 15.*/d"]) +AT_CLEANUP +]) + +OVN_FOR_EACH_NORTHD([ +AT_SETUP([ECMP Flush CT entries - IPv6]) +AT_KEYWORDS([ecmp]) +ovn_start +OVS_TRAFFIC_VSWITCHD_START() + +ADD_BR([br-int]) +ADD_BR([br-ext]) +ADD_BR([br-ecmp]) + +ovs-ofctl add-flow br-ext action=normal +ovs-ofctl add-flow br-ecmp action=normal +# Set external-ids in br-int needed for ovn-controller +ovs-vsctl \ + -- set Open_vSwitch . external-ids:system-id=hv1 \ + -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \ + -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \ + -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \ + -- set bridge br-int fail-mode=secure other-config:disable-in-band=true + +# Start ovn-controller +start_daemon ovn-controller +ovs-vsctl set Open_vSwitch . 
external-ids:arp-max-timeout-sec=1 + +check ovn-nbctl lr-add R1 +check ovn-nbctl set logical_router R1 options:chassis=hv1 +check ovn-nbctl lr-add R2 +check ovn-nbctl set logical_router R2 options:chassis=hv1 + +check ovn-nbctl ls-add sw0 +check ovn-nbctl ls-add sw1 +check ovn-nbctl ls-add public + +check ovn-nbctl lrp-add R1 rp-sw0 00:00:01:01:02:03 fd11::1/64 +check ovn-nbctl lrp-add R1 rp-public1 00:00:02:01:02:03 fd12::1/64 + +check ovn-nbctl lrp-add R2 rp-sw1 00:00:03:01:02:03 fd14::1/64 +check ovn-nbctl lrp-add R2 rp-public2 00:00:04:01:02:03 fd12::5/64 + +check ovn-nbctl lsp-add sw0 sw0-rp -- set Logical_Switch_Port sw0-rp \ + type=router options:router-port=rp-sw0 \ + -- lsp-set-addresses sw0-rp router + +check ovn-nbctl lsp-add sw1 sw1-rp -- set Logical_Switch_Port sw1-rp \ + type=router options:router-port=rp-sw1 \ + -- lsp-set-addresses sw1-rp router + +check ovn-nbctl lsp-add public public-rp1 -- set Logical_Switch_Port public-rp1 \ + type=router options:router-port=rp-public1 \ + -- lsp-set-addresses public-rp1 router + +check ovn-nbctl lsp-add public public-rp2 -- set Logical_Switch_Port public-rp2 \ + type=router options:router-port=rp-public2 \ + -- lsp-set-addresses public-rp2 router + +ADD_NAMESPACES(alice) +ADD_VETH(alice, alice, br-int, "fd11::2/64", "f0:00:00:01:02:03", "fd11::1", "nodad") +check ovn-nbctl lsp-add sw0 alice -- lsp-set-addresses alice "f0:00:00:01:02:03 fd11::2" + +ADD_NAMESPACES(peter) +ADD_VETH(peter, peter, br-int, "fd14::2/64", "f0:00:02:01:02:03", "fd14::1", "nodad") +check ovn-nbctl lsp-add sw1 peter -- lsp-set-addresses peter "f0:00:02:01:02:03 fd14::2" + +check ovs-vsctl set Open_vSwitch . 
external-ids:ovn-bridge-mappings=phynet:br-ext +check ovn-nbctl lsp-add public public1 \ + -- lsp-set-addresses public1 unknown \ + -- lsp-set-type public1 localnet \ + -- lsp-set-options public1 network_name=phynet + +ADD_NAMESPACES(ecmp-path0) +ADD_VETH(ecmp-p01, ecmp-path0, br-ext, "fd12::2/64", "f0:00:00:01:02:04", "fd12::1", "nodad") +ADD_VETH(ecmp-p02, ecmp-path0, br-ecmp, "fd13::2/64", "f0:00:00:01:03:04") +OVS_WAIT_UNTIL([NS_EXEC([ecmp-path0], [ip a show dev ecmp-p02 | grep "fe80::" | grep -v tentative])]) + +ADD_NAMESPACES(ecmp-path1) +ADD_VETH(ecmp-p11, ecmp-path1, br-ext, "fd12::3/64", "f0:00:00:01:02:05", "fd12::1", "nodad") +ADD_VETH(ecmp-p12, ecmp-path1, br-ecmp, "fd13::3/64", "f0:00:00:01:03:05") +OVS_WAIT_UNTIL([NS_EXEC([ecmp-path1], [ip a show dev ecmp-p12 | grep "fe80::" | grep -v tentative])]) + +ADD_NAMESPACES(bob) +ADD_VETH(bob, bob, br-ecmp, "fd13::a/64", "f0:00:00:01:02:06", "fd13::2", "nodad") + +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 fd13::/64 fd12::2 +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 fd13::/64 fd12::3 + +NS_CHECK_EXEC([ecmp-path0], [sysctl -w net.ipv6.conf.all.forwarding=1],[0], [dnl +net.ipv6.conf.all.forwarding = 1 +]) +NS_CHECK_EXEC([ecmp-path1], [sysctl -w net.ipv6.conf.all.forwarding=1],[0], [dnl +net.ipv6.conf.all.forwarding = 1 +]) + +ovn-nbctl --wait=hv sync +NETNS_DAEMONIZE([alice], [nc -6 -l -k 80], [alice.pid]) +NETNS_DAEMONIZE([peter], [nc -6 -l -k 80], [peter.pid]) + +NS_CHECK_EXEC([bob], [ping6 -q -c 3 -i 0.3 -w 2 fd11::2 | FORMAT_PING], \ +[0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) + +NS_CHECK_EXEC([bob], [nc -6 -z fd11::2 80], [0]) + +wait_row_count ECMP_Nexthop 2 +wait_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='"fd12::2"' +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::3"' + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \ +sed -e 's/zone=[[0-9]]*/zone=/' | +sed -e 's/mark=[[0-9]]*/mark=/' | sort], [0], [dnl 
+icmpv6,orig=(src=fd13::a,dst=fd11::2,id=,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=,type=129,code=0),zone=,mark=,labels=0xf0000001020400000000 +tcp,orig=(src=fd13::a,dst=fd11::2,sport=,dport=),reply=(src=fd11::2,dst=fd13::a,sport=,dport=),zone=,mark=,labels=0xf0000001020400000000,protoinfo=(state=) +]) + +# Change bob default IP address +NS_CHECK_EXEC([bob], [ip -6 route del ::/0 via fd13::2]) +NS_CHECK_EXEC([bob], [ip -6 route add ::/0 via fd13::3]) + +NS_CHECK_EXEC([bob], [ping -6 -q -c 3 -i 0.3 -w 2 fd11::2 | FORMAT_PING], \ +[0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) +NS_CHECK_EXEC([bob], [nc -6 -z fd11::2 80], [0]) + +wait_row_count ECMP_Nexthop 2 +check_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='"fd12::2"' +check_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::3"' + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \ +sed -e 's/zone=[[0-9]]*/zone=/' | +sed -e 's/mark=[[0-9]]*/mark=/' | sort], [0], [dnl +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=,type=129,code=0),zone=,mark=,labels=0xf0000001020400000000 +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=,type=129,code=0),zone=,mark=,labels=0xf0000001020500000000 +tcp,orig=(src=fd13::a,dst=fd11::2,sport=,dport=),reply=(src=fd11::2,dst=fd13::a,sport=,dport=),zone=,mark=,labels=0xf0000001020400000000,protoinfo=(state=) +tcp,orig=(src=fd13::a,dst=fd11::2,sport=,dport=),reply=(src=fd11::2,dst=fd13::a,sport=,dport=),zone=,mark=,labels=0xf0000001020500000000,protoinfo=(state=) +]) + +# Remove first ECMP route +check ovn-nbctl lr-route-del R1 fd13::/64 fd12::2 +check ovn-nbctl --wait=hv sync +wait_row_count ECMP_Nexthop 1 +check_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::3"' + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \ +sed -e 's/zone=[[0-9]]*/zone=/' | +sed -e 's/mark=[[0-9]]*/mark=/' | sort], [0], [dnl 
+icmpv6,orig=(src=fd13::a,dst=fd11::2,id=,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=,type=129,code=0),zone=,mark=,labels=0xf0000001020500000000
+tcp,orig=(src=fd13::a,dst=fd11::2,sport=,dport=),reply=(src=fd11::2,dst=fd13::a,sport=,dport=),zone=,mark=,labels=0xf0000001020500000000,protoinfo=(state=)
+])
+
+# Add the route back and verify we do not flush if we have multiple next-hops with the same mac address
+check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 fd13::/64 fd12::2
+wait_row_count ECMP_Nexthop 2
+wait_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='"fd12::2"'
+wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::3"'
+
+NS_CHECK_EXEC([ecmp-path0], [ip link set dev ecmp-p01 address f0:00:00:01:02:05])
+wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::2"'
+
+# Change bob default IP address
+NS_CHECK_EXEC([bob], [ip -6 route del ::/0 via fd13::3])
+NS_CHECK_EXEC([bob], [ip -6 route add ::/0 via fd13::2])
+
+NS_CHECK_EXEC([bob], [ping -6 -q -c 3 -i 0.3 -w 2 fd11::2 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+NS_CHECK_EXEC([bob], [nc -6 -z fd11::2 80], [0])
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \
+sed -e 's/zone=[[0-9]]*/zone=/' |
+sed -e 's/mark=[[0-9]]*/mark=/' | sort], [0], [dnl
+icmpv6,orig=(src=fd13::a,dst=fd11::2,id=,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=,type=129,code=0),zone=,mark=,labels=0xf0000001020500000000
+tcp,orig=(src=fd13::a,dst=fd11::2,sport=,dport=),reply=(src=fd11::2,dst=fd13::a,sport=,dport=),zone=,mark=,labels=0xf0000001020500000000,protoinfo=(state=)
+])
+
+# Remove first ECMP route
+check ovn-nbctl lr-route-del R1 fd13::/64 fd12::2
+check ovn-nbctl --wait=hv sync
+wait_row_count ECMP_Nexthop 1
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \
+sed -e 's/zone=[[0-9]]*/zone=/' |
+sed -e 's/mark=[[0-9]]*/mark=/' | sort], [0], [dnl
+])
+
+# Remove second ECMP route
+check ovn-nbctl 
lr-route-del R1 +check ovn-nbctl --wait=hv sync +wait_row_count ECMP_Nexthop 0 + +NS_CHECK_EXEC([ecmp-path0], [ip link set dev ecmp-p01 address f0:00:00:01:02:06]) + +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 fd13::/64 fd12::2 +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 fd13::/64 fd12::3 + +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R2 fd13::/64 fd12::2 +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R2 fd13::/64 fd12::3 + +check ovn-nbctl --wait=hv sync +wait_row_count ECMP_Nexthop 4 + +NS_CHECK_EXEC([ecmp-path0], [ip route add fd14::2/128 via fd12::5]) +NS_CHECK_EXEC([ecmp-path1], [ip route add fd14::2/128 via fd12::5]) + +NS_CHECK_EXEC([bob], [ping -6 -q -c 3 -i 0.3 -w 2 fd11::2 | FORMAT_PING], \ +[0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) +NS_CHECK_EXEC([bob], [nc -6 -z fd11::2 80], [0]) + +NS_CHECK_EXEC([bob], [ping -6 -q -c 3 -i 0.3 -w 2 fd14::2 | FORMAT_PING], \ +[0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) +NS_CHECK_EXEC([bob], [nc -6 -z fd14::2 80], [0]) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \ +sed -e 's/zone=[[0-9]]*/zone=/' | +sed -e 's/mark=[[0-9]]*/mark=/' | sort], [0], [dnl +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=,type=129,code=0),zone=,mark=,labels=0xf0000001020600000000 +icmpv6,orig=(src=fd13::a,dst=fd14::2,id=,type=128,code=0),reply=(src=fd14::2,dst=fd13::a,id=,type=129,code=0),zone=,mark=,labels=0xf0000001020600000000 +tcp,orig=(src=fd13::a,dst=fd11::2,sport=,dport=),reply=(src=fd11::2,dst=fd13::a,sport=,dport=),zone=,mark=,labels=0xf0000001020600000000,protoinfo=(state=) +tcp,orig=(src=fd13::a,dst=fd14::2,sport=,dport=),reply=(src=fd14::2,dst=fd13::a,sport=,dport=),zone=,mark=,labels=0xf0000001020600000000,protoinfo=(state=) +]) + +# Remove second ECMP route +check ovn-nbctl lr-route-del R1 +check ovn-nbctl --wait=hv sync +wait_row_count ECMP_Nexthop 2 +wait_column 
'f0:00:00:01:02:06' ECMP_Nexthop mac nexthop='"fd12::2"'
+wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::3"'
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \
+sed -e 's/zone=[[0-9]]*/zone=/' |
+sed -e 's/mark=[[0-9]]*/mark=/' | sort], [0], [dnl
+icmpv6,orig=(src=fd13::a,dst=fd14::2,id=,type=128,code=0),reply=(src=fd14::2,dst=fd13::a,id=,type=129,code=0),zone=,mark=,labels=0xf0000001020600000000
+tcp,orig=(src=fd13::a,dst=fd14::2,sport=,dport=),reply=(src=fd14::2,dst=fd13::a,sport=,dport=),zone=,mark=,labels=0xf0000001020600000000,protoinfo=(state=)
+])
+
+check ovn-nbctl lr-route-del R2
+check ovn-nbctl --wait=hv sync
+wait_row_count ECMP_Nexthop 0
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \
+sed -e 's/zone=[[0-9]]*/zone=/' |
+sed -e 's/mark=[[0-9]]*/mark=/' | sort], [0], [dnl
+])
+
+OVS_APP_EXIT_AND_WAIT([ovn-controller])
+
+as ovn-sb
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+as ovn-nb
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+as northd
+OVS_APP_EXIT_AND_WAIT([ovn-northd])
+
+as
+OVS_TRAFFIC_VSWITCHD_STOP(["/.*error receiving.*/d
+/.*terminating with signal 15.*/d"])
+AT_CLEANUP
+])