diff --git a/katran/decap/tc_bpf/tc_decap_stats.bpf.c b/katran/decap/tc_bpf/tc_decap_stats.bpf.c
index 5ebec9920..d673164f5 100644
--- a/katran/decap/tc_bpf/tc_decap_stats.bpf.c
+++ b/katran/decap/tc_bpf/tc_decap_stats.bpf.c
@@ -16,13 +16,112 @@
  */
 
 #include
+#include
+#include
+#include
 #include
+#include
+#include
 
+#include "katran/lib/linux_includes/bpf.h"
 #include "katran/lib/linux_includes/bpf_helpers.h"
 
-SEC("tc")
-int tcdecapinfo(struct __sk_buff* skb) {
+#include "katran/lib/bpf/balancer_consts.h"
+#include "katran/lib/bpf/pckt_encap.h"
+#include "katran/lib/bpf/pckt_parsing.h"
+
+#include "pckt_helpers.h"
+#include "tc_decap_stats_maps.h"
+
+/*
+ * Updates the TPR counters in data_stats for a single packet.
+ *
+ * Bails out without touching the counters when process_l3_headers() returns
+ * a value >= 0, when the L4 protocol is not TCP, or when parse_tcp() fails.
+ * For TCP packets without F_SYN_SET, and only when both the host server_id
+ * (tpr_server_id map, key 0) and the server_id found in the TCP header option
+ * are non-zero: tpr_total is incremented, and tpr_misrouted too if they differ.
+ */
+__attribute__((__always_inline__)) static inline void validate_tpr_server_id(
+    void* data,
+    __u64 off,
+    void* data_end,
+    bool is_ipv6,
+    struct __sk_buff* skb,
+    struct decap_tpr_stats* data_stats) {
+  struct packet_description inner_pckt = {};
+  if (process_l3_headers(data, data_end, off, is_ipv6, &inner_pckt.flow) >= 0) {
+    return;
+  }
+  if (inner_pckt.flow.proto != IPPROTO_TCP) {
+    return;
+  }
+  if (!parse_tcp(data, data_end, is_ipv6, &inner_pckt)) {
+    return;
+  }
+  // only check for TCP non SYN packets
+  if (!(inner_pckt.flags & F_SYN_SET)) {
+    // lookup server id from tpr header option and compare against server_id on
+    // this host (if available)
+    __u32 s_key = 0;
+    __u32* server_id_host = bpf_map_lookup_elem(&tpr_server_id, &s_key);
+    if (server_id_host && *server_id_host > 0) {
+      __u32 server_id = 0;
+      tcp_hdr_opt_lookup_server_id_skb(skb, is_ipv6, &server_id);
+      if (server_id > 0) {
+        data_stats->tpr_total += 1;
+        if (*server_id_host != server_id) {
+          data_stats->tpr_misrouted += 1;
+        }
+      }
+    }
+  }
+}
+
+/*
+ * Fetches the per-CPU TPR counters (decap_tpr_counters, key 0) and feeds the
+ * packet to validate_tpr_server_id(). Returns TC_ACT_UNSPEC on every path.
+ */
+__attribute__((__always_inline__)) static inline int process_packet(
+    void* data,
+    __u64 off,
+    void* data_end,
+    bool is_ipv6,
+    struct __sk_buff* skb) {
+  struct decap_tpr_stats* data_stats;
+  __u32 key = 0;
+  data_stats = bpf_map_lookup_elem(&decap_tpr_counters, &key);
+  if (!data_stats) {
+    // XDP_PASS (2) equals TC_ACT_SHOT under tc and would drop the packet.
+    return TC_ACT_UNSPEC;
+  }
+  validate_tpr_server_id(data, off, data_end, is_ipv6, skb, data_stats);
   return TC_ACT_UNSPEC;
 }
 
+/*
+ * tc entry point. IPv4 and IPv6 frames go to process_packet(); frames too
+ * short for an Ethernet header or with another ethertype get TC_ACT_UNSPEC.
+ */
+SEC("tc")
+int tcdecapstats(struct __sk_buff* skb) {
+  void* data = (void*)(long)skb->data;
+  void* data_end = (void*)(long)skb->data_end;
+  __u32 eth_proto;
+  struct ethhdr* eth = data;
+  __u32 nh_off = sizeof(struct ethhdr);
+
+  if (data + nh_off > data_end) {
+    return TC_ACT_UNSPEC;
+  }
+  eth_proto = eth->h_proto;
+  if (eth_proto == BE_ETH_P_IP) {
+    return process_packet(data, nh_off, data_end, false, skb);
+  } else if (eth_proto == BE_ETH_P_IPV6) {
+    return process_packet(data, nh_off, data_end, true, skb);
+  } else {
+    return TC_ACT_UNSPEC;
+  }
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/katran/decap/tc_bpf/tc_decap_stats_maps.h b/katran/decap/tc_bpf/tc_decap_stats_maps.h
new file mode 100644
index 000000000..07b2adb1e
--- /dev/null
+++ b/katran/decap/tc_bpf/tc_decap_stats_maps.h
@@ -0,0 +1,52 @@
+/* Copyright (C) 2019-present, Facebook, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef __DECAP_STATS_MAPS_H
+#define __DECAP_STATS_MAPS_H
+
+#include "katran/lib/linux_includes/bpf.h"
+#include "katran/lib/linux_includes/bpf_helpers.h"
+
+#include "katran/lib/bpf/balancer_consts.h"
+
+#ifndef DECAP_STATS_MAP_SIZE
+#define DECAP_STATS_MAP_SIZE 1
+#endif
+
+struct decap_tpr_stats {
+  __u64 tpr_misrouted; // counted packets whose server_id differs from the host's
+  __u64 tpr_total; // non-SYN TCP packets carrying a non-zero server_id
+};
+
+// map for tpr related counters
+struct {
+  __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+  __type(key, __u32);
+  __type(value, struct decap_tpr_stats);
+  __uint(max_entries, DECAP_STATS_MAP_SIZE);
+  __uint(map_flags, NO_FLAGS);
+} decap_tpr_counters SEC(".maps");
+
+// map, which contains server_id info
+struct {
+  __uint(type, BPF_MAP_TYPE_ARRAY);
+  __type(key, __u32);
+  __type(value, __u32);
+  __uint(max_entries, 1);
+  __uint(map_flags, NO_FLAGS);
+} tpr_server_id SEC(".maps");
+
+#endif // of __DECAP_STATS_MAPS_H
diff --git a/katran/lib/bpf/pckt_parsing.h b/katran/lib/bpf/pckt_parsing.h
index 426f49c71..92eba42a7 100644
--- a/katran/lib/bpf/pckt_parsing.h
+++ b/katran/lib/bpf/pckt_parsing.h
@@ -211,6 +211,20 @@ int parse_hdr_opt(const struct xdp_md *xdp, struct hdr_opt_state *state)
   return parse_hdr_opt_raw(data, data_end, state);
 }
 
+/*
+ * skb flavour of parse_hdr_opt(): hands the skb's data/data_end pointers to
+ * parse_hdr_opt_raw() and returns its result.
+ * NOTE(review): no static/inline attribute here; confirm that matches the
+ * attributes on parse_hdr_opt(), which are outside this hunk.
+ */
+int parse_hdr_opt_skb(
+    const struct __sk_buff* skb,
+    struct hdr_opt_state* state) {
+  const void* data = (void*)(long)skb->data;
+  const void* data_end = (void*)(long)skb->data_end;
+  return parse_hdr_opt_raw(data, data_end, state);
+}
+
 __attribute__((__always_inline__)) static inline int
 tcp_hdr_opt_lookup_server_id(
     const struct xdp_md* xdp,
@@ -254,6 +268,58 @@ tcp_hdr_opt_lookup_server_id(
   *server_id = opt_state.server_id;
   return 0;
 }
+/*
+ * skb flavour of tcp_hdr_opt_lookup_server_id().
+ *
+ * Walks up to TCP_HDR_OPT_MAX_OPT_CHECKS TCP header options via
+ * parse_hdr_opt_skb() and stores the server_id found there in *server_id.
+ * Returns 0 on success, FURTHER_PROCESSING when the TCP header does not fit
+ * in the packet, the options are shorter than TCP_HDR_OPT_LEN_TPR, or no
+ * non-zero server_id was found (*server_id is left untouched then).
+ */
+__attribute__((__always_inline__)) static inline int
+tcp_hdr_opt_lookup_server_id_skb(
+    const struct __sk_buff* skb,
+    bool is_ipv6,
+    __u32* server_id) {
+  const void* data = (void*)(long)skb->data;
+  const void* data_end = (void*)(long)skb->data_end;
+  struct tcphdr* tcp_hdr;
+  __u8 tcp_hdr_opt_len = 0;
+  __u64 tcp_offset = 0;
+  struct hdr_opt_state opt_state = {};
+  int err = 0;
+
+  tcp_offset = calc_offset(is_ipv6, false /* is_icmp */);
+  tcp_hdr = (struct tcphdr*)(data + tcp_offset);
+  if (tcp_hdr + 1 > data_end) {
+    return FURTHER_PROCESSING;
+  }
+  tcp_hdr_opt_len = (tcp_hdr->doff * 4) - sizeof(struct tcphdr);
+  if (tcp_hdr_opt_len < TCP_HDR_OPT_LEN_TPR) {
+    return FURTHER_PROCESSING;
+  }
+
+  opt_state.hdr_bytes_remaining = tcp_hdr_opt_len;
+  opt_state.byte_offset = sizeof(struct tcphdr) + tcp_offset;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 3, 0) || \
+    !defined TCP_HDR_OPT_SKIP_UNROLL_LOOP
+  // For linux kernel version < 5.3, there isn't support in the bpf verifier
+  // for validating bounded loops, so we need to unroll the loop
+#pragma clang loop unroll(full)
+#endif
+  for (int i = 0; i < TCP_HDR_OPT_MAX_OPT_CHECKS; i++) {
+    err = parse_hdr_opt_skb(skb, &opt_state);
+    if (err || !opt_state.hdr_bytes_remaining) {
+      break;
+    }
+  }
+  if (!opt_state.server_id) {
+    return FURTHER_PROCESSING;
+  }
+  *server_id = opt_state.server_id;
+  return 0;
+}
 #endif // TCP_SERVER_ID_ROUTING) || DECAP_TPR_STATS
 
 #ifdef TCP_SERVER_ID_ROUTING