From 38319bce6ba6cba1c419a1e9da0827cb6e2cbb93 Mon Sep 17 00:00:00 2001 From: Trekkie Coder Date: Wed, 27 Nov 2024 13:44:09 +0900 Subject: [PATCH] loxilb-io/loxilb#675 Initial support for proxy protocol v2 --- common/parsing_helpers.h | 12 ++++- kernel/llb_kern_cdefs.h | 92 +++++++++++++++++++++++++++++++-------- kernel/llb_kern_compose.c | 2 +- kernel/llb_kern_sum.c | 2 +- 4 files changed, 87 insertions(+), 21 deletions(-) diff --git a/common/parsing_helpers.h b/common/parsing_helpers.h index 8e49f7e..fc2eefe 100644 --- a/common/parsing_helpers.h +++ b/common/parsing_helpers.h @@ -178,7 +178,17 @@ static inline int ipv6_addr_is_multicast(const struct in6_addr *addr) static __always_inline __be16 csum_fold_helper(__be32 csum) { - return ~((csum & 0xffff) + (csum >> 16)); + csum = (csum & 0xffff) + (csum >> 16); + csum = (csum & 0xffff) + (csum >> 16); + return (__u16)~csum; +} + +static __always_inline __be16 +csum_fold_helper_diff(__be32 csum) +{ + csum = (csum & 0xffff) + (csum >> 16); + csum = (csum & 0xffff) + (csum >> 16); + return (__u16)csum; } static __always_inline void diff --git a/kernel/llb_kern_cdefs.h b/kernel/llb_kern_cdefs.h index 48efdbb..f293a4e 100644 --- a/kernel/llb_kern_cdefs.h +++ b/kernel/llb_kern_cdefs.h @@ -770,7 +770,7 @@ do { \ #define RECPP_LATENCY(ctx, xf) #endif -#define RETURN_TO_MP_OUT() \ +#define RETURN_TO_MP_OUT(ctx) \ do { \ xf->pm.phit |= LLB_DP_RES_HIT; \ bpf_tail_call(ctx, &pgm_tbl, LLB_DP_CT_PGM_ID);\ @@ -955,9 +955,10 @@ dp_rewire_port(void *tbl, struct xfi *xf) } static int __always_inline -dp_populate_ppv2(void *md, struct xfi *xf, void *start) +dp_populate_ppv2(void *md, struct xfi *xf, void *start, __be32 *csum) { struct proxy_hdr_v2 *ppv2h; + struct proxy_ipv4_hdr *piph; __u8 sig[12] = { 0x0D, 0x0A, 0x0D, 0x0A, 0x00, 0x0D, 0x0A, 0x51, 0x55, 0x49, 0x54, 0x0A }; void *dend = DP_TC_PTR(DP_PDATA_END(md)); @@ -967,21 +968,25 @@ dp_populate_ppv2(void *md, struct xfi *xf, void *start) LLBS_PPLN_DROPC(xf, LLB_PIPE_RC_PLERR); return -1; } + memcpy(ppv2h->sig, sig, 12); ppv2h->ver_cmd = 0x21; ppv2h->family = 0x11; ppv2h->len = bpf_htons(sizeof(struct proxy_ipv4_hdr)); - struct proxy_ipv4_hdr *piph = (void *)(ppv2h + 1); + piph = (void *)(ppv2h + 1); if (piph + 1 > dend) { LLBS_PPLN_DROPC(xf, LLB_PIPE_RC_PLERR); return -1; } + piph->src_addr = xf->l34m.saddr[0]; piph->dst_addr = xf->l34m.daddr[0]; piph->src_port = xf->l34m.source; piph->dst_port = xf->l34m.dest; + *csum = bpf_csum_diff((__be32 *)ppv2h, sizeof(*ppv2h) + sizeof(*piph), 0, 0, *csum); + return 0; } @@ -990,6 +995,9 @@ dp_fixup_ppv2(void *md, struct xfi *xf) { struct tcphdr *tcp; void *dend; + __be32 oval = 0; + __u32 nval = 0; + __u32 csum = 0; if (xf->l2m.dl_type != bpf_htons(ETH_P_IP) || xf->l34m.nw_proto != IPPROTO_TCP) { return 0; @@ -1003,24 +1011,36 @@ dp_fixup_ppv2(void *md, struct xfi *xf) } if (xf->pm.oppv2) { - __u32 seq = bpf_ntohl(tcp->seq); - seq += sizeof(struct proxy_hdr_v2) + sizeof(struct proxy_ipv4_hdr); - tcp->seq = bpf_htonl(seq); + oval = tcp->seq; + nval = bpf_ntohl(tcp->seq) + sizeof(struct proxy_hdr_v2) + sizeof(struct proxy_ipv4_hdr); + tcp->seq = bpf_htonl(nval); + nval = tcp->seq; } else if (xf->pm.ippv2) { - __u32 ack = bpf_ntohl(tcp->ack_seq); - ack -= sizeof(struct proxy_hdr_v2) + sizeof(struct proxy_ipv4_hdr); - tcp->ack_seq = bpf_htonl(ack); + oval = tcp->ack_seq; + nval = bpf_ntohl(tcp->ack_seq) - (sizeof(struct proxy_hdr_v2) + sizeof(struct proxy_ipv4_hdr)); + tcp->ack_seq = bpf_htonl(nval); + nval = tcp->ack_seq; } + + csum = bpf_csum_diff((__be32 *)&nval, 4, (__be32 *)&oval, 4, tcp->check); + tcp->check = csum_fold_helper_diff((__u32)csum); + return 0; } static int __always_inline dp_ins_ppv2(void *md, struct xfi *xf) { + struct proxy_hdr_v2 *ppv2h; struct iphdr *iph; + struct tcphdr *tcp; + struct tcphdr *ntcp; void *dend; - struct proxy_hdr_v2 *ppv2h; + __u16 doff; + __u32 olp; + __u32 nlp; __u64 flags; + __u32 csum = 0; int len = sizeof(struct proxy_hdr_v2); @@ -1049,18 +1069,25 @@ dp_ins_ppv2(void *md, struct xfi *xf) return -1; } - iph->tot_len = bpf_htons(xf->pm.l3_len + len); + iph->tot_len = bpf_htons(xf->pm.l3_len + len); dp_ipv4_new_csum((void *)iph); - struct tcphdr *tcp = DP_ADD_PTR(DP_PDATA(md), xf->pm.l4_off + len); + dend = DP_TC_PTR(DP_PDATA_END(md)); + tcp = DP_ADD_PTR(DP_PDATA(md), xf->pm.l4_off + len); if (tcp + 1 > dend) { LLBS_PPLN_DROPC(xf, LLB_PIPE_RC_PLERR); return -1; } - __u16 doff = tcp->doff << 2; + doff = tcp->doff << 2; + + /* Checksum changes due to TCP segment length change */ + olp = (__u32)bpf_htons(xf->pm.l3_plen); + nlp = (__u32)bpf_htons(xf->pm.l3_plen + len); + csum = bpf_csum_diff((__be32 *)&nlp, 4, (__be32 *)&olp, 4, tcp->check); - struct tcphdr *ntcp = DP_ADD_PTR(DP_PDATA(md), xf->pm.l4_off); + + ntcp = DP_ADD_PTR(DP_PDATA(md), xf->pm.l4_off); if (ntcp + 1 > dend) { LLBS_PPLN_DROPC(xf, LLB_PIPE_RC_PLERR); return -1; @@ -1081,7 +1108,7 @@ dp_ins_ppv2(void *md, struct xfi *xf) } memcpy(ntop, top, 4); ppv2h = (void *)(ntop + 4); - dp_populate_ppv2(md, xf, ppv2h); + dp_populate_ppv2(md, xf, ppv2h, &csum); } else if (doff == 28) { __u8 *top = (void *)(tcp + 1); if (top + 8 > dend) { @@ -1095,7 +1122,7 @@ dp_ins_ppv2(void *md, struct xfi *xf) } memcpy(ntop, top, 8); ppv2h = (void *)(ntop + 8); - dp_populate_ppv2(md, xf, ppv2h); + dp_populate_ppv2(md, xf, ppv2h, &csum); } else if (doff == 32) { __u8 *top = (void *)(tcp + 1); if (top + 12 > dend) { @@ -1109,11 +1136,40 @@ dp_ins_ppv2(void *md, struct xfi *xf) } memcpy(ntop, top, 12); ppv2h = (void *)(ntop + 12); - dp_populate_ppv2(md, xf, ppv2h); + dp_populate_ppv2(md, xf, ppv2h, &csum); + } else if (doff == 36) { + __u8 *top = (void *)(tcp + 1); + if (top + 16 > dend) { + LLBS_PPLN_DROPC(xf, LLB_PIPE_RC_PLERR); + return -1; + } + __u8 *ntop = (void *)(ntcp + 1); + if (ntop + 16 > dend) { + LLBS_PPLN_DROPC(xf, LLB_PIPE_RC_PLERR); + return -1; + } + memcpy(ntop, top, 16); + ppv2h = (void *)(ntop + 16); + dp_populate_ppv2(md, xf, ppv2h, &csum); } else if (doff != 20) { LLBS_PPLN_DROPC(xf, LLB_PIPE_RC_PLERR); return -1; } + + dend = DP_TC_PTR(DP_PDATA_END(md)); + iph = DP_ADD_PTR(DP_PDATA(md), xf->pm.l3_off); + if (iph + 1 > dend) { + LLBS_PPLN_DROPC(xf, LLB_PIPE_RC_PLERR); + return -1; + } + + tcp = DP_ADD_PTR(DP_PDATA(md), xf->pm.l4_off); + if (tcp + 1 > dend) { + LLBS_PPLN_DROPC(xf, LLB_PIPE_RC_PLERR); + return -1; + } + + tcp->check = csum_fold_helper_diff((__u32)csum); } return 0; } @@ -2128,7 +2184,7 @@ dp_llb_add_crc_off(void *md, struct xfi *xf, int val) #define dp_sunp_tcall(x, y) #define TCALL_CRC1() #define TCALL_CRC2() -#define RETURN_TO_MP_OUT() +#define RETURN_TO_MP_OUT(x) #define TRACER_CALL(ctx, xf) #define RECPP_LATENCY(ctx, xf) #define DP_SET_STARTS(xf) diff --git a/kernel/llb_kern_compose.c b/kernel/llb_kern_compose.c index 93745cf..c9d6dba 100644 --- a/kernel/llb_kern_compose.c +++ b/kernel/llb_kern_compose.c @@ -992,7 +992,7 @@ dp_unparse_packet_always_slow(void *ctx, struct xfi *xf) xf->pm.nf = 0; - RETURN_TO_MP_OUT(); + RETURN_TO_MP_OUT(ctx); return DP_DROP; } diff --git a/kernel/llb_kern_sum.c b/kernel/llb_kern_sum.c index 6b0356d..d986354 100644 --- a/kernel/llb_kern_sum.c +++ b/kernel/llb_kern_sum.c @@ -99,7 +99,7 @@ dp_sctp_csum(void *ctx, struct xfi *xf) DP_LLB_SET_CRC_OFF(ctx, (sctp_csum_off<<16)); } - RETURN_TO_MP_OUT(); + RETURN_TO_MP_OUT(ctx); } /* Update state-variables */