From 961bc4016e49613869d50890f0f28965aee0c31e Mon Sep 17 00:00:00 2001 From: eplabal Date: Wed, 5 Apr 2023 14:25:56 +0200 Subject: [PATCH 1/7] libxdp implementation --- configure.ac | 47 ++++++++- src/common/sendpacket.c | 207 ++++++++++++++++++++++++++++++++++++++-- src/common/sendpacket.h | 111 ++++++++++++++++++++- src/defines.h.in | 22 +++++ src/send_packets.c | 97 ++++++++++++++++--- src/send_packets.h | 6 +- src/tcpreplay_api.c | 45 ++++++++- src/tcpreplay_api.h | 10 +- src/tcpreplay_opts.def | 9 ++ 9 files changed, 525 insertions(+), 29 deletions(-) diff --git a/configure.ac b/configure.ac index d624d4d4..199171bc 100644 --- a/configure.ac +++ b/configure.ac @@ -550,6 +550,10 @@ AC_ARG_ENABLE(force-libdnet, AS_HELP_STRING([--enable-force-libdnet],[Force using libdnet for sending packets]), [ AC_DEFINE([FORCE_INJECT_LIBDNET], [1], [Force using libdnet for sending packets])]) +AC_ARG_ENABLE(force-libxdp, + AS_HELP_STRING([--enable-force-libxdp],[Force using libxdp for sending packets]), + [ AC_DEFINE([FORCE_INJECT_LIBXDP], [1], [Force using libxdp for sending packets])]) + AC_ARG_ENABLE(force-inject, AS_HELP_STRING([--enable-force-inject],[Force using libpcap's pcap_inject() for sending packets]), [ AC_DEFINE([FORCE_INJECT_PCAP_INJECT],[1], [Force using libpcap's pcap_inject() for sending packets])]) @@ -826,7 +830,13 @@ fi # libpcap can require libnl AC_SEARCH_LIBS([nl_handle_alloc], [nl], - [AC_MSG_NOTICE([Unable to find nl library - may be needed by libpcap])]) + [AC_MSG_NOTICE([Unable to find xdp library - may be needed by libpcap])]) + +AC_CHECK_LIB(bpf, bpf_object__open_file,, + [AC_MSG_NOTICE([Unable to find libbpf library ])]) + +AC_CHECK_LIB(xdp, xsk_umem__delete,, + [AC_MSG_NOTICE([Unable to find libxdp library ])]) ## ## If not automatically configured, @@ -1383,6 +1393,37 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ AC_MSG_RESULT(no) ]) +have_libxdp=no +dnl Check for LIBXDP AF_XDP socket support +AC_MSG_CHECKING(for LIBXDP XDP packet sending support) 
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ +#include +#include +#include +]], [[ + struct xsk_socket { + struct xsk_ring_cons *rx; + struct xsk_ring_prod *tx; + struct xsk_ctx *ctx; + struct xsk_socket_config config; + int fd; + }; + struct xsk_socket *xsk; + struct xsk_ring_cons *rxr = NULL; + struct xsk_ring_prod *txr = NULL; + xsk = (struct xsk_socket*)malloc(sizeof(struct xsk_socket)); + int queue_id = 0; + xsk_socket__create(xsk, "lo", queue_id, NULL, rxr, txr, NULL); + socket(AF_XDP, SOCK_RAW, 0); +]])],[ + AC_DEFINE([HAVE_LIBXDP], [1], + [Do we have LIBXDP AF_XDP socket support?]) + AC_MSG_RESULT(yes) + have_libxdp=yes +],[ + AC_MSG_RESULT(no) +]) + have_tx_ring=no dnl Check for older Linux TX_RING support AC_MSG_CHECKING(for TX_RING socket sending support) @@ -1404,6 +1445,9 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ AC_MSG_RESULT(no) ]) +AC_CHECK_HEADERS([bpf/libbpf.h]) +AC_CHECK_HEADERS([bpf/bpf.h]) +AC_CHECK_HEADERS([xdp/libxdp.h]) AC_CHECK_HEADERS([net/bpf.h], [have_bpf=yes], [have_bpf=no]) if test $have_bpf = yes ; then @@ -1923,6 +1967,7 @@ pcap_sendpacket: ${have_pcap_sendpacket} ** pcap_netmap ${have_pcap_netmap} Linux/BSD netmap: ${have_netmap} Tuntap device support: ${have_tuntap} +LIBXDP for AF_XDP socket: ${have_libxdp} * In order of preference; see configure --help to override ** Required for tcpbridge diff --git a/src/common/sendpacket.c b/src/common/sendpacket.c index e0e0dc43..d7c12e53 100644 --- a/src/common/sendpacket.c +++ b/src/common/sendpacket.c @@ -64,6 +64,7 @@ #undef HAVE_PCAP_INJECT #undef HAVE_PCAP_SENDPACKET #undef HAVE_BPF +#undef HAVE_LIBXDP #endif #ifdef FORCE_INJECT_PF_PACKET @@ -72,6 +73,7 @@ #undef HAVE_PCAP_INJECT #undef HAVE_PCAP_SENDPACKET #undef HAVE_BPF +#undef HAVE_LIBXDP #endif #ifdef FORCE_INJECT_LIBDNET @@ -80,6 +82,7 @@ #undef HAVE_PCAP_INJECT #undef HAVE_PCAP_SENDPACKET #undef HAVE_BPF +#undef HAVE_LIBXDP #endif #ifdef FORCE_INJECT_BPF @@ -88,6 +91,7 @@ #undef HAVE_PCAP_INJECT #undef HAVE_PCAP_SENDPACKET #undef 
HAVE_PF_PACKET +#undef HAVE_LIBXDP #endif #ifdef FORCE_INJECT_PCAP_INJECT @@ -96,6 +100,7 @@ #undef HAVE_PCAP_SENDPACKET #undef HAVE_BPF #undef HAVE_PF_PACKET +#undef HAVE_LIBXDP #endif #ifdef FORCE_INJECT_PCAP_SENDPACKET @@ -104,14 +109,24 @@ #undef HAVE_PCAP_INJECT #undef HAVE_BPF #undef HAVE_PF_PACKET +#undef HAVE_LIBXDP +#endif + +#ifdef FORCE_INJECT_LIBXDP +#undef HAVE_TX_RING +#undef HAVE_LIBDNET +#undef HAVE_PF_PACKET +#undef HAVE_PCAP_INJECT +#undef HAVE_PCAP_SENDPACKET +#undef HAVE_BPF #endif #if (defined HAVE_WINPCAP && defined HAVE_PCAP_INJECT) #undef HAVE_PCAP_INJECT /* configure returns true for some odd reason */ #endif -#if !defined HAVE_PCAP_INJECT && !defined HAVE_PCAP_SENDPACKET && !defined HAVE_LIBDNET && !defined HAVE_PF_PACKET && !defined HAVE_BPF && !defined TX_RING -#error You need pcap_inject() or pcap_sendpacket() from libpcap, libdnet, Linux's PF_PACKET/TX_RING or *BSD's BPF +#if !defined HAVE_PCAP_INJECT && !defined HAVE_PCAP_SENDPACKET && !defined HAVE_LIBDNET && !defined HAVE_PF_PACKET && !defined HAVE_BPF && !defined TX_RING && !defined HAVE_LIBXDP +#error You need pcap_inject() or pcap_sendpacket() from libpcap, libdnet, Linux's PF_PACKET/TX_RING/AF_XDP with libxdp or *BSD's BPF #endif @@ -211,7 +226,15 @@ static struct tcpr_ether_addr *sendpacket_get_hwaddr_pcap(sendpacket_t *) _U_; #undef INJECT_METHOD #define INJECT_METHOD "pcap_sendpacket()" #endif - +#ifdef HAVE_LIBXDP +#include +static sendpacket_t *sendpacket_open_xsk(const char *, char *) _U_; +static struct tcpr_ether_addr *sendpacket_get_hwaddr_libxdp(sendpacket_t *); +#endif +#if defined HAVE_LIBXDP && ! 
defined INJECT_METHOD +#undef INJECT_METHOD +#define INJECT_METHOD "xsk_ring_prod_submit()" +#endif static void sendpacket_seterr(sendpacket_t *sp, const char *fmt, ...); static sendpacket_t * sendpacket_open_khial(const char *, char *) _U_; static struct tcpr_ether_addr * sendpacket_get_hwaddr_khial(sendpacket_t *) _U_; @@ -237,7 +260,10 @@ sendpacket(sendpacket_t *sp, const u_char *data, size_t len, struct pcap_pkthdr static const size_t buffer_payload_size = sizeof(buffer) + sizeof(struct pcap_pkthdr); assert(sp); + #ifndef HAVE_LIBXDP + // In case of XDP packet processing we are storing data in sp->packet_processing->xdp_descs assert(data); + #endif if (len == 0) return -1; @@ -444,7 +470,18 @@ sendpacket(sendpacket_t *sp, const u_char *data, size_t len, struct pcap_pkthdr } #endif /* HAVE_NETMAP */ break; - + case SP_TYPE_LIBXDP: + #ifdef HAVE_LIBXDP + retcode = len; + xsk_ring_prod__submit(&(sp->xsk_info->tx), sp->pckt_count); //submit all packets at once + sp->xsk_info->ring_stats.tx_npkts += sp->pckt_count; + sp->xsk_info->outstanding_tx += sp->pckt_count; + while(sp->xsk_info->outstanding_tx != 0){ + complete_tx_only(sp); + } + sp->sent += sp->pckt_count; + #endif + break; default: errx(-1, "Unsupported sp->handle_type = %d", sp->handle_type); } /* end case */ @@ -458,8 +495,15 @@ sendpacket(sendpacket_t *sp, const u_char *data, size_t len, struct pcap_pkthdr retcode, len); sp->trunc_packets ++; } else { + #ifndef HAVE_LIBXDP sp->bytes_sent += len; sp->sent ++; + #else + if(sp->handle_type != SP_TYPE_LIBXDP){ + sp->bytes_sent += len; + sp->sent ++; + } + #endif } return retcode; } @@ -542,6 +586,8 @@ sendpacket_open(const char *device, char *errbuf, tcpr_dir_t direction, sp = sendpacket_open_libdnet(device, errbuf); #elif (defined HAVE_PCAP_INJECT || defined HAVE_PCAP_SENDPACKET) sp = sendpacket_open_pcap(device, errbuf); +#elif defined HAVE_LIBXDP + sp = sendpacket_open_xsk(device, errbuf); #else #error "No defined packet injection method for 
sendpacket_open()" #endif @@ -562,13 +608,13 @@ sendpacket_open(const char *device, char *errbuf, tcpr_dir_t direction, size_t sendpacket_getstat(sendpacket_t *sp, char *buf, size_t buf_size) { - size_t offset; + size_t offset = 0; assert(sp); assert(buf); memset(buf, 0, buf_size); - offset = snprintf(buf, buf_size, "Statistics for network device: %s\n" + snprintf(buf, buf_size, "Statistics for network device: %s\n" "\tSuccessful packets: " COUNTER_SPEC "\n" "\tFailed packets: " COUNTER_SPEC "\n" "\tTruncated packets: " COUNTER_SPEC "\n" @@ -658,7 +704,7 @@ sendpacket_close(sendpacket_t *sp) struct tcpr_ether_addr * sendpacket_get_hwaddr(sendpacket_t *sp) { - struct tcpr_ether_addr *addr; + struct tcpr_ether_addr *addr = NULL; assert(sp); /* if we already have our MAC address stored, just return it */ @@ -670,6 +716,8 @@ sendpacket_get_hwaddr(sendpacket_t *sp) } else { #if defined HAVE_PF_PACKET addr = sendpacket_get_hwaddr_pf(sp); +#elif defined HAVE_LIBXDP + addr = sendpacket_get_hwaddr_libxdp(sp); #elif defined HAVE_BPF addr = sendpacket_get_hwaddr_bpf(sp); #elif defined HAVE_LIBDNET @@ -1055,7 +1103,7 @@ sendpacket_get_hwaddr_pf(sendpacket_t *sp) } #endif /* HAVE_PF_PACKET */ -#if defined HAVE_BPF +#if 0 /** * Inner sendpacket_open() method for using BSD's BPF interface */ @@ -1244,7 +1292,7 @@ sendpacket_get_dlt(sendpacket_t *sp) /* always EN10MB */ ; } else { -#if defined HAVE_BPF +#if 0 int rcode; if ((rcode = ioctl(sp->handle.fd, BIOCGDLT, &dlt)) < 0) { @@ -1333,3 +1381,144 @@ sendpacket_abort(sendpacket_t *sp) sp->abort = true; } +#ifdef HAVE_LIBXDP +static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem, struct xsk_socket_config* cfg, int queue_id, const char *device) +{ + struct xsk_socket_info *xsk; + struct xsk_ring_cons *rxr = NULL; + struct xsk_ring_prod *txr; + int ret; + xsk = (struct xsk_socket_info*)safe_malloc(sizeof(struct xsk_socket_info)); + xsk->umem = umem; + ret = xsk_socket__create(&xsk->xsk, device, queue_id, 
umem->umem, rxr, &xsk->tx, cfg); + if (ret){ + return NULL; + } + + memset(&xsk->app_stats, 0, sizeof(xsk->app_stats)); + + return xsk; +} + +static sendpacket_t * sendpacket_open_xsk(const char *device, char *errbuf){ + sendpacket_t *sp; + + assert(device); + assert(errbuf); + + int nb_of_frames = 4096; + int frame_size = 4096; + int nb_of_completion_queue_desc = 4096; + int nb_of_fill_queue_desc = 4096; + struct xsk_umem_info* umem_info = create_umem_area(nb_of_frames, frame_size, nb_of_completion_queue_desc, nb_of_fill_queue_desc); + if(umem_info == NULL){ + return NULL; + } + + int nb_of_tx_queue_desc = 4096; + int nb_of_rx_queue_desc = 4096; + u_int32_t queue_id = 0; + struct xsk_socket_info* xsk_info = create_xsk_socket(umem_info, nb_of_tx_queue_desc, nb_of_rx_queue_desc, device, queue_id, errbuf); + if(xsk_info == NULL){ + return NULL; + } + + sp = (sendpacket_t *)safe_malloc(sizeof(sendpacket_t)); + strlcpy(sp->device, device, sizeof(sp->device)); + sp->handle.fd = xsk_info->xsk->fd; + sp->handle_type = SP_TYPE_LIBXDP; + sp->xsk_info = xsk_info; + sp->umem_info = umem_info; + sp->frame_size = frame_size; + return sp; +} + +struct xsk_umem_info* create_umem_area(int nb_of_frames, int frame_size, int nb_of_completion_queue_descs, int nb_of_fill_queue_descs){ + int umem_size = nb_of_frames * frame_size; + struct xsk_umem_info *umem; + void* umem_area = NULL; + struct xsk_umem_config cfg = { + /* We recommend that you set the fill ring size >= HW RX ring size + + * AF_XDP RX ring size. Make sure you fill up the fill ring + * with buffers at regular intervals, and you will with this setting + * avoid allocation failures in the driver. These are usually quite + * expensive since drivers have not been written to assume that + * allocation failures are common. For regular sockets, kernel + * allocated memory is used that only runs out in OOM situations + * that should be rare. 
+ */ + .fill_size = nb_of_fill_queue_descs * 2, + .comp_size = nb_of_completion_queue_descs, + .frame_size = frame_size, + .frame_headroom = 0, + .flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG + }; + umem = (struct xsk_umem_info*)safe_malloc(sizeof(struct xsk_umem_info)); + if (posix_memalign(&umem_area, getpagesize(), /* PAGE_SIZE aligned */ + umem_size)) { + fprintf(stderr, "ERROR: Can't allocate buffer memory \"%s\"\n", + strerror(errno)); + exit(EXIT_FAILURE); + } + int ret = xsk_umem__create(&umem->umem, umem_area, umem_size, &umem->fq, &umem->cq, &cfg); + umem->buffer = umem_area; + if(ret != 0){ + return NULL; + } + return umem; +} + +static struct xsk_socket_info* create_xsk_socket(struct xsk_umem_info* umem_info, int nb_of_tx_queue_desc, int nb_of_rx_queue_desc, const char *device, u_int32_t queue_id, char *errbuf){ + struct xsk_socket_info* xsk_info = (struct xsk_socket_info*)safe_malloc(sizeof(struct xsk_socket_info)); + struct xsk_socket_config* socket_config = (struct xsk_socket_config*)safe_malloc(sizeof(struct xsk_socket_config)); + + socket_config->rx_size = nb_of_rx_queue_desc; + socket_config->tx_size = nb_of_tx_queue_desc; + socket_config->libbpf_flags = XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD; + socket_config->bind_flags = 0; //XDP_FLAGS_SKB_MODE (1U << 1) or XDP_FLAGS_DRV_MODE (1U << 2) + xsk_info = xsk_configure_socket(umem_info, socket_config, queue_id, device); + + if(xsk_info == NULL){ + snprintf(errbuf, SENDPACKET_ERRBUF_SIZE, "AF_XDP socket configuration is not successful: %s", strerror(errno)); + return NULL; + } + return xsk_info; +} + +/** + * gets the hardware address via Linux's PF packet interface + */ +static struct tcpr_ether_addr * +sendpacket_get_hwaddr_libxdp(sendpacket_t *sp) +{ + struct ifreq ifr; + int fd; + + assert(sp); + + if (!sp->open) { + sendpacket_seterr(sp, "Unable to get hardware address on un-opened sendpacket handle"); + return NULL; + } + + + /* create dummy socket for ioctl */ + if ((fd = socket(AF_INET, SOCK_DGRAM, 0)) < 
0) { + sendpacket_seterr(sp, "Unable to open dummy socket for get_hwaddr: %s", strerror(errno)); + return NULL; + } + + memset(&ifr, 0, sizeof(ifr)); + strlcpy(ifr.ifr_name, sp->device, sizeof(ifr.ifr_name)); + + if (ioctl(fd, SIOCGIFHWADDR, (int8_t *)&ifr) < 0) { + close(fd); + sendpacket_seterr(sp, "Error getting hardware address: %s", strerror(errno)); + return NULL; + } + + memcpy(&sp->ether, &ifr.ifr_hwaddr.sa_data, ETHER_ADDR_LEN); + close(fd); + return(&sp->ether); +} +#endif /*HAVE_LIBXDP*/ diff --git a/src/common/sendpacket.h b/src/common/sendpacket.h index 248b87f2..f13762ef 100644 --- a/src/common/sendpacket.h +++ b/src/common/sendpacket.h @@ -69,7 +69,8 @@ typedef enum sendpacket_type_e { SP_TYPE_TX_RING, SP_TYPE_KHIAL, SP_TYPE_NETMAP, - SP_TYPE_TUNTAP + SP_TYPE_TUNTAP, + SP_TYPE_LIBXDP } sendpacket_type_t; /* these are the file_operations ioctls */ @@ -94,6 +95,70 @@ union sendpacket_handle { #define SENDPACKET_ERRBUF_SIZE 1024 #define MAX_IFNAMELEN 64 +#ifdef HAVE_LIBXDP +#include +#include +#include + +struct xsk_ring_stats { + unsigned long rx_npkts; + unsigned long tx_npkts; + unsigned long rx_dropped_npkts; + unsigned long rx_invalid_npkts; + unsigned long tx_invalid_npkts; + unsigned long rx_full_npkts; + unsigned long rx_fill_empty_npkts; + unsigned long tx_empty_npkts; + unsigned long prev_rx_npkts; + unsigned long prev_tx_npkts; + unsigned long prev_rx_dropped_npkts; + unsigned long prev_rx_invalid_npkts; + unsigned long prev_tx_invalid_npkts; + unsigned long prev_rx_full_npkts; + unsigned long prev_rx_fill_empty_npkts; + unsigned long prev_tx_empty_npkts; +}; +struct xsk_driver_stats { + unsigned long intrs; + unsigned long prev_intrs; +}; +struct xsk_app_stats { + unsigned long rx_empty_polls; + unsigned long fill_fail_polls; + unsigned long copy_tx_sendtos; + unsigned long tx_wakeup_sendtos; + unsigned long opt_polls; + unsigned long prev_rx_empty_polls; + unsigned long prev_fill_fail_polls; + unsigned long prev_copy_tx_sendtos; + unsigned 
long prev_tx_wakeup_sendtos; + unsigned long prev_opt_polls; +}; +struct xsk_umem_info { + struct xsk_ring_prod fq; + struct xsk_ring_cons cq; + struct xsk_umem *umem; + void *buffer; +}; +struct xsk_socket { + struct xsk_ring_cons *rx; + struct xsk_ring_prod *tx; + struct xsk_ctx *ctx; + struct xsk_socket_config config; + int fd; +}; +struct xsk_socket_info { + struct xsk_ring_cons rx; + struct xsk_ring_prod tx; + struct xsk_umem_info *umem; + struct xsk_socket *xsk; + struct xsk_ring_stats ring_stats; + struct xsk_app_stats app_stats; + struct xsk_driver_stats drv_stats; + u_int32_t outstanding_tx; +}; +#endif /*HAVE_LIBXDP*/ + struct sendpacket_s { tcpr_dir_t cache_dir; int open; @@ -144,12 +209,54 @@ struct sendpacket_s { #ifdef HAVE_TX_RING txring_t * tx_ring; #endif +#endif +#ifdef HAVE_LIBXDP + struct xsk_socket_info* xsk_info; + struct xsk_umem_info* umem_info; + unsigned int batch_size; + unsigned int pckt_count; + int frame_size; + int tx_idx; #endif bool abort; }; - typedef struct sendpacket_s sendpacket_t; +#ifdef HAVE_LIBXDP +struct xsk_umem_info* create_umem_area(int nb_of_frames, int frame_size, int nb_of_completion_queue_descs, int nb_of_fill_queue_descs); +static struct xsk_socket_info* create_xsk_socket(struct xsk_umem_info* umem, int nb_of_tx_queue_desc, int nb_of_rx_queue_desc, const char *device, u_int32_t queue_id, char *errbuf); +static inline void gen_eth_frame(struct xsk_umem_info *umem, u_int64_t addr, u_char* pkt_data, COUNTER pkt_size) +{ + memcpy(xsk_umem__get_data(umem->buffer, addr), pkt_data, pkt_size); +} + +static inline void kick_tx(struct xsk_socket_info *xsk) +{ + int ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0); + if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN || + errno == EBUSY || errno == ENETDOWN){ + return; + } + printf("%s\n", "Packet sending exited with error!"); + exit(ret); +} +static inline void complete_tx_only(sendpacket_t *sp) +{ + int completion_idx = 0; + if 
(sp->xsk_info->outstanding_tx == 0){ + return; + } + if (xsk_ring_prod__needs_wakeup(&(sp->xsk_info->tx))) { + sp->xsk_info->app_stats.tx_wakeup_sendtos++; + kick_tx(sp->xsk_info); + } + unsigned int rcvd = xsk_ring_cons__peek(&sp->xsk_info->umem->cq, sp->pckt_count, &(completion_idx)); + if (rcvd > 0) { + xsk_ring_cons__release(&sp->xsk_info->umem->cq, rcvd); + sp->xsk_info->outstanding_tx -= rcvd; + } +} +#endif /*HAVE_LIBXDP*/ int sendpacket(sendpacket_t *, const u_char *, size_t, struct pcap_pkthdr *); void sendpacket_close(sendpacket_t *); char *sendpacket_geterr(sendpacket_t *); diff --git a/src/defines.h.in b/src/defines.h.in index ea50e0ab..4ce62d3b 100644 --- a/src/defines.h.in +++ b/src/defines.h.in @@ -44,6 +44,28 @@ #include "tcpr.h" +#ifdef HAVE_BPF +#include +#define PCAP_DONT_INCLUDE_PCAP_BPF_H 1 +#endif + +#ifdef HAVE_LIBBPF +#undef HAVE_BPF +#include +#include +#define PCAP_DONT_INCLUDE_PCAP_BPF_H 1 + +struct bpf_program { +char dummy[0]; +}; + +#endif + +#ifdef HAVE_LIBXDP +#include +#endif + + #ifdef HAVE_BPF #include #define PCAP_DONT_INCLUDE_PCAP_BPF_H 1 diff --git a/src/send_packets.c b/src/send_packets.c index effe659e..044b2bd9 100644 --- a/src/send_packets.c +++ b/src/send_packets.c @@ -60,7 +60,6 @@ extern tcpedit_t *tcpedit; #include "send_packets.h" #include "sleep.h" - #ifdef DEBUG extern int debug; #endif @@ -71,7 +70,7 @@ static void calc_sleep_time(tcpreplay_t *ctx, struct timeval *pkt_time, COUNTER start_us, COUNTER *skip_length); static void tcpr_sleep(tcpreplay_t *ctx, sendpacket_t *sp _U_, struct timespec *nap_this_time, struct timeval *now); -static u_char *get_next_packet(tcpreplay_t *ctx, pcap_t *pcap, +static u_char *get_next_packet(tcpreplay_opt_t *options, pcap_t *pcap, struct pcap_pkthdr *pkthdr, int file_idx, packet_cache_t **prev_packet); @@ -283,7 +282,7 @@ preload_pcap_file(tcpreplay_t *ctx, int idx) dlt = pcap_datalink(pcap); /* loop through the pcap. get_next_packet() builds the cache for us! 
*/ - while ((pktdata = get_next_packet(ctx, pcap, &pkthdr, idx, prev_packet)) != NULL) { + while ((pktdata = get_next_packet(ctx->options, pcap, &pkthdr, idx, prev_packet)) != NULL) { packetnum++; if (options->flow_stats) update_flow_stats(ctx, NULL, &pkthdr, pktdata, dlt); @@ -338,6 +337,7 @@ send_packets(tcpreplay_t *ctx, pcap_t *pcap, int idx) bool top_speed = (options->speed.mode == speed_topspeed || (options->speed.mode == speed_mbpsrate && options->speed.speed == 0)); bool now_is_now = true; + bool read_next_packet = true; // used for LIBXDP batch packet processing with cached packets gettimeofday(&now, NULL); if (!timerisset(&stats->start_time)) { @@ -367,8 +367,8 @@ send_packets(tcpreplay_t *ctx, pcap_t *pcap, int idx) * Keep sending while we have packets or until * we've sent enough packets */ - while (!ctx->abort && - (pktdata = get_next_packet(ctx, pcap, &pkthdr, idx, prev_packet)) != NULL) { + while (!ctx->abort && read_next_packet && + (pktdata = get_next_packet(ctx->options, pcap, &pkthdr, idx, prev_packet)) != NULL) { now_is_now = false; packetnum++; @@ -473,8 +473,18 @@ send_packets(tcpreplay_t *ctx, pcap_t *pcap, int idx) /* * we know how long to sleep between sends, now do it. 
*/ - if (!top_speed) + if (!top_speed){ + #ifndef HAVE_LIBXDP tcpr_sleep(ctx, sp, &ctx->nap, &now); + #else + if(sp->handle_type != SP_TYPE_LIBXDP){ + tcpr_sleep(ctx, sp, &ctx->nap, &now); + }else if(sp->batch_size == 1){ + //In case of LIBXDP packet processing waiting only makes sense when batch size is one + tcpr_sleep(ctx, sp, &ctx->nap, &now); + } + #endif + } } #ifdef ENABLE_VERBOSE @@ -483,6 +493,18 @@ send_packets(tcpreplay_t *ctx, pcap_t *pcap, int idx) tcpdump_print(options->tcpdump, &pkthdr, pktdata); #endif +#ifdef HAVE_LIBXDP + if(sp->handle_type == SP_TYPE_LIBXDP){ + /*Reserve frames for the batch*/ + while (xsk_ring_prod__reserve(&(sp->xsk_info->tx), sp->batch_size, &(sp->tx_idx)) < sp->batch_size) { + complete_tx_only(sp); + } + /*The first packet is already in memory*/ + prepare_first_element_of_batch(ctx, &packetnum, pktdata, pkthdr.len); + /*Read more packets and prepare batch*/ + prepare_remaining_elements_of_batch(ctx, &packetnum, &read_next_packet, pcap, &idx, pkthdr, prev_packet); + } +#endif dbgx(2, "Sending packet #" COUNTER_SPEC, packetnum); /* write packet out on network */ if (sendpacket(sp, pktdata, pktlen, &pkthdr) < (int)pktlen) { @@ -500,7 +522,13 @@ send_packets(tcpreplay_t *ctx, pcap_t *pcap, int idx) #endif stats->pkts_sent++; + #ifndef HAVE_LIBXDP stats->bytes_sent += pktlen; + #else + if(sp->handle_type != SP_TYPE_LIBXDP){ + stats->bytes_sent += pktlen; + } + #endif /* print stats during the run? 
*/ if (options->stats > 0) { @@ -545,7 +573,6 @@ send_packets(tcpreplay_t *ctx, pcap_t *pcap, int idx) } } #endif /* HAVE_NETMAP */ - if (!now_is_now) gettimeofday(&now, NULL); @@ -607,8 +634,8 @@ send_dual_packets(tcpreplay_t *ctx, pcap_t *pcap1, int cache_file_idx1, pcap_t * prev_packet2 = NULL; } - pktdata1 = get_next_packet(ctx, pcap1, &pkthdr1, cache_file_idx1, prev_packet1); - pktdata2 = get_next_packet(ctx, pcap2, &pkthdr2, cache_file_idx2, prev_packet2); + pktdata1 = get_next_packet(ctx->options, pcap1, &pkthdr1, cache_file_idx1, prev_packet1); + pktdata2 = get_next_packet(ctx->options, pcap2, &pkthdr2, cache_file_idx2, prev_packet2); /* MAIN LOOP * Keep sending while we have packets or until @@ -797,9 +824,9 @@ send_dual_packets(tcpreplay_t *ctx, pcap_t *pcap1, int cache_file_idx1, pcap_t * /* get the next packet for this file handle depending on which we last used */ if (sp == ctx->intf2) { - pktdata2 = get_next_packet(ctx, pcap2, &pkthdr2, cache_file_idx2, prev_packet2); + pktdata2 = get_next_packet(ctx->options, pcap2, &pkthdr2, cache_file_idx2, prev_packet2); } else { - pktdata1 = get_next_packet(ctx, pcap1, &pkthdr1, cache_file_idx1, prev_packet1); + pktdata1 = get_next_packet(ctx->options, pcap1, &pkthdr1, cache_file_idx1, prev_packet1); } /* stop sending based on the duration limit... */ @@ -844,10 +871,9 @@ send_dual_packets(tcpreplay_t *ctx, pcap_t *pcap1, int cache_file_idx1, pcap_t * * will be updated as new entries are added (or retrieved) from the cache list. 
*/ u_char * -get_next_packet(tcpreplay_t *ctx, pcap_t *pcap, struct pcap_pkthdr *pkthdr, int idx, +get_next_packet(tcpreplay_opt_t *options, pcap_t *pcap, struct pcap_pkthdr *pkthdr, int idx, packet_cache_t **prev_packet) { - tcpreplay_opt_t *options = ctx->options; u_char *pktdata = NULL; uint32_t pktlen; @@ -1211,3 +1237,48 @@ get_user_count(tcpreplay_t *ctx, sendpacket_t *sp, COUNTER counter) return(uint32_t)send; } +#ifdef HAVE_LIBXDP +static inline void fill_umem_with_data_and_set_xdp_desc(sendpacket_t* sp, int tx_idx, COUNTER umem_index, u_char* pktdata, int len){ + COUNTER umem_index_mod = (umem_index % sp->batch_size) * sp->frame_size; // packets are sent in batch, after each batch umem memory is reusable + gen_eth_frame(sp->umem_info, umem_index_mod, pktdata, len); + struct xdp_desc* xdp_desc = xsk_ring_prod__tx_desc(&(sp->xsk_info->tx), tx_idx); + xdp_desc->addr = (COUNTER)(umem_index_mod); + xdp_desc->len = len; +} + +static inline void prepare_first_element_of_batch(tcpreplay_t *ctx, COUNTER* packetnum, u_char* pktdata, u_int32_t len){ + sendpacket_t* sp = ctx->intf1; + tcpreplay_stats_t *stats = &ctx->stats; + fill_umem_with_data_and_set_xdp_desc(sp, sp->tx_idx, *packetnum-1, pktdata, len); + sp->bytes_sent += len; + stats->bytes_sent += len; +} + +static inline void prepare_remaining_elements_of_batch(tcpreplay_t *ctx, COUNTER* packetnum, bool* read_next_packet, pcap_t *pcap, int* idx, struct pcap_pkthdr pkthdr, packet_cache_t **prev_packet){ + sendpacket_t* sp = ctx->intf1; + tcpreplay_stats_t *stats = &ctx->stats; + int datalink = ctx->options->file_cache[*idx].dlt; + bool preload = ctx->options->file_cache[*idx].cached; + u_char* pktdata = NULL; + unsigned int pckt_count = 1; + while(!ctx->abort && + (pckt_count < sp->batch_size) && (pktdata = get_next_packet(ctx->options, pcap, &pkthdr, *idx, prev_packet)) != NULL){ + fill_umem_with_data_and_set_xdp_desc(sp, sp->tx_idx + pckt_count, *packetnum, pktdata, pkthdr.len); + ++pckt_count; + ++*packetnum; 
+ stats->bytes_sent += pkthdr.len; + sp->bytes_sent += pkthdr.len; + stats->pkts_sent++; + if (ctx->options->flow_stats && !preload){ + update_flow_stats(ctx, + ctx->options->cache_packets ? sp : NULL, &pkthdr, pktdata, datalink); + } + } + if(pckt_count < sp-> batch_size){ + // No more packets to read, it is essential for cached packet processing + *read_next_packet = false; + } + sp->pckt_count = pckt_count; + dbgx(2, "Sending packets with LIBXDP in batch, packet numbers from %llu to %llu\n", packetnum - pckt_count +1, packetnum); +} +#endif /*HAVE_LIBXDP*/ diff --git a/src/send_packets.h b/src/send_packets.h index 316faa32..d6105742 100644 --- a/src/send_packets.h +++ b/src/send_packets.h @@ -28,5 +28,9 @@ void send_packets(tcpreplay_t *ctx, pcap_t *pcap, int idx); void send_dual_packets(tcpreplay_t *ctx, pcap_t *pcap1, int idx1, pcap_t *pcap2, int idx2); void *cache_mode(tcpreplay_t *ctx, char *cachedata, COUNTER packet_num); void preload_pcap_file(tcpreplay_t *ctx, int idx); - +#ifdef HAVE_LIBXDP +static inline void prepare_remaining_elements_of_batch(tcpreplay_t *ctx, COUNTER* packetnum, bool* read_next_packet, pcap_t *pcap, int* idx, struct pcap_pkthdr pkthdr, packet_cache_t **prev_packet); +static inline void prepare_first_element_of_batch(tcpreplay_t *ctx, COUNTER* packetnum, u_char* pktdata, u_int32_t len); +static inline void fill_umem_with_data_and_set_xdp_desc(sendpacket_t* sp, int tx_idx, COUNTER umem_index, u_char* pktdata, int len); +#endif #endif diff --git a/src/tcpreplay_api.c b/src/tcpreplay_api.c index d429b91b..fe876f88 100644 --- a/src/tcpreplay_api.c +++ b/src/tcpreplay_api.c @@ -358,7 +358,9 @@ tcpreplay_post_args(tcpreplay_t *ctx, int argc) ret = -1; goto out; } - +#ifdef HAVE_LIBXDP + ctx->intf1->batch_size = OPT_VALUE_BATCH_SIZE; +#endif #if defined HAVE_NETMAP ctx->intf1->netmap_delay = ctx->options->netmap_delay; #endif @@ -429,6 +431,15 @@ tcpreplay_close(tcpreplay_t *ctx) assert(ctx->options); options = ctx->options; +#ifdef 
HAVE_LIBXDP + if(ctx->intf1->handle_type == SP_TYPE_LIBXDP){ + free_umem_and_xsk(ctx->intf1); + if(ctx->intf2){ + free_umem_and_xsk(ctx->intf2); + } + } +#endif + safe_free(options->intf1_name); safe_free(options->intf2_name); sendpacket_close(ctx->intf1); @@ -1356,3 +1367,35 @@ int tcpreplay_get_flow_expiry(tcpreplay_t *ctx) return ctx->options->flow_expiry; } + +#ifdef HAVE_LIBXDP +void delete_xsk_socket(struct xsk_socket *xsk) +{ + size_t desc_sz = sizeof(struct xdp_desc); + struct xdp_mmap_offsets off; + socklen_t optlen; + int err; + + if (!xsk) + return; + + optlen = sizeof(off); + err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); + if (!err) { + if (xsk->rx) { + munmap(xsk->rx->ring - off.rx.desc, + off.rx.desc + xsk->config.rx_size * desc_sz); + } + if (xsk->tx) { + munmap(xsk->tx->ring - off.tx.desc, + off.tx.desc + xsk->config.tx_size * desc_sz); + } + } + close(xsk->fd); +} + +void free_umem_and_xsk(sendpacket_t* sp){ + xsk_umem__delete(sp->xsk_info->umem->umem); + delete_xsk_socket(sp->xsk_info->xsk); +} +#endif /*HAVE_LIBXDP*/ diff --git a/src/tcpreplay_api.h b/src/tcpreplay_api.h index 7c4e94e9..1c2254f4 100644 --- a/src/tcpreplay_api.h +++ b/src/tcpreplay_api.h @@ -33,7 +33,10 @@ #ifdef ENABLE_DMALLOC #include #endif - +#ifdef HAVE_LIBXDP +#include +#include +#endif #ifdef __cplusplus @@ -203,7 +206,6 @@ typedef struct tcpreplay_s { bool running; } tcpreplay_t; - /* * manual callback definition: * ctx = tcpreplay context @@ -286,6 +288,10 @@ int tcpreplay_set_tcpdump(tcpreplay_t *, tcpdump_t *); void __tcpreplay_seterr(tcpreplay_t *ctx, const char *func, const int line, const char *file, const char *fmt, ...); void tcpreplay_setwarn(tcpreplay_t *ctx, const char *fmt, ...); +#ifdef HAVE_LIBXDP +void delete_xsk_socket(struct xsk_socket *xsk); +void free_umem_and_xsk(sendpacket_t* sp); +#endif #ifdef __cplusplus } #endif diff --git a/src/tcpreplay_opts.def b/src/tcpreplay_opts.def index a0036f76..9d3d5e57 100644 --- 
a/src/tcpreplay_opts.def +++ b/src/tcpreplay_opts.def @@ -663,6 +663,15 @@ EOVersion; doc = ""; }; +flag = { + name = batch-size; + arg-type = number; + arg-range = "1->4096"; + descrip = "The maximum number of packets that can be submitted to the Tx ring at once"; + arg-default = 25; + doc = ""; +}; + flag = { name = less-help; value = "h"; From d5229438cd7ce3b67f96a6a4a1ce8ee1e17aced7 Mon Sep 17 00:00:00 2001 From: bplangar Date: Mon, 8 May 2023 11:53:53 +0200 Subject: [PATCH 2/7] Fix loop feature for AF_XDP packet sending --- src/common/sendpacket.c | 1 + src/common/sendpacket.h | 1 + src/tcpreplay_api.c | 8 ++++++++ 3 files changed, 10 insertions(+) diff --git a/src/common/sendpacket.c b/src/common/sendpacket.c index d7c12e53..abb1acd0 100644 --- a/src/common/sendpacket.c +++ b/src/common/sendpacket.c @@ -1430,6 +1430,7 @@ static sendpacket_t * sendpacket_open_xsk(const char *device, char *errbuf){ sp->xsk_info = xsk_info; sp->umem_info = umem_info; sp->frame_size = frame_size; + sp->tx_size = nb_of_tx_queue_desc; return sp; } diff --git a/src/common/sendpacket.h b/src/common/sendpacket.h index f13762ef..c94fb181 100644 --- a/src/common/sendpacket.h +++ b/src/common/sendpacket.h @@ -217,6 +217,7 @@ struct sendpacket_s { unsigned int pckt_count; int frame_size; int tx_idx; + int tx_size; #endif bool abort; }; diff --git a/src/tcpreplay_api.c b/src/tcpreplay_api.c index fe876f88..98942773 100644 --- a/src/tcpreplay_api.c +++ b/src/tcpreplay_api.c @@ -1168,6 +1168,14 @@ tcpreplay_replay(tcpreplay_t *ctx) if (ctx->options->stats == 0) packet_stats(&ctx->stats); } + + #ifdef HAVE_LIBXDP + sendpacket_t* sp = ctx->intf1; + if(sp->handle_type == SP_TYPE_LIBXDP){ + sp->xsk_info->tx.cached_prod = 0; + sp->xsk_info->tx.cached_cons = sp->tx_size; + } + #endif } } else { while (!ctx->abort) { /* loop forever unless user aborts */ From 5bdbad30f864893e6e18eda0380d647470f487de Mon Sep 17 00:00:00 2001 From: bplangar Date: Mon, 8 May 2023 21:04:32 +0200 Subject: [PATCH 3/7] 
Check packet fits in umem frame before copying --- src/send_packets.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/send_packets.c b/src/send_packets.c index 044b2bd9..8fc71879 100644 --- a/src/send_packets.c +++ b/src/send_packets.c @@ -1238,7 +1238,16 @@ get_user_count(tcpreplay_t *ctx, sendpacket_t *sp, COUNTER counter) return(uint32_t)send; } #ifdef HAVE_LIBXDP +void check_packet_fits_in_umem_frame(sendpacket_t* sp, int packet_len){ + if(packet_len > sp->frame_size){ + fprintf(stderr, "ERROR: packet size cannot be larger than the size of an UMEM frame! Packet size: %i Frame size: %i\n", packet_len, sp->frame_size); + free_umem_and_xsk(sp); + exit(-1); + } +} + static inline void fill_umem_with_data_and_set_xdp_desc(sendpacket_t* sp, int tx_idx, COUNTER umem_index, u_char* pktdata, int len){ + check_packet_fits_in_umem_frame(sp, len); COUNTER umem_index_mod = (umem_index % sp->batch_size) * sp->frame_size; // packets are sent in batch, after each batch umem memory is reusable gen_eth_frame(sp->umem_info, umem_index_mod, pktdata, len); struct xdp_desc* xdp_desc = xsk_ring_prod__tx_desc(&(sp->xsk_info->tx), tx_idx); From 5ac105dc4f2b16ad5f16311cc372855432b620a2 Mon Sep 17 00:00:00 2001 From: eplabal Date: Mon, 5 Jun 2023 15:17:16 +0200 Subject: [PATCH 4/7] Fix copy/paste error in configure.ac --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 199171bc..344d6ecf 100644 --- a/configure.ac +++ b/configure.ac @@ -830,7 +830,7 @@ fi # libpcap can require libnl AC_SEARCH_LIBS([nl_handle_alloc], [nl], - [AC_MSG_NOTICE([Unable to find xdp library - may be needed by libpcap])]) + [AC_MSG_NOTICE([Unable to find nl library - may be needed by libpcap])]) AC_CHECK_LIB(bpf, bpf_object__open_file,, [AC_MSG_NOTICE([Unable to find libbpf library ])]) From 4f33fbf06eb4b24e8c8be4787d11586fa7e38291 Mon Sep 17 00:00:00 2001 From: Fred Date: Sun, 3 Sep 2023 20:13:31 -0700 Subject: [PATCH 5/7] Feature 
#822: fix compile issue --- configure.ac | 5 ++--- src/common/sendpacket.c | 11 ++++------- src/common/sendpacket.h | 22 ++++++++++++---------- src/send_packets.c | 10 +++++----- src/tcpedit/plugins/dlt_en10mb/en10mb.c | 4 ++-- 5 files changed, 25 insertions(+), 27 deletions(-) diff --git a/configure.ac b/configure.ac index 97dc4310..12b6b31a 100644 --- a/configure.ac +++ b/configure.ac @@ -1424,12 +1424,11 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ struct xsk_socket_config config; int fd; }; - struct xsk_socket *xsk; + struct xsk_socket xsk; struct xsk_ring_cons *rxr = NULL; struct xsk_ring_prod *txr = NULL; - xsk = (struct xsk_socket*)malloc(sizeof(struct xsk_socket)); int queue_id = 0; - xsk_socket__create(xsk, "lo", queue_id, NULL, rxr, txr, NULL); + xsk_socket__create(&xsk, "lo", queue_id, NULL, rxr, txr, NULL); socket(AF_XDP, SOCK_RAW, 0); ]])],[ AC_DEFINE([HAVE_LIBXDP], [1], diff --git a/src/common/sendpacket.c b/src/common/sendpacket.c index a0d26608..3fae5c88 100644 --- a/src/common/sendpacket.c +++ b/src/common/sendpacket.c @@ -132,9 +132,6 @@ #ifdef HAVE_SYS_PARAM_H #include #endif -#ifdef HAVE_SYS_SYSCTL_H -#include -#endif #ifdef HAVE_NET_ROUTE_H #include #endif @@ -1353,8 +1350,8 @@ xsk_configure_socket(struct xsk_umem_info *umem, struct xsk_socket_config *cfg, { struct xsk_socket_info *xsk; struct xsk_ring_cons *rxr = NULL; - struct xsk_ring_prod *txr; int ret; + xsk = (struct xsk_socket_info *)safe_malloc(sizeof(struct xsk_socket_info)); xsk->umem = umem; ret = xsk_socket__create(&xsk->xsk, device, queue_id, umem->umem, rxr, &xsk->tx, cfg); @@ -1440,7 +1437,7 @@ create_umem_area(int nb_of_frames, int frame_size, int nb_of_completion_queue_de return umem; } -static struct xsk_socket_info * +struct xsk_socket_info * create_xsk_socket(struct xsk_umem_info *umem_info, int nb_of_tx_queue_desc, int nb_of_rx_queue_desc, @@ -1464,10 +1461,10 @@ create_xsk_socket(struct xsk_umem_info *umem_info, return xsk_info; } -/** +/* * gets the hardware address via 
Linux's PF packet interface */ -static struct tcpr_ether_addr * +static _U_ struct tcpr_ether_addr * sendpacket_get_hwaddr_libxdp(sendpacket_t *sp) { struct ifreq ifr; diff --git a/src/common/sendpacket.h b/src/common/sendpacket.h index b95b4934..73f2cddd 100644 --- a/src/common/sendpacket.h +++ b/src/common/sendpacket.h @@ -94,6 +94,7 @@ union sendpacket_handle { #ifdef HAVE_LIBXDP #include +#include #include #include @@ -212,7 +213,7 @@ struct sendpacket_s { unsigned int batch_size; unsigned int pckt_count; int frame_size; - int tx_idx; + unsigned int tx_idx; int tx_size; #endif bool abort; @@ -222,12 +223,12 @@ typedef struct sendpacket_s sendpacket_t; #ifdef HAVE_LIBXDP struct xsk_umem_info * create_umem_area(int nb_of_frames, int frame_size, int nb_of_completion_queue_descs, int nb_of_fill_queue_descs); -static struct xsk_socket_info *create_xsk_socket(struct xsk_umem_info *umem, - int nb_of_tx_queue_desc, - int nb_of_rx_queue_desc, - const char *device, - u_int32_t queue_id, - char *errbuf); +struct xsk_socket_info *create_xsk_socket(struct xsk_umem_info *umem, + int nb_of_tx_queue_desc, + int nb_of_rx_queue_desc, + const char *device, + u_int32_t queue_id, + char *errbuf); static inline void gen_eth_frame(struct xsk_umem_info *umem, u_int64_t addr, u_char *pkt_data, COUNTER pkt_size) { @@ -242,12 +243,13 @@ kick_tx(struct xsk_socket_info *xsk) return; } printf("%s\n", "Packet sending exited with error!"); - exit(ret); + exit (1); } + static inline void complete_tx_only(sendpacket_t *sp) { - int completion_idx = 0; + u_int32_t completion_idx = 0; if (sp->xsk_info->outstanding_tx == 0) { return; } @@ -255,7 +257,7 @@ complete_tx_only(sendpacket_t *sp) sp->xsk_info->app_stats.tx_wakeup_sendtos++; kick_tx(sp->xsk_info); } - unsigned int rcvd = xsk_ring_cons__peek(&sp->xsk_info->umem->cq, sp->pckt_count, &(completion_idx)); + unsigned int rcvd = xsk_ring_cons__peek(&sp->xsk_info->umem->cq, sp->pckt_count, &completion_idx); if (rcvd > 0) { 
xsk_ring_cons__release(&sp->xsk_info->umem->cq, rcvd); sp->xsk_info->outstanding_tx -= rcvd; diff --git a/src/send_packets.c b/src/send_packets.c index 69b8ce88..e9e445d9 100644 --- a/src/send_packets.c +++ b/src/send_packets.c @@ -386,7 +386,7 @@ send_packets(tcpreplay_t *ctx, pcap_t *pcap, int idx) * we've sent enough packets */ while (!ctx->abort && read_next_packet && - (pktdata = get_next_packet(ctx, pcap, &pkthdr, idx, prev_packet)) != NULL) { + (pktdata = get_next_packet(options, pcap, &pkthdr, idx, prev_packet)) != NULL) { now_is_now = false; packetnum++; #if defined TCPREPLAY || defined TCPREPLAY_EDIT @@ -515,7 +515,7 @@ send_packets(tcpreplay_t *ctx, pcap_t *pcap, int idx) #ifdef HAVE_LIBXDP if (sp->handle_type == SP_TYPE_LIBXDP) { /* Reserve frames for the batc h*/ - while (xsk_ring_prod__reserve(&(sp->xsk_info->tx), sp->batch_size, &(sp->tx_idx)) < sp->batch_size) { + while (xsk_ring_prod__reserve(&(sp->xsk_info->tx), sp->batch_size, &sp->tx_idx) < sp->batch_size) { complete_tx_only(sp); } /* The first packet is already in memory */ @@ -1315,8 +1315,8 @@ prepare_remaining_elements_of_batch(tcpreplay_t *ctx, } sp->pckt_count = pckt_count; dbgx(2, - "Sending packets with LIBXDP in batch, packet numbers from %llu to %llu\n", - packetnum - pckt_count + 1, - packetnum); + "Sending packets with LIBXDP in batch, packet numbers from " COUNTER_SPEC " to " COUNTER_SPEC "\n", + *packetnum - pckt_count + 1, + *packetnum); } #endif /* HAVE_LIBXDP */ diff --git a/src/tcpedit/plugins/dlt_en10mb/en10mb.c b/src/tcpedit/plugins/dlt_en10mb/en10mb.c index 0c24d8e5..957ce20d 100644 --- a/src/tcpedit/plugins/dlt_en10mb/en10mb.c +++ b/src/tcpedit/plugins/dlt_en10mb/en10mb.c @@ -184,7 +184,7 @@ dlt_en10mb_parse_subsmac(tcpeditdlt_t *ctx, en10mb_config_t *config, const char { size_t input_len = strlen(input); size_t possible_entries_number = (input_len / (SUBSMAC_ENTRY_LEN + 1)) + 1; - int entry; + size_t entry; en10mb_sub_entry_t *entries = safe_malloc(possible_entries_number 
* sizeof(en10mb_sub_entry_t)); @@ -524,7 +524,7 @@ dlt_en10mb_encode(tcpeditdlt_t *ctx, u_char *packet, int pktlen, tcpr_dir_t dir) newl2len = TCPR_802_1Q_H; } - if (pktlen < newl2len || pktlen + newl2len - ctx->l2len > MAXPACKET) { + if ((uint32_t)pktlen < newl2len || pktlen + newl2len - ctx->l2len > MAXPACKET) { tcpedit_seterr(ctx->tcpedit, "Unable to process packet #" COUNTER_SPEC " since its new length is %d bytes.", ctx->tcpedit->runtime.packetnum, From dccbbc298c48956e3184519c415156e3f504d9a9 Mon Sep 17 00:00:00 2001 From: Fred Klassen Date: Mon, 4 Sep 2023 17:31:24 +0000 Subject: [PATCH 6/7] Feature #822: rename --batch-size to --xdp-batch-size --- src/tcpreplay_api.c | 2 +- src/tcpreplay_opts.def | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/tcpreplay_api.c b/src/tcpreplay_api.c index 41b32c01..3c14286b 100644 --- a/src/tcpreplay_api.c +++ b/src/tcpreplay_api.c @@ -359,7 +359,7 @@ tcpreplay_post_args(tcpreplay_t *ctx, int argc) goto out; } #ifdef HAVE_LIBXDP - ctx->intf1->batch_size = OPT_VALUE_BATCH_SIZE; + ctx->intf1->batch_size = OPT_VALUE_XDP_BATCH_SIZE; #endif #if defined HAVE_NETMAP ctx->intf1->netmap_delay = ctx->options->netmap_delay; diff --git a/src/tcpreplay_opts.def b/src/tcpreplay_opts.def index 605a6f45..e4a45ead 100644 --- a/src/tcpreplay_opts.def +++ b/src/tcpreplay_opts.def @@ -133,16 +133,6 @@ level for debugging output. Higher numbers increase verbosity. EOText; }; -flag = { - ifdef = HAVE_LIBXDP; - name = batch-size; - arg-type = number; - arg-range = "1->4096"; - descrip = "The maximum number of packets that can be submitted to the AF_XDP TX ring at once"; - arg-default = 25; - doc = "Higher values may improve performance at the cost of burstiness"; -}; - flag = { name = quiet; value = q; @@ -607,6 +597,16 @@ sending packets may cause equally long delays between printing statistics. 
EOText; }; +flag = { + ifdef = HAVE_LIBXDP; + name = xdp-batch-size; + arg-type = number; + arg-range = "1->4096"; + descrip = "The maximum number of packets that can be submitted to the AF_XDP TX ring at once"; + arg-default = 25; + doc = "Higher values may improve performance at the cost of accuracy"; +}; + flag = { name = version; value = V; From 6bcac917bfa5121aca2f387b906ec6a9b4e4c533 Mon Sep 17 00:00:00 2001 From: Fred Klassen Date: Mon, 4 Sep 2023 14:06:57 -0700 Subject: [PATCH 7/7] Feature #822 - add --xdp option, cleanup, document --- INSTALL | 303 ---------------------------------------- docs/INSTALL | 28 ++++ src/common/sendpacket.c | 63 +++++---- src/tcpreplay_api.c | 9 ++ src/tcpreplay_api.h | 4 + src/tcpreplay_opts.def | 14 ++ 6 files changed, 93 insertions(+), 328 deletions(-) delete mode 100644 INSTALL diff --git a/INSTALL b/INSTALL deleted file mode 100644 index ae077a6a..00000000 --- a/INSTALL +++ /dev/null @@ -1,303 +0,0 @@ -Installation Instructions -************************* - -Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005, -2006, 2014 Free Software Foundation, Inc. - -This file is free documentation; the Free Software Foundation gives -unlimited permission to copy, distribute and modify it. - - -Advanced Installation -===================== -Visit http://tcpreplay.appneta.com/wiki/installation.html - - -Basic Installation -================== - - ./configure - make - sudo make install - -Briefly, the shell commands `./configure; make; make install' should -configure, build, and install this package. The following -more-detailed instructions are generic; see the `README' file for -instructions specific to this package. - - The `configure' shell script attempts to guess correct values for -various system-dependent variables used during compilation. It uses -those values to create a `Makefile' in each directory of the package. -It may also create one or more `.h' files containing system-dependent -definitions. 
Finally, it creates a shell script `config.status' that -you can run in the future to recreate the current configuration, and a -file `config.log' containing compiler output (useful mainly for -debugging `configure'). - - It can also use an optional file (typically called `config.cache' -and enabled with `--cache-file=config.cache' or simply `-C') that saves -the results of its tests to speed up reconfiguring. Caching is -disabled by default to prevent problems with accidental use of stale -cache files. - - If you need to do unusual things to compile the package, please try -to figure out how `configure' could check whether to do them, and mail -diffs or instructions to the address given in the `README' so they can -be considered for the next release. If you are using the cache, and at -some point `config.cache' contains results you don't want to keep, you -may remove or edit it. - - The file `configure.ac' (or `configure.in') is used to create -`configure' by a program called `autoconf'. You need `configure.ac' if -you want to change it or regenerate `configure' using a newer version -of `autoconf'. - -The simplest way to compile this package is: - - 1. `cd' to the directory containing the package's source code and type - `./configure' to configure the package for your system. - - Running `configure' might take a while. While running, it prints - some messages telling which features it is checking for. - - 2. Type `make' to compile the package. - - 3. Optionally, type `make check' to run any self-tests that come with - the package. - - 4. Type `make install' to install the programs and any data files and - documentation. - - 5. You can remove the program binaries and object files from the - source code directory by typing `make clean'. To also remove the - files that `configure' created (so you can compile the package for - a different kind of computer), type `make distclean'. 
There is - also a `make maintainer-clean' target, but that is intended mainly - for the package's developers. If you use it, you may have to get - all sorts of other programs in order to regenerate files that came - with the distribution. - - -How to make Tcpreplay go fast -============================= - -1) netmap - ------ -This feature will detect netmap capable network drivers on Linux and -BSD systems. If detected, the network driver is bypassed for the -execution duration of tcpreplay and tcpreplay-edit, and network buffers -will be written to directly. This will allow you to achieve full 10GigE -line rates on commodity 10GigE network adapters, similar to rates -achieved by commercial network traffic generators. - -Note that bypassing the network driver will disrupt other applications -connected through the test interface. Use caution when testing on the -same interface you ssh'ed into. - -Ensure that you have supported NICs installed. Most Intel and nForce -(nVidia) adapters will work. Some virtual adapters are supported. - -FreeBSD 10 and higher already contains netmap capabilities and should -be detected automatically by "configure". But first you must enable -netmap on the system by adding 'device netmap' to your kernel config -and rebuilding the kernel. When complete, /dev/netmap will be -available. - -For Linux, download latest netmap sources from http://info.iet.unipi.it/~luigi/netmap/ -or run 'git clone https://code.google.com/p/netmap/'. You will also need to have -kernel sources installed so the build system can patch the sources and build -netmap-enabled drivers. If kernel sources are in /a/b/c/linux-A.B.C/ , then you -should do: - - cd netmap/LINUX - make KSRC=/a/b/c/linux-A.B.C/ # builds the kernel modules - make KSRC=/a/b/c/linux-A.B.C/ apps # builds sample applications - -You can omit KSRC if your kernel sources are in a standard place. - -Once you load the netmap.lin.ko module on your Linux machine, /dev/netmap -will be available. 
You will also need to replace your existing network drivers -(beyond the scope of this document). - -Building netmap-aware Tcpreplay suite is relatively straight forward. For -FreeBSD, build normally. For Linux, if you extracted netmap into /usr/src/ you -can also build normally. Otherwise you will have to specify the netmap source -directory, for example: - - ./configure --with-netmap=/home/fklassen/git/netmap - make - sudo make install - - -Compilers and Options -===================== - -Some systems require unusual options for compilation or linking that the -`configure' script does not know about. Run `./configure --help' for -details on some of the pertinent environment variables. - - You can give `configure' initial values for configuration parameters -by setting variables in the command line or in the environment. Here -is an example: - - ./configure CC=c99 CFLAGS=-g LIBS=-lposix - - *Note Defining Variables::, for more details. - - -Compiling For Multiple Architectures -==================================== - -You can compile the package for more than one kind of computer at the -same time, by placing the object files for each architecture in their -own directory. To do this, you can use GNU `make'. `cd' to the -directory where you want the object files and executables to go and run -the `configure' script. `configure' automatically checks for the -source code in the directory that `configure' is in and in `..'. - - With a non-GNU `make', it is safer to compile the package for one -architecture at a time in the source code directory. After you have -installed the package for one architecture, use `make distclean' before -reconfiguring for another architecture. - - -Installation Names -================== - -By default, `make install' installs the package's commands under -`/usr/local/bin', include files under `/usr/local/include', etc. You -can specify an installation prefix other than `/usr/local' by giving -`configure' the option `--prefix=PREFIX'. 
- - You can specify separate installation prefixes for -architecture-specific files and architecture-independent files. If you -pass the option `--exec-prefix=PREFIX' to `configure', the package uses -PREFIX as the prefix for installing programs and libraries. -Documentation and other data files still use the regular prefix. - - In addition, if you use an unusual directory layout you can give -options like `--bindir=DIR' to specify different values for particular -kinds of files. Run `configure --help' for a list of the directories -you can set and what kinds of files go in them. - - If the package supports it, you can cause programs to be installed -with an extra prefix or suffix on their names by giving `configure' the -option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. - - -Optional Features -================= - -Some packages pay attention to `--enable-FEATURE' options to -`configure', where FEATURE indicates an optional part of the package. -They may also pay attention to `--with-PACKAGE' options, where PACKAGE -is something like `gnu-as' or `x' (for the X Window System). The -`README' should mention any `--enable-' and `--with-' options that the -package recognizes. - - For packages that use the X Window System, `configure' can usually -find the X include and library files automatically, but if it doesn't, -you can use the `configure' options `--x-includes=DIR' and -`--x-libraries=DIR' to specify their locations. - - -Specifying the System Type -========================== - -There may be some features `configure' cannot figure out automatically, -but needs to determine by the type of machine the package will run on. -Usually, assuming the package is built to be run on the _same_ -architectures, `configure' can figure that out, but if it prints a -message saying it cannot guess the machine type, give it the -`--build=TYPE' option. 
TYPE can either be a short name for the system -type, such as `sun4', or a canonical name which has the form: - - CPU-COMPANY-SYSTEM - -where SYSTEM can have one of these forms: - - OS KERNEL-OS - - See the file `config.sub' for the possible values of each field. If -`config.sub' isn't included in this package, then this package doesn't -need to know the machine type. - - If you are _building_ compiler tools for cross-compiling, you should -use the option `--target=TYPE' to select the type of system they will -produce code for. - - If you want to _use_ a cross compiler, that generates code for a -platform different from the build platform, you should specify the -"host" platform (i.e., that on which the generated programs will -eventually be run) with `--host=TYPE'. - - -Sharing Defaults -================ - -If you want to set default values for `configure' scripts to share, you -can create a site shell script called `config.site' that gives default -values for variables like `CC', `cache_file', and `prefix'. -`configure' looks for `PREFIX/share/config.site' if it exists, then -`PREFIX/etc/config.site' if it exists. Or, you can set the -`CONFIG_SITE' environment variable to the location of the site script. -A warning: not all `configure' scripts look for a site script. - - -Defining Variables -================== - -Variables not defined in a site shell script can be set in the -environment passed to `configure'. However, some packages may run -configure again during the build, and the customized values of these -variables may be lost. In order to avoid this problem, you should set -them in the `configure' command line, using `VAR=value'. For example: - - ./configure CC=/usr/local2/bin/gcc - -causes the specified `gcc' to be used as the C compiler (unless it is -overridden in the site shell script). - -Unfortunately, this technique does not work for `CONFIG_SHELL' due to -an Autoconf bug. 
Until the bug is fixed you can use this workaround: - - CONFIG_SHELL=/bin/bash /bin/bash ./configure CONFIG_SHELL=/bin/bash - - -`configure' Invocation -====================== - -`configure' recognizes the following options to control how it operates. - -`--help' -`-h' - Print a summary of the options to `configure', and exit. - -`--version' -`-V' - Print the version of Autoconf used to generate the `configure' - script, and exit. - -`--cache-file=FILE' - Enable the cache: use and save the results of the tests in FILE, - traditionally `config.cache'. FILE defaults to `/dev/null' to - disable caching. - -`--config-cache' -`-C' - Alias for `--cache-file=config.cache'. - -`--quiet' -`--silent' -`-q' - Do not print messages saying which checks are being made. To - suppress all normal output, redirect it to `/dev/null' (any error - messages will still be shown). - -`--srcdir=DIR' - Look for the package's source code in directory DIR. Usually - `configure' can determine that directory automatically. - -`configure' also accepts some other, not widely useful, options. Run -`configure --help' for more details. - diff --git a/docs/INSTALL b/docs/INSTALL index ae077a6a..1a74f66a 100644 --- a/docs/INSTALL +++ b/docs/INSTALL @@ -128,6 +128,34 @@ directory, for example: make sudo make install +2) AF_XDP + ------ + +This feature will detect AF_XDP capable network drivers on Linux. If detected, +the `--xdp` option becomes available, allowing eBPF enabled adapters to be +written to directly. + +This feature requires `libxdp-dev` and `libbpf-dev` packages to be installed. +For example: + + $ ./configure | tail + Linux/BSD netmap: no + Tuntap device support: yes + LIBXDP for AF_XDP socket: yes + $ make + $ sudo make install + $ tcpreplay -i eth0 --xdp test/test.pcap + +If you want to compile a version that only uses AF_XDP, use the `--enable-force-libxdp` +configure option, e.g. 
+ + $ ./configure --enable-force-libxdp | tail + Linux/BSD netmap: no + Tuntap device support: yes + LIBXDP for AF_XDP socket: yes + $ make + $ sudo make install + $ tcpreplay -i eth0 test/test.pcap Compilers and Options ===================== diff --git a/src/common/sendpacket.c b/src/common/sendpacket.c index 3fae5c88..6b060aad 100644 --- a/src/common/sendpacket.c +++ b/src/common/sendpacket.c @@ -572,16 +572,21 @@ sendpacket_open(const char *device, sp = (sendpacket_t *)sendpacket_open_netmap(device, errbuf, arg); else #endif +#ifdef HAVE_LIBXDP + if (sendpacket_type == SP_TYPE_LIBXDP) + sp = sendpacket_open_xsk(device, errbuf); + else +#endif #if defined HAVE_PF_PACKET sp = sendpacket_open_pf(device, errbuf); #elif defined HAVE_BPF - sp = sendpacket_open_bpf(device, errbuf); + sp = sendpacket_open_bpf(device, errbuf); #elif defined HAVE_LIBDNET - sp = sendpacket_open_libdnet(device, errbuf); + sp = sendpacket_open_libdnet(device, errbuf); #elif (defined HAVE_PCAP_INJECT || defined HAVE_PCAP_SENDPACKET) - sp = sendpacket_open_pcap(device, errbuf); + sp = sendpacket_open_pcap(device, errbuf); #elif defined HAVE_LIBXDP - sp = sendpacket_open_xsk(device, errbuf); + sp = sendpacket_open_xsk(device, errbuf); #else #error "No defined packet injection method for sendpacket_open()" #endif @@ -693,8 +698,10 @@ sendpacket_close(sendpacket_t *sp) #endif break; case SP_TYPE_LIBXDP: -#if defined HAVE_LIBXDP +#ifdef HAVE_LIBXDP close(sp->handle.fd); + safe_free(sp->xsk_info); + safe_free(sp->umem_info); #endif break; case SP_TYPE_NONE: @@ -1253,30 +1260,36 @@ sendpacket_get_dlt(sendpacket_t *sp) { int dlt = DLT_EN10MB; - if (sp->handle_type == SP_TYPE_KHIAL || sp->handle_type == SP_TYPE_NETMAP || sp->handle_type == SP_TYPE_TUNTAP) { - /* always EN10MB */ - } else { -#if defined HAVE_BPF - int rcode; + switch (sp->handle_type) { + case SP_TYPE_KHIAL: + case SP_TYPE_NETMAP: + case SP_TYPE_TUNTAP: + case SP_TYPE_LIBXDP: + /* always EN10MB */ + return dlt; + default: + ; + } - if 
((rcode = ioctl(sp->handle.fd, BIOCGDLT, &dlt)) < 0) { - warnx("Unable to get DLT value for BPF device (%s): %s", sp->device, strerror(errno)); - return (-1); - } +#if defined HAVE_BPF + if ((ioctl(sp->handle.fd, BIOCGDLT, &dlt)) < 0) { + warnx("Unable to get DLT value for BPF device (%s): %s", sp->device, strerror(errno)); + return (-1); + } #elif defined HAVE_PF_PACKET || defined HAVE_LIBDNET - /* use libpcap to get dlt */ - pcap_t *pcap; - char errbuf[PCAP_ERRBUF_SIZE]; - if ((pcap = pcap_open_live(sp->device, 65535, 0, 0, errbuf)) == NULL) { - warnx("Unable to get DLT value for %s: %s", sp->device, errbuf); - return (-1); - } - dlt = pcap_datalink(pcap); - pcap_close(pcap); + /* use libpcap to get dlt */ + pcap_t *pcap; + char errbuf[PCAP_ERRBUF_SIZE]; + if ((pcap = pcap_open_live(sp->device, 65535, 0, 0, errbuf)) == NULL) { + warnx("Unable to get DLT value for %s: %s", sp->device, errbuf); + return (-1); + } + dlt = pcap_datalink(pcap); + pcap_close(pcap); #elif defined HAVE_PCAP_SENDPACKET || defined HAVE_PCAP_INJECT - dlt = pcap_datalink(sp->handle.pcap); + dlt = pcap_datalink(sp->handle.pcap); #endif - } + return dlt; } diff --git a/src/tcpreplay_api.c b/src/tcpreplay_api.c index 3c14286b..2b992e00 100644 --- a/src/tcpreplay_api.c +++ b/src/tcpreplay_api.c @@ -265,6 +265,15 @@ tcpreplay_post_args(tcpreplay_t *ctx, int argc) #endif } + if (HAVE_OPT(XDP)) { +#ifdef HAVE_LIBXDP + options->xdp = 1; + ctx->sp_type = SP_TYPE_LIBXDP; +#else + err(-1, "--xdp feature was not compiled in. 
See INSTALL."); +#endif + } + if (HAVE_OPT(UNIQUE_IP)) options->unique_ip = 1; diff --git a/src/tcpreplay_api.h b/src/tcpreplay_api.h index aa46e311..1510443a 100644 --- a/src/tcpreplay_api.h +++ b/src/tcpreplay_api.h @@ -148,6 +148,10 @@ typedef struct tcpreplay_opt_s { int netmap_delay; #endif +#ifdef HAVE_LIBXDP + int xdp; +#endif + /* print flow statistic */ bool flow_stats; int flow_expiry; diff --git a/src/tcpreplay_opts.def b/src/tcpreplay_opts.def index e4a45ead..e05c7b32 100644 --- a/src/tcpreplay_opts.def +++ b/src/tcpreplay_opts.def @@ -528,6 +528,20 @@ are fully up before netmap transmit. Requires netmap option. Default is 10 secon EOText; }; +flag = { + ifdef = HAVE_LIBXDP; + name = xdp; + descrip = "Write packets directly to AF_XDP enabled network adapter"; + doc = <<- EOText +This feature will detect AF_XDP capable network drivers on Linux systems +that have 'libxdp-dev' and 'libbpf-dev' installed. If detected, the network +stack is bypassed and packets are sent directly to an eBPF enabled driver. +This will allow you to achieve full line rates on commodity network adapters, similar to rates +achieved by commercial network traffic generators. +EOText; +}; + + flag = { name = no-flow-stats; descrip = "Suppress printing and tracking flow count, rates and expirations";