diff --git a/INSTALL b/INSTALL deleted file mode 100644 index ae077a6a..00000000 --- a/INSTALL +++ /dev/null @@ -1,303 +0,0 @@ -Installation Instructions -************************* - -Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005, -2006, 2014 Free Software Foundation, Inc. - -This file is free documentation; the Free Software Foundation gives -unlimited permission to copy, distribute and modify it. - - -Advanced Installation -===================== -Visit http://tcpreplay.appneta.com/wiki/installation.html - - -Basic Installation -================== - - ./configure - make - sudo make install - -Briefly, the shell commands `./configure; make; make install' should -configure, build, and install this package. The following -more-detailed instructions are generic; see the `README' file for -instructions specific to this package. - - The `configure' shell script attempts to guess correct values for -various system-dependent variables used during compilation. It uses -those values to create a `Makefile' in each directory of the package. -It may also create one or more `.h' files containing system-dependent -definitions. Finally, it creates a shell script `config.status' that -you can run in the future to recreate the current configuration, and a -file `config.log' containing compiler output (useful mainly for -debugging `configure'). - - It can also use an optional file (typically called `config.cache' -and enabled with `--cache-file=config.cache' or simply `-C') that saves -the results of its tests to speed up reconfiguring. Caching is -disabled by default to prevent problems with accidental use of stale -cache files. - - If you need to do unusual things to compile the package, please try -to figure out how `configure' could check whether to do them, and mail -diffs or instructions to the address given in the `README' so they can -be considered for the next release. 
If you are using the cache, and at -some point `config.cache' contains results you don't want to keep, you -may remove or edit it. - - The file `configure.ac' (or `configure.in') is used to create -`configure' by a program called `autoconf'. You need `configure.ac' if -you want to change it or regenerate `configure' using a newer version -of `autoconf'. - -The simplest way to compile this package is: - - 1. `cd' to the directory containing the package's source code and type - `./configure' to configure the package for your system. - - Running `configure' might take a while. While running, it prints - some messages telling which features it is checking for. - - 2. Type `make' to compile the package. - - 3. Optionally, type `make check' to run any self-tests that come with - the package. - - 4. Type `make install' to install the programs and any data files and - documentation. - - 5. You can remove the program binaries and object files from the - source code directory by typing `make clean'. To also remove the - files that `configure' created (so you can compile the package for - a different kind of computer), type `make distclean'. There is - also a `make maintainer-clean' target, but that is intended mainly - for the package's developers. If you use it, you may have to get - all sorts of other programs in order to regenerate files that came - with the distribution. - - -How to make Tcpreplay go fast -============================= - -1) netmap - ------ -This feature will detect netmap capable network drivers on Linux and -BSD systems. If detected, the network driver is bypassed for the -execution duration of tcpreplay and tcpreplay-edit, and network buffers -will be written to directly. This will allow you to achieve full 10GigE -line rates on commodity 10GigE network adapters, similar to rates -achieved by commercial network traffic generators. - -Note that bypassing the network driver will disrupt other applications -connected through the test interface. 
Use caution when testing on the -same interface you ssh'ed into. - -Ensure that you have supported NICs installed. Most Intel and nForce -(nVidia) adapters will work. Some virtual adapters are supported. - -FreeBSD 10 and higher already contains netmap capabilities and should -be detected automatically by "configure". But first you must enable -netmap on the system by adding 'device netmap' to your kernel config -and rebuilding the kernel. When complete, /dev/netmap will be -available. - -For Linux, download latest netmap sources from http://info.iet.unipi.it/~luigi/netmap/ -or run 'git clone https://code.google.com/p/netmap/'. You will also need to have -kernel sources installed so the build system can patch the sources and build -netmap-enabled drivers. If kernel sources are in /a/b/c/linux-A.B.C/ , then you -should do: - - cd netmap/LINUX - make KSRC=/a/b/c/linux-A.B.C/ # builds the kernel modules - make KSRC=/a/b/c/linux-A.B.C/ apps # builds sample applications - -You can omit KSRC if your kernel sources are in a standard place. - -Once you load the netmap.lin.ko module on your Linux machine, /dev/netmap -will be available. You will also need to replace your existing network drivers -(beyond the scope of this document). - -Building netmap-aware Tcpreplay suite is relatively straight forward. For -FreeBSD, build normally. For Linux, if you extracted netmap into /usr/src/ you -can also build normally. Otherwise you will have to specify the netmap source -directory, for example: - - ./configure --with-netmap=/home/fklassen/git/netmap - make - sudo make install - - -Compilers and Options -===================== - -Some systems require unusual options for compilation or linking that the -`configure' script does not know about. Run `./configure --help' for -details on some of the pertinent environment variables. - - You can give `configure' initial values for configuration parameters -by setting variables in the command line or in the environment. 
Here -is an example: - - ./configure CC=c99 CFLAGS=-g LIBS=-lposix - - *Note Defining Variables::, for more details. - - -Compiling For Multiple Architectures -==================================== - -You can compile the package for more than one kind of computer at the -same time, by placing the object files for each architecture in their -own directory. To do this, you can use GNU `make'. `cd' to the -directory where you want the object files and executables to go and run -the `configure' script. `configure' automatically checks for the -source code in the directory that `configure' is in and in `..'. - - With a non-GNU `make', it is safer to compile the package for one -architecture at a time in the source code directory. After you have -installed the package for one architecture, use `make distclean' before -reconfiguring for another architecture. - - -Installation Names -================== - -By default, `make install' installs the package's commands under -`/usr/local/bin', include files under `/usr/local/include', etc. You -can specify an installation prefix other than `/usr/local' by giving -`configure' the option `--prefix=PREFIX'. - - You can specify separate installation prefixes for -architecture-specific files and architecture-independent files. If you -pass the option `--exec-prefix=PREFIX' to `configure', the package uses -PREFIX as the prefix for installing programs and libraries. -Documentation and other data files still use the regular prefix. - - In addition, if you use an unusual directory layout you can give -options like `--bindir=DIR' to specify different values for particular -kinds of files. Run `configure --help' for a list of the directories -you can set and what kinds of files go in them. - - If the package supports it, you can cause programs to be installed -with an extra prefix or suffix on their names by giving `configure' the -option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. 
- - -Optional Features -================= - -Some packages pay attention to `--enable-FEATURE' options to -`configure', where FEATURE indicates an optional part of the package. -They may also pay attention to `--with-PACKAGE' options, where PACKAGE -is something like `gnu-as' or `x' (for the X Window System). The -`README' should mention any `--enable-' and `--with-' options that the -package recognizes. - - For packages that use the X Window System, `configure' can usually -find the X include and library files automatically, but if it doesn't, -you can use the `configure' options `--x-includes=DIR' and -`--x-libraries=DIR' to specify their locations. - - -Specifying the System Type -========================== - -There may be some features `configure' cannot figure out automatically, -but needs to determine by the type of machine the package will run on. -Usually, assuming the package is built to be run on the _same_ -architectures, `configure' can figure that out, but if it prints a -message saying it cannot guess the machine type, give it the -`--build=TYPE' option. TYPE can either be a short name for the system -type, such as `sun4', or a canonical name which has the form: - - CPU-COMPANY-SYSTEM - -where SYSTEM can have one of these forms: - - OS KERNEL-OS - - See the file `config.sub' for the possible values of each field. If -`config.sub' isn't included in this package, then this package doesn't -need to know the machine type. - - If you are _building_ compiler tools for cross-compiling, you should -use the option `--target=TYPE' to select the type of system they will -produce code for. - - If you want to _use_ a cross compiler, that generates code for a -platform different from the build platform, you should specify the -"host" platform (i.e., that on which the generated programs will -eventually be run) with `--host=TYPE'. 
- - -Sharing Defaults -================ - -If you want to set default values for `configure' scripts to share, you -can create a site shell script called `config.site' that gives default -values for variables like `CC', `cache_file', and `prefix'. -`configure' looks for `PREFIX/share/config.site' if it exists, then -`PREFIX/etc/config.site' if it exists. Or, you can set the -`CONFIG_SITE' environment variable to the location of the site script. -A warning: not all `configure' scripts look for a site script. - - -Defining Variables -================== - -Variables not defined in a site shell script can be set in the -environment passed to `configure'. However, some packages may run -configure again during the build, and the customized values of these -variables may be lost. In order to avoid this problem, you should set -them in the `configure' command line, using `VAR=value'. For example: - - ./configure CC=/usr/local2/bin/gcc - -causes the specified `gcc' to be used as the C compiler (unless it is -overridden in the site shell script). - -Unfortunately, this technique does not work for `CONFIG_SHELL' due to -an Autoconf bug. Until the bug is fixed you can use this workaround: - - CONFIG_SHELL=/bin/bash /bin/bash ./configure CONFIG_SHELL=/bin/bash - - -`configure' Invocation -====================== - -`configure' recognizes the following options to control how it operates. - -`--help' -`-h' - Print a summary of the options to `configure', and exit. - -`--version' -`-V' - Print the version of Autoconf used to generate the `configure' - script, and exit. - -`--cache-file=FILE' - Enable the cache: use and save the results of the tests in FILE, - traditionally `config.cache'. FILE defaults to `/dev/null' to - disable caching. - -`--config-cache' -`-C' - Alias for `--cache-file=config.cache'. - -`--quiet' -`--silent' -`-q' - Do not print messages saying which checks are being made. 
To - suppress all normal output, redirect it to `/dev/null' (any error - messages will still be shown). - -`--srcdir=DIR' - Look for the package's source code in directory DIR. Usually - `configure' can determine that directory automatically. - -`configure' also accepts some other, not widely useful, options. Run -`configure --help' for more details. - diff --git a/configure.ac b/configure.ac index cdc1a9d3..12b6b31a 100644 --- a/configure.ac +++ b/configure.ac @@ -566,6 +566,10 @@ AC_ARG_ENABLE(force-libdnet, AS_HELP_STRING([--enable-force-libdnet],[Force using libdnet for sending packets]), [ AC_DEFINE([FORCE_INJECT_LIBDNET], [1], [Force using libdnet for sending packets])]) +AC_ARG_ENABLE(force-libxdp, + AS_HELP_STRING([--enable-force-libxdp],[Force using libxdp for sending packets]), + [ AC_DEFINE([FORCE_INJECT_LIBXDP], [1], [Force using libxdp for sending packets])]) + AC_ARG_ENABLE(force-inject, AS_HELP_STRING([--enable-force-inject],[Force using libpcap's pcap_inject() for sending packets]), [ AC_DEFINE([FORCE_INJECT_PCAP_INJECT],[1], [Force using libpcap's pcap_inject() for sending packets])]) @@ -844,6 +848,12 @@ fi AC_SEARCH_LIBS([nl_handle_alloc], [nl], [AC_MSG_NOTICE([Unable to find nl library - may be needed by libpcap])]) +AC_CHECK_LIB(bpf, bpf_object__open_file,, + [AC_MSG_NOTICE([Unable to find libbpf library ])]) + +AC_CHECK_LIB(xdp, xsk_umem__delete,, + [AC_MSG_NOTICE([Unable to find libxdp library ])]) + ## ## If not automatically configured, ## check for newer and full-featured libpcap's @@ -1399,6 +1409,36 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ AC_MSG_RESULT(no) ]) +have_libxdp=no +dnl Check for LIBXDP AF_XDP socket support +AC_MSG_CHECKING(for LIBXDP XDP packet sending support) +AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ +#include +#include +#include +]], [[ + struct xsk_socket { + struct xsk_ring_cons *rx; + struct xsk_ring_prod *tx; + struct xsk_ctx *ctx; + struct xsk_socket_config config; + int fd; + }; + struct xsk_socket xsk; + struct 
xsk_ring_cons *rxr = NULL; + struct xsk_ring_prod *txr = NULL; + int queue_id = 0; + xsk_socket__create(&xsk, "lo", queue_id, NULL, rxr, txr, NULL); + socket(AF_XDP, SOCK_RAW, 0); +]])],[ + AC_DEFINE([HAVE_LIBXDP], [1], + [Do we have LIBXDP AF_XDP socket support?]) + AC_MSG_RESULT(yes) + have_libxdp=yes +],[ + AC_MSG_RESULT(no) +]) + have_tx_ring=no dnl Check for older Linux TX_RING support AC_MSG_CHECKING(for TX_RING socket sending support) @@ -1420,6 +1460,9 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ AC_MSG_RESULT(no) ]) +AC_CHECK_HEADERS([bpf/libbpf.h]) +AC_CHECK_HEADERS([bpf/bpf.h]) +AC_CHECK_HEADERS([xdp/libxdp.h]) AC_CHECK_HEADERS([net/bpf.h], [have_bpf=yes], [have_bpf=no]) if test $have_bpf = yes ; then @@ -1948,6 +1991,7 @@ pcap_sendpacket: ${have_pcap_sendpacket} ** pcap_netmap ${have_pcap_netmap} Linux/BSD netmap: ${have_netmap} Tuntap device support: ${have_tuntap} +LIBXDP for AF_XDP socket: ${have_libxdp} * In order of preference; see configure --help to override ** Required for tcpbridge diff --git a/docs/INSTALL b/docs/INSTALL index ae077a6a..1a74f66a 100644 --- a/docs/INSTALL +++ b/docs/INSTALL @@ -128,6 +128,34 @@ directory, for example: make sudo make install +2) AF_XDP + ------ + +This feature will detect AF_XDP capable network drivers on Linux. If detected, +the `--xdp` option becomes available, allowing eBPF enabled adapters to be +written to directly. + +This feature requires `libxdp-dev` and `libbpf-dev` packages to be installed. +For example: + + $ ./configure | tail + Linux/BSD netmap: no + Tuntap device support: yes + LIBXDP for AF_XDP socket: yes + $ make + $ sudo make install + $ tcpreplay -i eth0 --xdp test/test.pcap + +If you want to compile a version that only uses AF_XDP, use the `--enable-force-libxdp` +configure option, e.g. 
+ + $ ./configure --enable-force-libxdp | tail + Linux/BSD netmap: no + Tuntap device support: yes + LIBXDP for AF_XDP socket: yes + $ make + $ sudo make install + $ tcpreplay -i eth0 test/test.pcap Compilers and Options ===================== diff --git a/src/common/sendpacket.c b/src/common/sendpacket.c index b21b757c..6b060aad 100644 --- a/src/common/sendpacket.c +++ b/src/common/sendpacket.c @@ -63,6 +63,7 @@ #undef HAVE_PCAP_INJECT #undef HAVE_PCAP_SENDPACKET #undef HAVE_BPF +#undef HAVE_LIBXDP #endif #ifdef FORCE_INJECT_PF_PACKET @@ -71,6 +72,7 @@ #undef HAVE_PCAP_INJECT #undef HAVE_PCAP_SENDPACKET #undef HAVE_BPF +#undef HAVE_LIBXDP #endif #ifdef FORCE_INJECT_LIBDNET @@ -79,6 +81,7 @@ #undef HAVE_PCAP_INJECT #undef HAVE_PCAP_SENDPACKET #undef HAVE_BPF +#undef HAVE_LIBXDP #endif #ifdef FORCE_INJECT_BPF @@ -87,6 +90,7 @@ #undef HAVE_PCAP_INJECT #undef HAVE_PCAP_SENDPACKET #undef HAVE_PF_PACKET +#undef HAVE_LIBXDP #endif #ifdef FORCE_INJECT_PCAP_INJECT @@ -95,6 +99,7 @@ #undef HAVE_PCAP_SENDPACKET #undef HAVE_BPF #undef HAVE_PF_PACKET +#undef HAVE_LIBXDP #endif #ifdef FORCE_INJECT_PCAP_SENDPACKET @@ -103,6 +108,16 @@ #undef HAVE_PCAP_INJECT #undef HAVE_BPF #undef HAVE_PF_PACKET +#undef HAVE_LIBXDP +#endif + +#ifdef FORCE_INJECT_LIBXDP +#undef HAVE_TX_RING +#undef HAVE_LIBDNET +#undef HAVE_PF_PACKET +#undef HAVE_PCAP_INJECT +#undef HAVE_PCAP_SENDPACKET +#undef HAVE_BPF #endif #if (defined HAVE_WINPCAP && defined HAVE_PCAP_INJECT) @@ -110,16 +125,13 @@ #endif #if !defined HAVE_PCAP_INJECT && !defined HAVE_PCAP_SENDPACKET && !defined HAVE_LIBDNET && !defined HAVE_PF_PACKET && \ - !defined HAVE_BPF && !defined TX_RING -#error You need pcap_inject() or pcap_sendpacket() from libpcap, libdnet, Linux's PF_PACKET/TX_RING or *BSD's BPF + !defined HAVE_BPF && !defined TX_RING && !defined HAVE_LIBXDP +#error You need pcap_inject() or pcap_sendpacket() from libpcap, libdnet, Linux's PF_PACKET/TX_RING/AF_XDP with libxdp or *BSD's BPF #endif #ifdef HAVE_SYS_PARAM_H #include 
#endif -#ifdef HAVE_SYS_SYSCTL_H -#include -#endif #ifdef HAVE_NET_ROUTE_H #include #endif @@ -211,7 +223,15 @@ static struct tcpr_ether_addr *sendpacket_get_hwaddr_pcap(sendpacket_t *) _U_; #undef INJECT_METHOD #define INJECT_METHOD "pcap_sendpacket()" #endif - +#ifdef HAVE_LIBXDP +#include +static sendpacket_t *sendpacket_open_xsk(const char *, char *) _U_; +static struct tcpr_ether_addr *sendpacket_get_hwaddr_libxdp(sendpacket_t *); +#endif +#if defined HAVE_LIBXDP && !defined INJECT_METHOD +#undef INJECT_METHOD +#define INJECT_METHOD "xsk_ring_prod_submit()" +#endif static void sendpacket_seterr(sendpacket_t *sp, const char *fmt, ...); static sendpacket_t *sendpacket_open_khial(const char *, char *) _U_; static struct tcpr_ether_addr *sendpacket_get_hwaddr_khial(sendpacket_t *) _U_; @@ -237,7 +257,10 @@ sendpacket(sendpacket_t *sp, const u_char *data, size_t len, struct pcap_pkthdr static const size_t buffer_payload_size = sizeof(buffer) + sizeof(struct pcap_pkthdr); assert(sp); +#ifndef HAVE_LIBXDP + // In case of XDP packet processing we are storing data in sp->packet_processing->xdp_descs assert(data); +#endif if (len == 0) return -1; @@ -452,7 +475,18 @@ sendpacket(sendpacket_t *sp, const u_char *data, size_t len, struct pcap_pkthdr } #endif /* HAVE_NETMAP */ break; - + case SP_TYPE_LIBXDP: +#ifdef HAVE_LIBXDP + retcode = len; + xsk_ring_prod__submit(&(sp->xsk_info->tx), sp->pckt_count); // submit all packets at once + sp->xsk_info->ring_stats.tx_npkts += sp->pckt_count; + sp->xsk_info->outstanding_tx += sp->pckt_count; + while (sp->xsk_info->outstanding_tx != 0) { + complete_tx_only(sp); + } + sp->sent += sp->pckt_count; +#endif + break; default: errx(-1, "Unsupported sp->handle_type = %d", sp->handle_type); } /* end case */ @@ -465,8 +499,15 @@ sendpacket(sendpacket_t *sp, const u_char *data, size_t len, struct pcap_pkthdr sendpacket_seterr(sp, "Only able to write %d bytes out of %lu bytes total", retcode, len); sp->trunc_packets++; } else { +#ifndef 
HAVE_LIBXDP sp->bytes_sent += len; sp->sent++; +#else + if (sp->handle_type != SP_TYPE_LIBXDP) { + sp->bytes_sent += len; + sp->sent++; + } +#endif } return retcode; } @@ -531,14 +572,21 @@ sendpacket_open(const char *device, sp = (sendpacket_t *)sendpacket_open_netmap(device, errbuf, arg); else #endif +#ifdef HAVE_LIBXDP + if (sendpacket_type == SP_TYPE_LIBXDP) + sp = sendpacket_open_xsk(device, errbuf); + else +#endif #if defined HAVE_PF_PACKET sp = sendpacket_open_pf(device, errbuf); #elif defined HAVE_BPF - sp = sendpacket_open_bpf(device, errbuf); + sp = sendpacket_open_bpf(device, errbuf); #elif defined HAVE_LIBDNET - sp = sendpacket_open_libdnet(device, errbuf); + sp = sendpacket_open_libdnet(device, errbuf); #elif (defined HAVE_PCAP_INJECT || defined HAVE_PCAP_SENDPACKET) - sp = sendpacket_open_pcap(device, errbuf); + sp = sendpacket_open_pcap(device, errbuf); +#elif defined HAVE_LIBXDP + sp = sendpacket_open_xsk(device, errbuf); #else #error "No defined packet injection method for sendpacket_open()" #endif @@ -647,6 +695,13 @@ sendpacket_close(sendpacket_t *sp) case SP_TYPE_TUNTAP: #ifdef HAVE_TUNTAP close(sp->handle.fd); +#endif + break; + case SP_TYPE_LIBXDP: +#ifdef HAVE_LIBXDP + close(sp->handle.fd); + safe_free(sp->xsk_info); + safe_free(sp->umem_info); #endif break; case SP_TYPE_NONE: @@ -674,6 +729,8 @@ sendpacket_get_hwaddr(sendpacket_t *sp) } else { #if defined HAVE_PF_PACKET addr = sendpacket_get_hwaddr_pf(sp); +#elif defined HAVE_LIBXDP + addr = sendpacket_get_hwaddr_libxdp(sp); #elif defined HAVE_BPF addr = sendpacket_get_hwaddr_bpf(sp); #elif defined HAVE_LIBDNET @@ -1203,30 +1260,36 @@ sendpacket_get_dlt(sendpacket_t *sp) { int dlt = DLT_EN10MB; - if (sp->handle_type == SP_TYPE_KHIAL || sp->handle_type == SP_TYPE_NETMAP || sp->handle_type == SP_TYPE_TUNTAP) { - /* always EN10MB */ - } else { -#if defined HAVE_BPF - int rcode; + switch (sp->handle_type) { + case SP_TYPE_KHIAL: + case SP_TYPE_NETMAP: + case SP_TYPE_TUNTAP: + case 
SP_TYPE_LIBXDP: + /* always EN10MB */ + return dlt; + default: + ; + } - if ((rcode = ioctl(sp->handle.fd, BIOCGDLT, &dlt)) < 0) { - warnx("Unable to get DLT value for BPF device (%s): %s", sp->device, strerror(errno)); - return (-1); - } +#if defined HAVE_BPF + if ((ioctl(sp->handle.fd, BIOCGDLT, &dlt)) < 0) { + warnx("Unable to get DLT value for BPF device (%s): %s", sp->device, strerror(errno)); + return (-1); + } #elif defined HAVE_PF_PACKET || defined HAVE_LIBDNET - /* use libpcap to get dlt */ - pcap_t *pcap; - char errbuf[PCAP_ERRBUF_SIZE]; - if ((pcap = pcap_open_live(sp->device, 65535, 0, 0, errbuf)) == NULL) { - warnx("Unable to get DLT value for %s: %s", sp->device, errbuf); - return (-1); - } - dlt = pcap_datalink(pcap); - pcap_close(pcap); + /* use libpcap to get dlt */ + pcap_t *pcap; + char errbuf[PCAP_ERRBUF_SIZE]; + if ((pcap = pcap_open_live(sp->device, 65535, 0, 0, errbuf)) == NULL) { + warnx("Unable to get DLT value for %s: %s", sp->device, errbuf); + return (-1); + } + dlt = pcap_datalink(pcap); + pcap_close(pcap); #elif defined HAVE_PCAP_SENDPACKET || defined HAVE_PCAP_INJECT - dlt = pcap_datalink(sp->handle.pcap); + dlt = pcap_datalink(sp->handle.pcap); #endif - } + return dlt; } @@ -1294,3 +1357,156 @@ sendpacket_abort(sendpacket_t *sp) sp->abort = true; } +#ifdef HAVE_LIBXDP +static struct xsk_socket_info * +xsk_configure_socket(struct xsk_umem_info *umem, struct xsk_socket_config *cfg, int queue_id, const char *device) +{ + struct xsk_socket_info *xsk; + struct xsk_ring_cons *rxr = NULL; + int ret; + + xsk = (struct xsk_socket_info *)safe_malloc(sizeof(struct xsk_socket_info)); + xsk->umem = umem; + ret = xsk_socket__create(&xsk->xsk, device, queue_id, umem->umem, rxr, &xsk->tx, cfg); + if (ret) { + return NULL; + } + + memset(&xsk->app_stats, 0, sizeof(xsk->app_stats)); + + return xsk; +} + +static sendpacket_t * +sendpacket_open_xsk(const char *device, char *errbuf) +{ + sendpacket_t *sp; + + assert(device); + assert(errbuf); + + int 
nb_of_frames = 4096; + int frame_size = 4096; + int nb_of_completion_queue_desc = 4096; + int nb_of_fill_queue_desc = 4096; + struct xsk_umem_info *umem_info = + create_umem_area(nb_of_frames, frame_size, nb_of_completion_queue_desc, nb_of_fill_queue_desc); + if (umem_info == NULL) { + return NULL; + } + + int nb_of_tx_queue_desc = 4096; + int nb_of_rx_queue_desc = 4096; + u_int32_t queue_id = 0; + struct xsk_socket_info *xsk_info = + create_xsk_socket(umem_info, nb_of_tx_queue_desc, nb_of_rx_queue_desc, device, queue_id, errbuf); + if (xsk_info == NULL) { + return NULL; + } + + sp = (sendpacket_t *)safe_malloc(sizeof(sendpacket_t)); + strlcpy(sp->device, device, sizeof(sp->device)); + sp->handle.fd = xsk_info->xsk->fd; + sp->handle_type = SP_TYPE_LIBXDP; + sp->xsk_info = xsk_info; + sp->umem_info = umem_info; + sp->frame_size = frame_size; + sp->tx_size = nb_of_tx_queue_desc; + return sp; +} + +struct xsk_umem_info * +create_umem_area(int nb_of_frames, int frame_size, int nb_of_completion_queue_descs, int nb_of_fill_queue_descs) +{ + int umem_size = nb_of_frames * frame_size; + struct xsk_umem_info *umem; + void *umem_area = NULL; + struct xsk_umem_config cfg = {/* We recommend that you set the fill ring size >= HW RX ring size + + * AF_XDP RX ring size. Make sure you fill up the fill ring + * with buffers at regular intervals, and you will with this setting + * avoid allocation failures in the driver. These are usually quite + * expensive since drivers have not been written to assume that + * allocation failures are common. For regular sockets, kernel + * allocated memory is used that only runs out in OOM situations + * that should be rare. 
+ */ + .fill_size = nb_of_fill_queue_descs * 2, + .comp_size = nb_of_completion_queue_descs, + .frame_size = frame_size, + .frame_headroom = 0, + .flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG}; + umem = (struct xsk_umem_info *)safe_malloc(sizeof(struct xsk_umem_info)); + if (posix_memalign(&umem_area, + getpagesize(), /* PAGE_SIZE aligned */ + umem_size)) { + fprintf(stderr, "ERROR: Can't allocate buffer memory \"%s\"\n", strerror(errno)); + exit(EXIT_FAILURE); + } + int ret = xsk_umem__create(&umem->umem, umem_area, umem_size, &umem->fq, &umem->cq, &cfg); + umem->buffer = umem_area; + if (ret != 0) { + return NULL; + } + return umem; +} + +struct xsk_socket_info * +create_xsk_socket(struct xsk_umem_info *umem_info, + int nb_of_tx_queue_desc, + int nb_of_rx_queue_desc, + const char *device, + u_int32_t queue_id, + char *errbuf) +{ + struct xsk_socket_info *xsk_info = (struct xsk_socket_info *)safe_malloc(sizeof(struct xsk_socket_info)); + struct xsk_socket_config *socket_config = (struct xsk_socket_config *)safe_malloc(sizeof(struct xsk_socket_config)); + + socket_config->rx_size = nb_of_rx_queue_desc; + socket_config->tx_size = nb_of_tx_queue_desc; + socket_config->libbpf_flags = XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD; + socket_config->bind_flags = 0; // XDP_FLAGS_SKB_MODE (1U << 1) or XDP_FLAGS_DRV_MODE (1U << 2) + xsk_info = xsk_configure_socket(umem_info, socket_config, queue_id, device); + + if (xsk_info == NULL) { + snprintf(errbuf, SENDPACKET_ERRBUF_SIZE, "AF_XDP socket configuration is not successful: %s", strerror(errno)); + return NULL; + } + return xsk_info; +} + +/* + * gets the hardware address of the device via a SIOCGIFHWADDR ioctl on a dummy socket + */ +static _U_ struct tcpr_ether_addr * +sendpacket_get_hwaddr_libxdp(sendpacket_t *sp) +{ + struct ifreq ifr; + int fd; + + assert(sp); + + if (!sp->open) { + sendpacket_seterr(sp, "Unable to get hardware address on un-opened sendpacket handle"); + return NULL; + } + + /* create dummy socket for ioctl */ + if ((fd = socket(AF_INET, 
SOCK_DGRAM, 0)) < 0) { + sendpacket_seterr(sp, "Unable to open dummy socket for get_hwaddr: %s", strerror(errno)); + return NULL; + } + + memset(&ifr, 0, sizeof(ifr)); + strlcpy(ifr.ifr_name, sp->device, sizeof(ifr.ifr_name)); + + if (ioctl(fd, SIOCGIFHWADDR, (int8_t *)&ifr) < 0) { + close(fd); + sendpacket_seterr(sp, "Error getting hardware address: %s", strerror(errno)); + return NULL; + } + + memcpy(&sp->ether, &ifr.ifr_hwaddr.sa_data, ETHER_ADDR_LEN); + close(fd); + return (&sp->ether); +} +#endif /* HAVE_LIBXDP */ diff --git a/src/common/sendpacket.h b/src/common/sendpacket.h index e1dd6a42..73f2cddd 100644 --- a/src/common/sendpacket.h +++ b/src/common/sendpacket.h @@ -67,7 +67,8 @@ typedef enum sendpacket_type_e { SP_TYPE_TX_RING, SP_TYPE_KHIAL, SP_TYPE_NETMAP, - SP_TYPE_TUNTAP + SP_TYPE_TUNTAP, + SP_TYPE_LIBXDP } sendpacket_type_t; /* these are the file_operations ioctls */ @@ -91,6 +92,71 @@ union sendpacket_handle { #define SENDPACKET_ERRBUF_SIZE 1024 #define MAX_IFNAMELEN 64 +#ifdef HAVE_LIBXDP +#include +#include +#include +#include + +struct xsk_ring_stats { + unsigned long rx_npkts; + unsigned long tx_npkts; + unsigned long rx_dropped_npkts; + unsigned long rx_invalid_npkts; + unsigned long tx_invalid_npkts; + unsigned long rx_full_npkts; + unsigned long rx_fill_empty_npkts; + unsigned long tx_empty_npkts; + unsigned long prev_rx_npkts; + unsigned long prev_tx_npkts; + unsigned long prev_rx_dropped_npkts; + unsigned long prev_rx_invalid_npkts; + unsigned long prev_tx_invalid_npkts; + unsigned long prev_rx_full_npkts; + unsigned long prev_rx_fill_empty_npkts; + unsigned long prev_tx_empty_npkts; +}; +struct xsk_driver_stats { + unsigned long intrs; + unsigned long prev_intrs; +}; +struct xsk_app_stats { + unsigned long rx_empty_polls; + unsigned long fill_fail_polls; + unsigned long copy_tx_sendtos; + unsigned long tx_wakeup_sendtos; + unsigned long opt_polls; + unsigned long prev_rx_empty_polls; + unsigned long prev_fill_fail_polls; + unsigned long 
prev_copy_tx_sendtos; + unsigned long prev_tx_wakeup_sendtos; + unsigned long prev_opt_polls; +}; +struct xsk_umem_info { + struct xsk_ring_prod fq; + struct xsk_ring_cons cq; + struct xsk_umem *umem; + void *buffer; +}; +struct xsk_socket { + struct xsk_ring_cons *rx; + struct xsk_ring_prod *tx; + struct xsk_ctx *ctx; + struct xsk_socket_config config; + int fd; +}; +struct xsk_socket_info { + struct xsk_ring_cons rx; + struct xsk_ring_prod tx; + struct xsk_umem_info *umem; + struct xsk_socket *xsk; + struct xsk_ring_stats ring_stats; + struct xsk_app_stats app_stats; + struct xsk_driver_stats drv_stats; + u_int32_t outstanding_tx; +}; +#endif /* HAVE_LIBXDP */ + struct sendpacket_s { tcpr_dir_t cache_dir; int open; @@ -140,12 +206,65 @@ struct sendpacket_s { #ifdef HAVE_TX_RING txring_t *tx_ring; #endif +#endif +#ifdef HAVE_LIBXDP + struct xsk_socket_info *xsk_info; + struct xsk_umem_info *umem_info; + unsigned int batch_size; + unsigned int pckt_count; + int frame_size; + unsigned int tx_idx; + int tx_size; #endif bool abort; }; - typedef struct sendpacket_s sendpacket_t; +#ifdef HAVE_LIBXDP +struct xsk_umem_info * +create_umem_area(int nb_of_frames, int frame_size, int nb_of_completion_queue_descs, int nb_of_fill_queue_descs); +struct xsk_socket_info *create_xsk_socket(struct xsk_umem_info *umem, + int nb_of_tx_queue_desc, + int nb_of_rx_queue_desc, + const char *device, + u_int32_t queue_id, + char *errbuf); +static inline void +gen_eth_frame(struct xsk_umem_info *umem, u_int64_t addr, u_char *pkt_data, COUNTER pkt_size) +{ + memcpy(xsk_umem__get_data(umem->buffer, addr), pkt_data, pkt_size); +} + +static inline void +kick_tx(struct xsk_socket_info *xsk) +{ + int ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0); + if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN) { + return; + } + printf("%s\n", "Packet sending exited with error!"); + exit (1); +} + +static inline void 
+complete_tx_only(sendpacket_t *sp) +{ + u_int32_t completion_idx = 0; + if (sp->xsk_info->outstanding_tx == 0) { + return; + } + if (xsk_ring_prod__needs_wakeup(&(sp->xsk_info->tx))) { + sp->xsk_info->app_stats.tx_wakeup_sendtos++; + kick_tx(sp->xsk_info); + } + unsigned int rcvd = xsk_ring_cons__peek(&sp->xsk_info->umem->cq, sp->pckt_count, &completion_idx); + if (rcvd > 0) { + xsk_ring_cons__release(&sp->xsk_info->umem->cq, rcvd); + sp->xsk_info->outstanding_tx -= rcvd; + } +} +#endif /* HAVE_LIBXDP */ + int sendpacket(sendpacket_t *, const u_char *, size_t, struct pcap_pkthdr *); void sendpacket_close(sendpacket_t *); char *sendpacket_geterr(sendpacket_t *); diff --git a/src/defines.h.in b/src/defines.h.in index 364b215b..ee5d2425 100644 --- a/src/defines.h.in +++ b/src/defines.h.in @@ -48,6 +48,22 @@ #define PCAP_DONT_INCLUDE_PCAP_BPF_H 1 #endif +#ifdef HAVE_LIBBPF +#undef HAVE_BPF +#include +#include +#define PCAP_DONT_INCLUDE_PCAP_BPF_H 1 + +struct bpf_program { +char dummy[0]; +}; + +#endif + +#ifdef HAVE_LIBXDP +#include +#endif + #if defined INCLUDE_PCAP_BPF_H_FILE && !defined PCAP_DONT_INCLUDE_PCAP_BPF_H #include <@INCLUDE_PCAP_BPF_HEADER@> #define PCAP_DONT_INCLUDE_PCAP_BPF_H 1 /* don't re-include it in pcap.h */ diff --git a/src/send_packets.c b/src/send_packets.c index d3969e42..e9e445d9 100644 --- a/src/send_packets.c +++ b/src/send_packets.c @@ -66,8 +66,11 @@ static void calc_sleep_time(tcpreplay_t *ctx, COUNTER start_us, COUNTER *skip_length); static void tcpr_sleep(tcpreplay_t *ctx, sendpacket_t *sp _U_, struct timespec *nap_this_time, struct timeval *now); -static u_char * -get_next_packet(tcpreplay_t *ctx, pcap_t *pcap, struct pcap_pkthdr *pkthdr, int file_idx, packet_cache_t **prev_packet); +static u_char *get_next_packet(tcpreplay_opt_t *options, + pcap_t *pcap, + struct pcap_pkthdr *pkthdr, + int file_idx, + packet_cache_t **prev_packet); static uint32_t get_user_count(tcpreplay_t *ctx, sendpacket_t *sp, COUNTER counter); #ifdef HAVE_NETMAP 
@@ -301,7 +304,7 @@ preload_pcap_file(tcpreplay_t *ctx, int idx) dlt = pcap_datalink(pcap); /* loop through the pcap. get_next_packet() builds the cache for us! */ - while ((pktdata = get_next_packet(ctx, pcap, &pkthdr, idx, prev_packet)) != NULL) { + while ((pktdata = get_next_packet(options, pcap, &pkthdr, idx, prev_packet)) != NULL) { if (options->flow_stats) update_flow_stats(ctx, NULL, &pkthdr, pktdata, dlt); } @@ -353,6 +356,7 @@ send_packets(tcpreplay_t *ctx, pcap_t *pcap, int idx) bool top_speed = (options->speed.mode == speed_topspeed || (options->speed.mode == speed_mbpsrate && options->speed.speed == 0)); bool now_is_now = true; + bool read_next_packet = true; // used for LIBXDP batch packet processing with cached packets gettimeofday(&now, NULL); if (!timerisset(&stats->start_time)) { @@ -381,7 +385,8 @@ send_packets(tcpreplay_t *ctx, pcap_t *pcap, int idx) * Keep sending while we have packets or until * we've sent enough packets */ - while (!ctx->abort && (pktdata = get_next_packet(ctx, pcap, &pkthdr, idx, prev_packet)) != NULL) { + while (!ctx->abort && read_next_packet && + (pktdata = get_next_packet(options, pcap, &pkthdr, idx, prev_packet)) != NULL) { now_is_now = false; packetnum++; #if defined TCPREPLAY || defined TCPREPLAY_EDIT @@ -487,8 +492,18 @@ send_packets(tcpreplay_t *ctx, pcap_t *pcap, int idx) /* * we know how long to sleep between sends, now do it. 
*/ - if (!top_speed) + if (!top_speed) { +#ifndef HAVE_LIBXDP tcpr_sleep(ctx, sp, &ctx->nap, &now); +#else + if (sp->handle_type != SP_TYPE_LIBXDP) { + tcpr_sleep(ctx, sp, &ctx->nap, &now); + } else if (sp->batch_size == 1) { + // In case of LIBXDP packet processing waiting only makes sense when batch size is one + tcpr_sleep(ctx, sp, &ctx->nap, &now); + } +#endif + } } #ifdef ENABLE_VERBOSE @@ -497,6 +512,18 @@ send_packets(tcpreplay_t *ctx, pcap_t *pcap, int idx) tcpdump_print(options->tcpdump, &pkthdr, pktdata); #endif +#ifdef HAVE_LIBXDP + if (sp->handle_type == SP_TYPE_LIBXDP) { + /* Reserve frames for the batch */ + while (xsk_ring_prod__reserve(&(sp->xsk_info->tx), sp->batch_size, &sp->tx_idx) < sp->batch_size) { + complete_tx_only(sp); + } + /* The first packet is already in memory */ + prepare_first_element_of_batch(ctx, &packetnum, pktdata, pkthdr.len); + /* Read more packets and prepare batch */ + prepare_remaining_elements_of_batch(ctx, &packetnum, &read_next_packet, pcap, &idx, pkthdr, prev_packet); + } +#endif dbgx(2, "Sending packet #" COUNTER_SPEC, packetnum); /* write packet out on network */ if (sendpacket(sp, pktdata, pktlen, &pkthdr) < (int)pktlen) { @@ -514,8 +541,12 @@ send_packets(tcpreplay_t *ctx, pcap_t *pcap, int idx) #endif stats->pkts_sent++; +#ifndef HAVE_LIBXDP stats->bytes_sent += pktlen; - +#else + if (sp->handle_type != SP_TYPE_LIBXDP) + stats->bytes_sent += pktlen; +#endif /* print stats during the run? 
*/ if (options->stats > 0) { if (!timerisset(&stats->last_print)) { @@ -619,8 +650,8 @@ send_dual_packets(tcpreplay_t *ctx, pcap_t *pcap1, int cache_file_idx1, pcap_t * prev_packet2 = NULL; } - pktdata1 = get_next_packet(ctx, pcap1, &pkthdr1, cache_file_idx1, prev_packet1); - pktdata2 = get_next_packet(ctx, pcap2, &pkthdr2, cache_file_idx2, prev_packet2); + pktdata1 = get_next_packet(options, pcap1, &pkthdr1, cache_file_idx1, prev_packet1); + pktdata2 = get_next_packet(options, pcap2, &pkthdr2, cache_file_idx2, prev_packet2); /* MAIN LOOP * Keep sending while we have packets or until @@ -800,9 +831,9 @@ send_dual_packets(tcpreplay_t *ctx, pcap_t *pcap1, int cache_file_idx1, pcap_t * /* get the next packet for this file handle depending on which we last used */ if (sp == ctx->intf2) { - pktdata2 = get_next_packet(ctx, pcap2, &pkthdr2, cache_file_idx2, prev_packet2); + pktdata2 = get_next_packet(options, pcap2, &pkthdr2, cache_file_idx2, prev_packet2); } else { - pktdata1 = get_next_packet(ctx, pcap1, &pkthdr1, cache_file_idx1, prev_packet1); + pktdata1 = get_next_packet(options, pcap1, &pkthdr1, cache_file_idx1, prev_packet1); } /* stop sending based on the duration limit... */ @@ -845,9 +876,8 @@ send_dual_packets(tcpreplay_t *ctx, pcap_t *pcap1, int cache_file_idx1, pcap_t * * will be updated as new entries are added (or retrieved) from the cache list. 
*/ u_char * -get_next_packet(tcpreplay_t *ctx, pcap_t *pcap, struct pcap_pkthdr *pkthdr, int idx, packet_cache_t **prev_packet) +get_next_packet(tcpreplay_opt_t *options, pcap_t *pcap, struct pcap_pkthdr *pkthdr, int idx, packet_cache_t **prev_packet) { - tcpreplay_opt_t *options = ctx->options; u_char *pktdata = NULL; uint32_t pktlen; @@ -1215,3 +1245,78 @@ get_user_count(tcpreplay_t *ctx, sendpacket_t *sp, COUNTER counter) return (uint32_t)send; } + +#ifdef HAVE_LIBXDP +void +check_packet_fits_in_umem_frame(sendpacket_t *sp, int packet_len) +{ + if (packet_len > sp->frame_size) { + fprintf(stderr, + "ERROR: packet size cannot be larger than the size of an UMEM frame! Packet size: %i Frame size: %i\n", + packet_len, + sp->frame_size); + free_umem_and_xsk(sp); + exit(-1); + } +} + +void +fill_umem_with_data_and_set_xdp_desc(sendpacket_t *sp, int tx_idx, COUNTER umem_index, u_char *pktdata, int len) +{ + check_packet_fits_in_umem_frame(sp, len); + COUNTER umem_index_mod = (umem_index % sp->batch_size) * sp->frame_size; // packets are sent in batch, after each + // batch umem memory is reusable + gen_eth_frame(sp->umem_info, umem_index_mod, pktdata, len); + struct xdp_desc *xdp_desc = xsk_ring_prod__tx_desc(&(sp->xsk_info->tx), tx_idx); + xdp_desc->addr = (COUNTER)(umem_index_mod); + xdp_desc->len = len; +} + +void +prepare_first_element_of_batch(tcpreplay_t *ctx, COUNTER *packetnum, u_char *pktdata, u_int32_t len) +{ + sendpacket_t *sp = ctx->intf1; + tcpreplay_stats_t *stats = &ctx->stats; + fill_umem_with_data_and_set_xdp_desc(sp, sp->tx_idx, *packetnum - 1, pktdata, len); + sp->bytes_sent += len; + stats->bytes_sent += len; +} + +void +prepare_remaining_elements_of_batch(tcpreplay_t *ctx, + COUNTER *packetnum, + bool *read_next_packet, + pcap_t *pcap, + int *idx, + struct pcap_pkthdr pkthdr, + packet_cache_t **prev_packet) +{ + sendpacket_t *sp = ctx->intf1; + tcpreplay_stats_t *stats = &ctx->stats; + int datalink = ctx->options->file_cache[*idx].dlt; + bool 
preload = ctx->options->file_cache[*idx].cached; + u_char *pktdata = NULL; + unsigned int pckt_count = 1; + while (!ctx->abort && (pckt_count < sp->batch_size) && + (pktdata = get_next_packet(ctx->options, pcap, &pkthdr, *idx, prev_packet)) != NULL) { + fill_umem_with_data_and_set_xdp_desc(sp, sp->tx_idx + pckt_count, *packetnum, pktdata, pkthdr.len); + ++pckt_count; + ++*packetnum; + stats->bytes_sent += pkthdr.len; + sp->bytes_sent += pkthdr.len; + stats->pkts_sent++; + if (ctx->options->flow_stats && !preload) { + update_flow_stats(ctx, ctx->options->cache_packets ? sp : NULL, &pkthdr, pktdata, datalink); + } + } + if (pckt_count < sp->batch_size) { + // No more packets to read, it is essential for cached packet processing + *read_next_packet = false; + } + sp->pckt_count = pckt_count; + dbgx(2, + "Sending packets with LIBXDP in batch, packet numbers from " COUNTER_SPEC " to " COUNTER_SPEC "\n", + *packetnum - pckt_count + 1, + *packetnum); +} +#endif /* HAVE_LIBXDP */ diff --git a/src/send_packets.h b/src/send_packets.h index eb60be40..7756c287 100644 --- a/src/send_packets.h +++ b/src/send_packets.h @@ -27,3 +27,14 @@ void send_packets(tcpreplay_t *ctx, pcap_t *pcap, int idx); void send_dual_packets(tcpreplay_t *ctx, pcap_t *pcap1, int idx1, pcap_t *pcap2, int idx2); void *cache_mode(tcpreplay_t *ctx, char *cachedata, COUNTER packet_num); void preload_pcap_file(tcpreplay_t *ctx, int idx); +#ifdef HAVE_LIBXDP +void prepare_remaining_elements_of_batch(tcpreplay_t *ctx, + COUNTER *packetnum, + bool *read_next_packet, + pcap_t *pcap, + int *idx, + struct pcap_pkthdr pkthdr, + packet_cache_t **prev_packet); +void prepare_first_element_of_batch(tcpreplay_t *ctx, COUNTER *packetnum, u_char *pktdata, u_int32_t len); +void fill_umem_with_data_and_set_xdp_desc(sendpacket_t *sp, int tx_idx, COUNTER umem_index, u_char *pktdata, int len); +#endif diff --git a/src/tcpedit/plugins/dlt_en10mb/en10mb.c b/src/tcpedit/plugins/dlt_en10mb/en10mb.c index 0c24d8e5..957ce20d 100644 
--- a/src/tcpedit/plugins/dlt_en10mb/en10mb.c +++ b/src/tcpedit/plugins/dlt_en10mb/en10mb.c @@ -184,7 +184,7 @@ dlt_en10mb_parse_subsmac(tcpeditdlt_t *ctx, en10mb_config_t *config, const char { size_t input_len = strlen(input); size_t possible_entries_number = (input_len / (SUBSMAC_ENTRY_LEN + 1)) + 1; - int entry; + size_t entry; en10mb_sub_entry_t *entries = safe_malloc(possible_entries_number * sizeof(en10mb_sub_entry_t)); @@ -524,7 +524,7 @@ dlt_en10mb_encode(tcpeditdlt_t *ctx, u_char *packet, int pktlen, tcpr_dir_t dir) newl2len = TCPR_802_1Q_H; } - if (pktlen < newl2len || pktlen + newl2len - ctx->l2len > MAXPACKET) { + if ((uint32_t)pktlen < newl2len || pktlen + newl2len - ctx->l2len > MAXPACKET) { tcpedit_seterr(ctx->tcpedit, "Unable to process packet #" COUNTER_SPEC " since its new length is %d bytes.", ctx->tcpedit->runtime.packetnum, diff --git a/src/tcpreplay_api.c b/src/tcpreplay_api.c index d429b91b..2b992e00 100644 --- a/src/tcpreplay_api.c +++ b/src/tcpreplay_api.c @@ -265,6 +265,15 @@ tcpreplay_post_args(tcpreplay_t *ctx, int argc) #endif } + if (HAVE_OPT(XDP)) { +#ifdef HAVE_LIBXDP + options->xdp = 1; + ctx->sp_type = SP_TYPE_LIBXDP; +#else + err(-1, "--xdp feature was not compiled in. 
See INSTALL."); +#endif + } + if (HAVE_OPT(UNIQUE_IP)) options->unique_ip = 1; @@ -358,7 +367,9 @@ tcpreplay_post_args(tcpreplay_t *ctx, int argc) ret = -1; goto out; } - +#ifdef HAVE_LIBXDP + ctx->intf1->batch_size = OPT_VALUE_XDP_BATCH_SIZE; +#endif #if defined HAVE_NETMAP ctx->intf1->netmap_delay = ctx->options->netmap_delay; #endif @@ -429,6 +440,15 @@ tcpreplay_close(tcpreplay_t *ctx) assert(ctx->options); options = ctx->options; +#ifdef HAVE_LIBXDP + if (ctx->intf1->handle_type == SP_TYPE_LIBXDP) { + free_umem_and_xsk(ctx->intf1); + if (ctx->intf2) { + free_umem_and_xsk(ctx->intf2); + } + } +#endif + safe_free(options->intf1_name); safe_free(options->intf2_name); sendpacket_close(ctx->intf1); @@ -1157,6 +1177,13 @@ tcpreplay_replay(tcpreplay_t *ctx) if (ctx->options->stats == 0) packet_stats(&ctx->stats); } +#ifdef HAVE_LIBXDP + sendpacket_t *sp = ctx->intf1; + if (sp->handle_type == SP_TYPE_LIBXDP) { + sp->xsk_info->tx.cached_prod = 0; + sp->xsk_info->tx.cached_cons = sp->tx_size; + } +#endif } } else { while (!ctx->abort) { /* loop forever unless user aborts */ @@ -1356,3 +1383,36 @@ int tcpreplay_get_flow_expiry(tcpreplay_t *ctx) return ctx->options->flow_expiry; } + +#ifdef HAVE_LIBXDP +void +delete_xsk_socket(struct xsk_socket *xsk) +{ + size_t desc_sz = sizeof(struct xdp_desc); + struct xdp_mmap_offsets off; + socklen_t optlen; + int err; + + if (!xsk) + return; + + optlen = sizeof(off); + err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); + if (!err) { + if (xsk->rx) { + munmap(xsk->rx->ring - off.rx.desc, off.rx.desc + xsk->config.rx_size * desc_sz); + } + if (xsk->tx) { + munmap(xsk->tx->ring - off.tx.desc, off.tx.desc + xsk->config.tx_size * desc_sz); + } + } + close(xsk->fd); +} + +void +free_umem_and_xsk(sendpacket_t *sp) +{ + xsk_umem__delete(sp->xsk_info->umem->umem); + delete_xsk_socket(sp->xsk_info->xsk); +} +#endif /* HAVE_LIBXDP */ diff --git a/src/tcpreplay_api.h b/src/tcpreplay_api.h index 88c03981..1510443a 100644 --- 
a/src/tcpreplay_api.h +++ b/src/tcpreplay_api.h @@ -33,6 +33,10 @@ #ifdef ENABLE_DMALLOC #include #endif +#ifdef HAVE_LIBXDP +#include +#include +#endif #ifdef __cplusplus extern "C" { @@ -144,6 +148,10 @@ typedef struct tcpreplay_opt_s { int netmap_delay; #endif +#ifdef HAVE_LIBXDP + int xdp; +#endif + /* print flow statistic */ bool flow_stats; int flow_expiry; @@ -272,6 +280,10 @@ int tcpreplay_set_tcpdump(tcpreplay_t *, tcpdump_t *); void __tcpreplay_seterr(tcpreplay_t *ctx, const char *func, const int line, const char *file, const char *fmt, ...); void tcpreplay_setwarn(tcpreplay_t *ctx, const char *fmt, ...); +#ifdef HAVE_LIBXDP +void delete_xsk_socket(struct xsk_socket *xsk); +void free_umem_and_xsk(sendpacket_t *sp); +#endif #ifdef __cplusplus } #endif diff --git a/src/tcpreplay_opts.def b/src/tcpreplay_opts.def index a0036f76..e05c7b32 100644 --- a/src/tcpreplay_opts.def +++ b/src/tcpreplay_opts.def @@ -528,6 +528,20 @@ are fully up before netmap transmit. Requires netmap option. Default is 10 secon EOText; }; +flag = { + ifdef = HAVE_LIBXDP; + name = xdp; + descrip = "Write packets directly to AF_XDP enabled network adapter"; + doc = <<- EOText +This feature will detect AF_XDP capable network drivers on Linux systems +that have 'libxdp-dev' and 'libbpf-dev' installed. If detected, the network +stack is bypassed and packets are sent directly to an eBPF enabled driver. +This will allow you to achieve full line rates on commodity network adapters, similar to rates +achieved by commercial network traffic generators. +EOText; +}; + + flag = { name = no-flow-stats; descrip = "Suppress printing and tracking flow count, rates and expirations"; @@ -597,6 +611,16 @@ sending packets may cause equally long delays between printing statistics. 
EOText; }; +flag = { + ifdef = HAVE_LIBXDP; + name = xdp-batch-size; + arg-type = number; + arg-range = "1->4096"; + descrip = "The maximum number of packets that can be submitted to the AF_XDP TX ring at once"; + arg-default = 25; + doc = "Higher values may improve performance at the cost of accuracy"; +}; + flag = { name = version; value = V;