Skip to content

Commit

Permalink
Set socket protocol only after packet ring configured
Browse files Browse the repository at this point in the history
When using the memory mapped packet receive path on Linux, we find
that if the buffer is "large" we get a number of dropped packets.
This number of packets is roughly proportional to the buffer size.
It turns out that these packets are dropped by the kernel in the time
period between the socket being opened and the ring parameter setup
completing (this is where the kernel has to allocate the buffer memory).

To prevent this, we can open the socket on protocol 0, which the kernel
interprets to mean that we don't want to receive any packets at all (this
is described when opening a memory mapped packet socket for TX only).  We
then set the correct protocol once everything is configured.

Signed-off-by: Mark Marshall <[email protected]>
  • Loading branch information
Mark Marshall committed Nov 19, 2019
1 parent bf0a7f8 commit 97f59a7
Showing 1 changed file with 49 additions and 26 deletions.
75 changes: 49 additions & 26 deletions pcap-linux.c
Original file line number Diff line number Diff line change
Expand Up @@ -1323,17 +1323,21 @@ static void pcap_breakloop_linux(pcap_t *handle)
static int
open_pf_packet_socket(pcap_t *handle, int cooked)
{
int protocol = pcap_protocol(handle);
int sock_fd, ret;

/*
* Open a socket with protocol family packet. If cooked is true,
* we open a SOCK_DGRAM socket for the cooked interface, otherwise
* we open a SOCK_RAW socket for the raw interface.
*
* The protocol is set to 0. This means we will receive no
* packets until we "bind" the socket with a non-zero
* protocol. This allows us to setup the ring buffers without
* dropping any packets.
*/
sock_fd = cooked ?
socket(PF_PACKET, SOCK_DGRAM, protocol) :
socket(PF_PACKET, SOCK_RAW, protocol);
socket(PF_PACKET, SOCK_DGRAM, 0) :
socket(PF_PACKET, SOCK_RAW, 0);

if (sock_fd == -1) {
if (errno == EPERM || errno == EACCES) {
Expand Down Expand Up @@ -1371,6 +1375,7 @@ pcap_activate_linux(pcap_t *handle)
int is_any_device;
struct ifreq ifr;
int status = 0;
int status2 = 0;
int ret;

device = handle->opt.device;
Expand Down Expand Up @@ -1469,42 +1474,60 @@ pcap_activate_linux(pcap_t *handle)
* Success.
* Try to use memory-mapped access.
*/
switch (activate_mmap(handle, &status)) {
ret = activate_mmap(handle, &status);
if (ret == -1) {
/*
* We failed to set up to use it, or the
* kernel supports it, but we failed to
* enable it. status has been set to the
* error status to return and, if it's
* PCAP_ERROR, handle->errbuf contains
* the error message.
*/
goto fail;
}

case 1:

if (ret == 1) {
/*
* We succeeded. status has been
* set to the status to return,
* which might be 0, or might be
* a PCAP_WARNING_ value.
*
* Set the timeout to use in poll() before
* returning.
*/
set_poll_timeout(handlep);
return status;

case 0:
/*
* Kernel doesn't support it - just continue
* with non-memory-mapped access.
* Now that we have activated the mmap ring, we can
* set the correct protocol.
*/
break;

case -1:
if ((status2 = iface_bind(handle->fd, handlep->ifindex,
handle->errbuf, pcap_protocol(handle))) != 0) {
status = status2;
goto fail;
}
/*
* We failed to set up to use it, or the
* kernel supports it, but we failed to
* enable it. status has been set to the
* error status to return and, if it's
* PCAP_ERROR, handle->errbuf contains
* the error message.
* Set the timeout to use in poll() before
* returning.
*/
goto fail;
set_poll_timeout(handlep);
return status;
}

/*
* Kernel doesn't support it (mmap) - just continue
* with non-memory-mapped access.
*/
#endif /* HAVE_PACKET_RING */
}

/*
* We need to set the correct protocol.
*/
if ((status2 = iface_bind(handle->fd, handlep->ifindex,
handle->errbuf, pcap_protocol(handle))) != 0) {
status = status2;
goto fail;
}

/*
* We set up the socket, but not with memory-mapped access.
*/
Expand Down Expand Up @@ -3358,7 +3381,7 @@ activate_sock(pcap_t *handle, int is_any_device)
}

if ((err = iface_bind(sock_fd, handlep->ifindex,
handle->errbuf, pcap_protocol(handle))) != 0) {
handle->errbuf, 0)) != 0) {
close(sock_fd);
return err;
}
Expand Down Expand Up @@ -5279,7 +5302,7 @@ iface_bind(int fd, int ifindex, char *ebuf, int protocol)

memset(&sll, 0, sizeof(sll));
sll.sll_family = AF_PACKET;
sll.sll_ifindex = ifindex;
sll.sll_ifindex = ifindex < 0 ? 0 : ifindex;
sll.sll_protocol = protocol;

if (bind(fd, (struct sockaddr *) &sll, sizeof(sll)) == -1) {
Expand Down

0 comments on commit 97f59a7

Please sign in to comment.