Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Changes in IPv6 nexthops cause nl_cache_mngr routes to have duplicate nexthops. #288

Open
tlsalmin opened this issue Jul 21, 2021 · 6 comments

Comments

@tlsalmin
Copy link

Given the test program that uses nl cache manager to listen for route updates:

#include <assert.h>
#include <netlink/netlink.h>
#include <netlink/route/link.h>
#include <netlink/route/route.h>
#include <signal.h>
#include <sys/epoll.h>
#include <sys/signalfd.h>
#include <unistd.h>

static void mngr_update(__attribute__((unused)) struct nl_cache *cache,
                        struct nl_object *obj, int type,
                        __attribute__((unused)) void *context)
{
  char *data;
  size_t data_len;
  struct nl_dump_params dp =
    {
      .dp_type = NL_DUMP_DETAILS,
      .dp_fd = open_memstream(&data, &data_len)
    };

  assert(!strcmp("route/route", nl_object_get_type(obj)));

  nl_object_dump(obj, &dp);

  fclose(dp.dp_fd);

  fprintf(stdout, "Received %d of route [%s]\n", type, data);

  free(data);
}

int main()
{
  int efd = epoll_create(EPOLL_CLOEXEC);

  if (efd != -1)
    {
      sigset_t mask;
      int sigfd;

      sigemptyset(&mask);
      sigaddset(&mask, SIGINT);
      sigaddset(&mask, SIGTERM);
      sigfd = signalfd(-1, &mask, SFD_NONBLOCK | SFD_CLOEXEC);
      if (sigfd != -1)
        {
          struct epoll_event ev = {.events = EPOLLIN,
                                   .data = {
                                     .fd = sigfd,
                                   }};

          if (!epoll_ctl(efd, EPOLL_CTL_ADD, sigfd, &ev))
            {
              struct nl_cache_mngr *mngr = NULL;

              if (!nl_cache_mngr_alloc(NULL, NETLINK_ROUTE, 0, &mngr))
                {
                  int mngr_fd = nl_cache_mngr_get_fd(mngr);
                  ev.data.fd = mngr_fd;
                  ev.events = EPOLLIN;

                  if (!epoll_ctl(efd, EPOLL_CTL_ADD, mngr_fd, &ev))
                    {
                      struct nl_cache *cache = NULL;
                      if (!nl_cache_mngr_add(mngr, "route/route", mngr_update,
                                             NULL, &cache))
                        {
                          while (epoll_wait(efd, &ev, 1, -1) == 1)
                            {
                              if (ev.data.fd == sigfd)
                                {
                                  struct signalfd_siginfo sinfo = {};

                                  if (read(sigfd, &sinfo, sizeof(sinfo)) ==
                                      sizeof(sinfo))
                                    {
                                      if (sinfo.ssi_signo == SIGTERM ||
                                          sinfo.ssi_signo == SIGINT)
                                        {
                                          break;
                                        }
                                    }
                                  else
                                    {
                                      break;
                                    }
                                }
                              else
                                {
                                  assert(ev.data.fd == mngr_fd);
                                  nl_cache_mngr_data_ready(mngr);
                                }
                            }
                        }
                    }
                  nl_cache_mngr_free(mngr);
                }
            }
          close(sigfd);
        }
      close(efd);
    }
}

Any IPv6 route changes will cause the cache handled by nl_cache_mngr to go out of sync:

~/src/random_tests ~clang -Wall -Wextra -I/usr/include/libnl3 nl_mngr_bug.c -o nl_mngr_bug -lnl-genl-3 -lnl-route-3 -lnl-3
~/src/random_tests ~./nl_mngr_bug &
[1] 17247
~/src/random_tests ~ip -6 r
::1 dev lo proto kernel metric 256 pref medium
anycast fe80:: dev enp34s0 proto kernel metric 0 pref medium
anycast fe80:: dev tun0 proto kernel metric 0 pref medium
fe80::/64 dev enp34s0 proto kernel metric 256 pref medium
fe80::/64 dev tun0 proto kernel metric 256 pref medium
multicast ff00::/8 dev enp34s0 proto kernel metric 256 pref medium
multicast ff00::/8 dev tun0 proto kernel metric 256 pref medium
~/src/random_tests ~sudo ip -6 r a fd99::100/128 via fe80::58e5:27ff:fea6:f594 dev enp34s0
Received 1 of route [inet6 fd99::100 table main type unicast 
    scope global priority 0x400 protocol boot 
    nexthop via fe80::58e5:27ff:fea6:f594 dev 3 <>
]
~/src/random_tests ~sudo ip -6 r chg fd99::100/128 via fe80::885b:7aff:fe5f:653c dev enp34s0
Received 5 of route [inet6 fd99::100 table main type unicast 
    scope global priority 0x400 protocol boot 
    nexthop via fe80::58e5:27ff:fea6:f594 dev 3 <>
    nexthop via fe80::885b:7aff:fe5f:653c dev 3 <>
]
~/src/random_tests ~sudo ip -6 r chg fd99::100/128 via fe80::885b:7aff:fe5f:653c dev enp34s0
Received 5 of route [inet6 fd99::100 table main type unicast 
    scope global priority 0x400 protocol boot 
    nexthop via fe80::58e5:27ff:fea6:f594 dev 3 <>
    nexthop via fe80::885b:7aff:fe5f:653c dev 3 <>
    nexthop via fe80::885b:7aff:fe5f:653c dev 3 <>
]
~/src/random_tests ~sudo ip -6 r chg fd99::100/128 via fe80::885b:7aff:fe5f:653c dev enp34s0
Received 5 of route [inet6 fd99::100 table main type unicast 
    scope global priority 0x400 protocol boot 
    nexthop via fe80::58e5:27ff:fea6:f594 dev 3 <>
    nexthop via fe80::885b:7aff:fe5f:653c dev 3 <>
    nexthop via fe80::885b:7aff:fe5f:653c dev 3 <>
    nexthop via fe80::885b:7aff:fe5f:653c dev 3 <>
]
~/src/random_tests ~sudo ip -6 r d fd99::100/128 
Received 5 of route [inet6 fd99::100 table main type unicast 
    scope global priority 0x400 protocol boot 
    nexthop via fe80::58e5:27ff:fea6:f594 dev 3 <>
    nexthop via fe80::885b:7aff:fe5f:653c dev 3 <>
    nexthop via fe80::885b:7aff:fe5f:653c dev 3 <>
]
~/src/random_tests ~ip -6 r                                                          
::1 dev lo proto kernel metric 256 pref medium
anycast fe80:: dev enp34s0 proto kernel metric 0 pref medium
anycast fe80:: dev tun0 proto kernel metric 0 pref medium
fe80::/64 dev enp34s0 proto kernel metric 256 pref medium
fe80::/64 dev tun0 proto kernel metric 256 pref medium
multicast ff00::/8 dev enp34s0 proto kernel metric 256 pref medium
multicast ff00::/8 dev tun0 proto kernel metric 256 pref medium

I'll have a look later to patch this. But if someone happens to have more time, go ahead.

Submitted on behalf of Forcepoint Finland.

libnl version was libnl-3.5.0 on gentoo.

@tlsalmin
Copy link
Author

For a workaround, iterate any CHANGE objects. If they have more than one nexthop, loop over the nexthops with iter1, then loop over iter1 + 1 (iter2). If

rtnl_route_nh_compare(iter1, iter2, 0xffffffff, true)

matches, remove iter1 and iterate again until no more matches.

@t0mmmy90
Copy link

t0mmmy90 commented Jul 22, 2021

Hi,
I'm not sure if this is the same issue, but I've encountered a problem with duplicated nexthops while I was working on IPv6 ECMP routes. I've resolved it by adding a small patch to libnl which now I've added as a PR #290.
I hope it will help you!

@tlsalmin
Copy link
Author

Sorry doesn't help with the issue. Applied the patch to 3.5.0 and get the same results

~/src/random_tests ~./nl_mngr_bug &
[1] 73033
~/src/random_tests ~ip -6 r            
::1 dev lo proto kernel metric 256 pref medium
anycast fe80:: dev enp34s0 proto kernel metric 0 pref medium
anycast fe80:: dev tun0 proto kernel metric 0 pref medium
fe80::/64 dev enp34s0 proto kernel metric 256 pref medium
fe80::/64 dev tun0 proto kernel metric 256 pref medium
multicast ff00::/8 dev enp34s0 proto kernel metric 256 pref medium
multicast ff00::/8 dev tun0 proto kernel metric 256 pref medium
~/src/random_tests ~sudo ip -6 r a fd99::100/128 via fe80::58e5:27ff:fea6:f594 dev enp34s0
Received 1 of route [inet6 fd99::100 table main type unicast 
    scope global priority 0x400 protocol boot 
    nexthop via fe80::58e5:27ff:fea6:f594 dev enp34s0 <>
]
~/src/random_tests ~sudo ip -6 r chg fd99::100/128 via fe80::4609:b8ff:fe4e:1a1b dev enp34s0
Received 5 of route [inet6 fd99::100 table main type unicast 
    scope global priority 0x400 protocol boot 
    nexthop via fe80::58e5:27ff:fea6:f594 dev enp34s0 <>
    nexthop via fe80::4609:b8ff:fe4e:1a1b dev enp34s0 <>
]
~/src/random_tests ~sudo ip -6 r chg fd99::100/128 via fe80::885b:7aff:fe5f:653c dev enp34s0
Received 5 of route [inet6 fd99::100 table main type unicast 
    scope global priority 0x400 protocol boot 
    nexthop via fe80::58e5:27ff:fea6:f594 dev enp34s0 <>
    nexthop via fe80::4609:b8ff:fe4e:1a1b dev enp34s0 <>
    nexthop via fe80::885b:7aff:fe5f:653c dev enp34s0 <>
]
~/src/random_tests ~ip -6 r
::1 dev lo proto kernel metric 256 pref medium
fd99::100 via fe80::885b:7aff:fe5f:653c dev enp34s0 metric 1024 pref medium
anycast fe80:: dev enp34s0 proto kernel metric 0 pref medium
anycast fe80:: dev tun0 proto kernel metric 0 pref medium
fe80::/64 dev enp34s0 proto kernel metric 256 pref medium
fe80::/64 dev tun0 proto kernel metric 256 pref medium
multicast ff00::/8 dev enp34s0 proto kernel metric 256 pref medium
multicast ff00::/8 dev tun0 proto kernel metric 256 pref medium
~/src/random_tests ~ip -6 r d fd99::100
RTNETLINK answers: Operation not permitted
~/src/random_tests ~sudo ip -6 r d fd99::100
Received 5 of route [inet6 fd99::100 table main type unicast 
    scope global priority 0x400 protocol boot 
    nexthop via fe80::58e5:27ff:fea6:f594 dev enp34s0 <>
    nexthop via fe80::4609:b8ff:fe4e:1a1b dev enp34s0 <>
]
~/src/random_tests ~

Also my workaround doesn't work with change. I think its the same as the patch. This only works to remove duplicates of the same, but doesn't fix behaviour when NLM_F_REPLACE is in the netlink message.

@t0mmmy90
Copy link

You're right, I was facing an issue with having a nexthop X for example 3 times in the same route and this is why the patch helped.

@tlsalmin
Copy link
Author

I don't see lib/route/route_obj.c taking NLM_F_REPLACE into consideration at all. At least the kernel side in fib6_add_rt2node removes all siblings (nexthops) when NLM_F_REPLACE is present if they are purely RTF_GATEWAY routes.

@tlsalmin
Copy link
Author

Added pull request for fix #293

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants