git.ipfire.org Git - thirdparty/linux.git/commitdiff
net: fib: restore ECMP balance from loopback
author Vadim Fedorenko <vadim.fedorenko@linux.dev>
Sun, 21 Dec 2025 19:26:38 +0000 (19:26 +0000)
committer Paolo Abeni <pabeni@redhat.com>
Tue, 30 Dec 2025 10:07:38 +0000 (11:07 +0100)
Preferring the nexthop whose source address matches broke ECMP for packets
with source addresses which are not in the broadcast domain, but are rather
assigned to loopback/dummy interfaces. The original behaviour was to balance
over the nexthops, while now the last nexthop in the group is always used. To
fix the issue, introduce a nexthop scoring system in which nexthops with a
source address equal to the requested one always have higher priority.

For the case with 198.51.100.1/32 assigned to dummy0 and routed using
192.0.2.0/24 and 203.0.113.0/24 networks:

2: dummy0: <BROADCAST,NOARP,UP,LOWER_UP> mtu 1500 qdisc noqueue state UNKNOWN group default qlen 1000
    link/ether d6:54:8a:ff:78:f5 brd ff:ff:ff:ff:ff:ff
    inet 198.51.100.1/32 scope global dummy0
       valid_lft forever preferred_lft forever
7: veth1@if6: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
    link/ether 06:ed:98:87:6d:8a brd ff:ff:ff:ff:ff:ff link-netnsid 0
    inet 192.0.2.2/24 scope global veth1
       valid_lft forever preferred_lft forever
    inet6 fe80::4ed:98ff:fe87:6d8a/64 scope link proto kernel_ll
       valid_lft forever preferred_lft forever
9: veth3@if8: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
    link/ether ae:75:23:38:a0:d2 brd ff:ff:ff:ff:ff:ff link-netnsid 0
    inet 203.0.113.2/24 scope global veth3
       valid_lft forever preferred_lft forever
    inet6 fe80::ac75:23ff:fe38:a0d2/64 scope link proto kernel_ll
       valid_lft forever preferred_lft forever

~ ip ro list:
default
nexthop via 192.0.2.1 dev veth1 weight 1
nexthop via 203.0.113.1 dev veth3 weight 1
192.0.2.0/24 dev veth1 proto kernel scope link src 192.0.2.2
203.0.113.0/24 dev veth3 proto kernel scope link src 203.0.113.2

before:
   for i in {1..255} ; do ip ro get 10.0.0.$i; done | grep veth | awk ' {print $(NF-2)}' | sort | uniq -c:
    255 veth3

after:
   for i in {1..255} ; do ip ro get 10.0.0.$i; done | grep veth | awk ' {print $(NF-2)}' | sort | uniq -c:
    122 veth1
    133 veth3

Fixes: 32607a332cfe ("ipv4: prefer multipath nexthop that matches source address")
Signed-off-by: Vadim Fedorenko <vadim.fedorenko@linux.dev>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Link: https://patch.msgid.link/20251221192639.3911901-1-vadim.fedorenko@linux.dev
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
net/ipv4/fib_semantics.c

index a5f3c8459758f10850673583a886d2b212940596..0caf38e44c738c32a1de7ee808f10b33632ce108 100644 (file)
@@ -2167,8 +2167,8 @@ void fib_select_multipath(struct fib_result *res, int hash,
 {
        struct fib_info *fi = res->fi;
        struct net *net = fi->fib_net;
-       bool found = false;
        bool use_neigh;
+       int score = -1;
        __be32 saddr;
 
        if (unlikely(res->fi->nh)) {
@@ -2180,7 +2180,7 @@ void fib_select_multipath(struct fib_result *res, int hash,
        saddr = fl4 ? fl4->saddr : 0;
 
        change_nexthops(fi) {
-               int nh_upper_bound;
+               int nh_upper_bound, nh_score = 0;
 
                /* Nexthops without a carrier are assigned an upper bound of
                 * minus one when "ignore_routes_with_linkdown" is set.
@@ -2190,24 +2190,18 @@ void fib_select_multipath(struct fib_result *res, int hash,
                    (use_neigh && !fib_good_nh(nexthop_nh)))
                        continue;
 
-               if (!found) {
+               if (saddr && nexthop_nh->nh_saddr == saddr)
+                       nh_score += 2;
+               if (hash <= nh_upper_bound)
+                       nh_score++;
+               if (score < nh_score) {
                        res->nh_sel = nhsel;
                        res->nhc = &nexthop_nh->nh_common;
-                       found = !saddr || nexthop_nh->nh_saddr == saddr;
+                       if (nh_score == 3 || (!saddr && nh_score == 1))
+                               return;
+                       score = nh_score;
                }
 
-               if (hash > nh_upper_bound)
-                       continue;
-
-               if (!saddr || nexthop_nh->nh_saddr == saddr) {
-                       res->nh_sel = nhsel;
-                       res->nhc = &nexthop_nh->nh_common;
-                       return;
-               }
-
-               if (found)
-                       return;
-
        } endfor_nexthops(fi);
 }
 #endif