git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
ipv6: Start path selection from the first nexthop
author Ido Schimmel <idosch@nvidia.com>
Wed, 2 Apr 2025 11:42:23 +0000 (14:42 +0300)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 10 Apr 2025 12:37:40 +0000 (14:37 +0200)
[ Upstream commit 4d0ab3a6885e3e9040310a8d8f54503366083626 ]

Cited commit transitioned IPv6 path selection to use hash-threshold
instead of modulo-N. With hash-threshold, each nexthop is assigned a
region boundary in the multipath hash function's output space and a
nexthop is chosen if the calculated hash is smaller than the nexthop's
region boundary.

Hash-threshold does not work correctly if path selection does not start
with the first nexthop. For example, if fib6_select_path() is always
passed the last nexthop in the group, then it will always be chosen
because its region boundary covers the entire hash function's output
space.

Fix this by starting the selection process from the first nexthop and
by not considering nexthops for which rt6_score_route() returned a
negative score.

Fixes: 3d709f69a3e7 ("ipv6: Use hash-threshold instead of modulo-N")
Reported-by: Stanislav Fomichev <stfomichev@gmail.com>
Closes: https://lore.kernel.org/netdev/Z9RIyKZDNoka53EO@mini-arch/
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Link: https://patch.msgid.link/20250402114224.293392-2-idosch@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
net/ipv6/route.c

index 3ab5ea55ff8c35b26048cc1c420d796f8421dc30..8f8a6970b9b7a98926e990b504d28ad29abf66d0 100644 (file)
@@ -414,11 +414,35 @@ static bool rt6_check_expired(const struct rt6_info *rt)
        return false;
 }
 
+static struct fib6_info *
+rt6_multipath_first_sibling_rcu(const struct fib6_info *rt)
+{
+       struct fib6_info *iter;
+       struct fib6_node *fn;
+
+       fn = rcu_dereference(rt->fib6_node);
+       if (!fn)
+               goto out;
+       iter = rcu_dereference(fn->leaf);
+       if (!iter)
+               goto out;
+
+       while (iter) {
+               if (iter->fib6_metric == rt->fib6_metric &&
+                   rt6_qualify_for_ecmp(iter))
+                       return iter;
+               iter = rcu_dereference(iter->fib6_next);
+       }
+
+out:
+       return NULL;
+}
+
 void fib6_select_path(const struct net *net, struct fib6_result *res,
                      struct flowi6 *fl6, int oif, bool have_oif_match,
                      const struct sk_buff *skb, int strict)
 {
-       struct fib6_info *match = res->f6i;
+       struct fib6_info *first, *match = res->f6i;
        struct fib6_info *sibling;
 
        if (!match->nh && (!match->fib6_nsiblings || have_oif_match))
@@ -442,10 +466,18 @@ void fib6_select_path(const struct net *net, struct fib6_result *res,
                return;
        }
 
-       if (fl6->mp_hash <= atomic_read(&match->fib6_nh->fib_nh_upper_bound))
+       first = rt6_multipath_first_sibling_rcu(match);
+       if (!first)
                goto out;
 
-       list_for_each_entry_rcu(sibling, &match->fib6_siblings,
+       if (fl6->mp_hash <= atomic_read(&first->fib6_nh->fib_nh_upper_bound) &&
+           rt6_score_route(first->fib6_nh, first->fib6_flags, oif,
+                           strict) >= 0) {
+               match = first;
+               goto out;
+       }
+
+       list_for_each_entry_rcu(sibling, &first->fib6_siblings,
                                fib6_siblings) {
                const struct fib6_nh *nh = sibling->fib6_nh;
                int nh_upper_bound;