]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
net/mlx5: Lag, multipath, give priority for routes with smaller network prefix
author Patrisious Haddad <phaddad@nvidia.com>
Thu, 25 Dec 2025 13:27:13 +0000 (15:27 +0200)
committer Jakub Kicinski <kuba@kernel.org>
Sun, 4 Jan 2026 18:38:49 +0000 (10:38 -0800)
Today multipath offload is controlled by a single route, and a route is
selected as the controlling route if it meets one of the following criteria:
        1. No controlling route is set.
        2. New route destination is the same as old one.
        3. New route metric is lower than old route metric.

This can cause unwanted behaviour when a new route is added
with a smaller network prefix, which should get the priority.

Fix this by adding a new criterion that gives priority to a new route with
a smaller network prefix.

Fixes: ad11c4f1d8fd ("net/mlx5e: Lag, Only handle events from highest priority multipath entry")
Signed-off-by: Patrisious Haddad <phaddad@nvidia.com>
Signed-off-by: Mark Bloch <mbloch@nvidia.com>
Link: https://patch.msgid.link/20251225132717.358820-2-mbloch@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c

index aee17fcf3b36c3dcfbdf7dd00ca7fd17cae00681..cdc99fe5c9568653a4ec946a0f1574e0de8c6433 100644 (file)
@@ -173,10 +173,15 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event,
        }
 
        /* Handle multipath entry with lower priority value */
-       if (mp->fib.mfi && mp->fib.mfi != fi &&
+       if (mp->fib.mfi &&
            (mp->fib.dst != fen_info->dst || mp->fib.dst_len != fen_info->dst_len) &&
-           fi->fib_priority >= mp->fib.priority)
+           mp->fib.dst_len <= fen_info->dst_len &&
+           !(mp->fib.dst_len == fen_info->dst_len &&
+             fi->fib_priority < mp->fib.priority)) {
+               mlx5_core_dbg(ldev->pf[idx].dev,
+                             "Multipath entry with lower priority was rejected\n");
                return;
+       }
 
        nh_dev0 = mlx5_lag_get_next_fib_dev(ldev, fi, NULL);
        nh_dev1 = mlx5_lag_get_next_fib_dev(ldev, fi, nh_dev0);