git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
ipv4: nexthop: allocate skb dynamically in rtm_get_nexthop()
author: Fernando Fernandez Mancera <fmancera@suse.de>
        Thu, 2 Apr 2026 07:26:13 +0000 (09:26 +0200)
committer: Jakub Kicinski <kuba@kernel.org>
        Fri, 3 Apr 2026 22:34:27 +0000 (15:34 -0700)
When querying a nexthop object via RTM_GETNEXTHOP, the kernel currently
allocates a fixed-size skb using NLMSG_GOODSIZE. While sufficient for
single nexthops and small Equal-Cost Multi-Path groups, this fixed
allocation is too small for large nexthop groups, such as a group with
512 nexthops.

This results in the following warning splat:

 WARNING: net/ipv4/nexthop.c:3395 at rtm_get_nexthop+0x176/0x1c0, CPU#20: rep/4608
 [...]
 RIP: 0010:rtm_get_nexthop (net/ipv4/nexthop.c:3395)
 [...]
 Call Trace:
  <TASK>
  rtnetlink_rcv_msg (net/core/rtnetlink.c:6989)
  netlink_rcv_skb (net/netlink/af_netlink.c:2550)
  netlink_unicast (net/netlink/af_netlink.c:1319 net/netlink/af_netlink.c:1344)
  netlink_sendmsg (net/netlink/af_netlink.c:1894)
  ____sys_sendmsg (net/socket.c:721 net/socket.c:736 net/socket.c:2585)
  ___sys_sendmsg (net/socket.c:2641)
  __sys_sendmsg (net/socket.c:2671)
  do_syscall_64 (arch/x86/entry/syscall_64.c:63 arch/x86/entry/syscall_64.c:94)
  entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)
  </TASK>

Fix this by computing the required size with nh_nlmsg_size() and
allocating the skb with nlmsg_new(), which is consistent with the
behavior of nexthop_notify(). In addition, adjust nh_nlmsg_size_grp()
so that it calculates the size needed based on the flags passed. While
at it, also account for the size of NHA_FDB in the nexthop group size
calculation, as it was missing too.

This cannot be reproduced via iproute2 as the group size is currently
limited and the command fails as follows:

addattr_l ERROR: message exceeded bound of 1048

Fixes: 430a049190de ("nexthop: Add support for nexthop groups")
Reported-by: Yiming Qian <yimingqian591@gmail.com>
Closes: https://lore.kernel.org/netdev/CAL_bE8Li2h4KO+AQFXW4S6Yb_u5X4oSKnkywW+LPFjuErhqELA@mail.gmail.com/
Signed-off-by: Fernando Fernandez Mancera <fmancera@suse.de>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Link: https://patch.msgid.link/20260402072613.25262-2-fmancera@suse.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
net/ipv4/nexthop.c

index a0c69458329940b74cc364951f7323d813e5cb8c..2c9036c719b686ed32b96b9ee26468255d63c8be 100644 (file)
@@ -1003,16 +1003,32 @@ static size_t nh_nlmsg_size_grp_res(struct nh_group *nhg)
                nla_total_size_64bit(8);/* NHA_RES_GROUP_UNBALANCED_TIME */
 }
 
-static size_t nh_nlmsg_size_grp(struct nexthop *nh)
+static size_t nh_nlmsg_size_grp(struct nexthop *nh, u32 op_flags)
 {
        struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
        size_t sz = sizeof(struct nexthop_grp) * nhg->num_nh;
        size_t tot = nla_total_size(sz) +
-               nla_total_size(2); /* NHA_GROUP_TYPE */
+               nla_total_size(2) +     /* NHA_GROUP_TYPE */
+               nla_total_size(0);      /* NHA_FDB */
 
        if (nhg->resilient)
                tot += nh_nlmsg_size_grp_res(nhg);
 
+       if (op_flags & NHA_OP_FLAG_DUMP_STATS) {
+               tot += nla_total_size(0) +        /* NHA_GROUP_STATS */
+                      nla_total_size(4);         /* NHA_HW_STATS_ENABLE */
+               tot += nhg->num_nh *
+                      (nla_total_size(0) +       /* NHA_GROUP_STATS_ENTRY */
+                       nla_total_size(4) +       /* NHA_GROUP_STATS_ENTRY_ID */
+                       nla_total_size_64bit(8)); /* NHA_GROUP_STATS_ENTRY_PACKETS */
+
+               if (op_flags & NHA_OP_FLAG_DUMP_HW_STATS) {
+                       tot += nhg->num_nh *
+                              nla_total_size_64bit(8); /* NHA_GROUP_STATS_ENTRY_PACKETS_HW */
+                       tot += nla_total_size(4);       /* NHA_HW_STATS_USED */
+               }
+       }
+
        return tot;
 }
 
@@ -1047,14 +1063,14 @@ static size_t nh_nlmsg_size_single(struct nexthop *nh)
        return sz;
 }
 
-static size_t nh_nlmsg_size(struct nexthop *nh)
+static size_t nh_nlmsg_size(struct nexthop *nh, u32 op_flags)
 {
        size_t sz = NLMSG_ALIGN(sizeof(struct nhmsg));
 
        sz += nla_total_size(4); /* NHA_ID */
 
        if (nh->is_group)
-               sz += nh_nlmsg_size_grp(nh) +
+               sz += nh_nlmsg_size_grp(nh, op_flags) +
                      nla_total_size(4) +       /* NHA_OP_FLAGS */
                      0;
        else
@@ -1070,7 +1086,7 @@ static void nexthop_notify(int event, struct nexthop *nh, struct nl_info *info)
        struct sk_buff *skb;
        int err = -ENOBUFS;
 
-       skb = nlmsg_new(nh_nlmsg_size(nh), gfp_any());
+       skb = nlmsg_new(nh_nlmsg_size(nh, 0), gfp_any());
        if (!skb)
                goto errout;
 
@@ -3376,15 +3392,15 @@ static int rtm_get_nexthop(struct sk_buff *in_skb, struct nlmsghdr *nlh,
        if (err)
                return err;
 
-       err = -ENOBUFS;
-       skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
-       if (!skb)
-               goto out;
-
        err = -ENOENT;
        nh = nexthop_find_by_id(net, id);
        if (!nh)
-               goto errout_free;
+               goto out;
+
+       err = -ENOBUFS;
+       skb = nlmsg_new(nh_nlmsg_size(nh, op_flags), GFP_KERNEL);
+       if (!skb)
+               goto out;
 
        err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP, NETLINK_CB(in_skb).portid,
                           nlh->nlmsg_seq, 0, op_flags);