From: Breno Leitao Date: Thu, 4 Jun 2026 16:10:11 +0000 (-0700) Subject: netconsole: do not dequeue pooled skbs that cannot satisfy len X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=6c537b845c99e32312a1bd84d4c95cdb26efb577;p=thirdparty%2Flinux.git netconsole: do not dequeue pooled skbs that cannot satisfy len find_skb() falls back to np->skb_pool when the GFP_ATOMIC alloc_skb() fails. The pool is refilled by refill_skbs(), which always allocates buffers of MAX_SKB_SIZE (ethhdr + iphdr + udphdr + MAX_UDP_CHUNK == 1502 bytes). netconsole, however, computes the requested length dynamically as total_len + np->dev->needed_tailroom If the egress device declares a non-zero needed_tailroom (e.g. some tunnel or hardware accelerator devices), the required length can exceed MAX_SKB_SIZE. The pooled skb is then handed back to the caller, which immediately performs skb_put(skb, len), trips the tail > end check, and triggers skb_over_panic(). Leave the normal alloc_skb(len, GFP_ATOMIC) path untouched -- the slab allocator can still satisfy oversized requests when memory is available, so senders to devices with non-zero needed_tailroom keep working in the common case. Only the pool fallback is gated: when alloc_skb() failed and len exceeds the pool buffer size, skip the skb_dequeue() instead of burning a pre-allocated skb on a request that would later trip skb_over_panic(). Reserving pool entries for requests they can actually satisfy also keeps the panic path, which depends on the pool being primed, intact. When that drop happens, emit a rate-limited net_warn() so the user notices that netconsole is unable to push messages on the egress device. The warn is skipped under in_nmi() for the same reason schedule_work() is: printk machinery taken by net_warn_ratelimited() is not NMI-safe and would risk recursing into the same nbcon console we are servicing. MAX_SKB_SIZE / MAX_UDP_CHUNK were private to net/core/netpoll.c. Move them to include/linux/netpoll.h so netconsole can reference the same definition that refill_skbs() uses, keeping the two in sync by construction. The header now pulls in and explicitly so MAX_SKB_SIZE remains self-contained for any future user. Signed-off-by: Breno Leitao Link: https://patch.msgid.link/20260604-netcons_fix_before_move-v3-2-ab055b3a6aa5@debian.org Signed-off-by: Paolo Abeni --- diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 918e4a9f4456..58250e648f8b 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -1655,15 +1655,33 @@ static struct notifier_block netconsole_netdev_notifier = { }; /* Pop a pre-allocated skb from the pool and request a refill. + * + * The pool is refilled with MAX_SKB_SIZE buffers, so a pooled skb cannot + * satisfy a larger request. Return NULL in that case rather than handing + * back a too-small skb that would later trip skb_over_panic() in skb_put(); + * the caller still polls and retries, and alloc_skb() itself can satisfy the + * oversized request once memory frees up. * * The refill is requested via schedule_work(), which takes the workqueue * pool locks and is therefore not NMI-safe. Skip the refill when called * from NMI context; the next non-NMI caller will top the pool back up. */ -static struct sk_buff *netcons_skb_pop(struct netpoll *np) +static struct sk_buff *netcons_skb_pop(struct netpoll *np, int len) { struct sk_buff *skb; + if (len > MAX_SKB_SIZE) { + /* net_warn_ratelimited() pulls in printk machinery that is not + * NMI-safe and could recurse into the nbcon console we are + * servicing, so only warn outside NMI. + */ + if (!in_nmi()) + net_warn_ratelimited("netconsole: dropping message, requested skb len %d exceeds pool buffer size %zu on %s\n", + len, (size_t)MAX_SKB_SIZE, + np->dev->name); + return NULL; + } + skb = skb_dequeue(&np->skb_pool); if (!in_nmi()) schedule_work(&np->refill_wq); @@ -1681,7 +1699,7 @@ repeat: skb = alloc_skb(len, GFP_ATOMIC); if (!skb) - skb = netcons_skb_pop(np); + skb = netcons_skb_pop(np, len); if (!skb) { if (++count < 10) { diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index e4b8f1f91e54..88f7daa8560e 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -13,12 +13,28 @@ #include #include #include +#include +#include union inet_addr { __be32 ip; struct in6_addr in6; }; +/* + * Maximum payload netpoll's preallocated skb pool can carry. Keep this in + * sync with the buffer size used by refill_skbs() in net/core/netpoll.c; + * callers (e.g. netconsole) use it to detect requests the pool can never + * satisfy and avoid dequeuing a pooled skb that would later trip + * skb_over_panic() in skb_put(). + */ +#define MAX_UDP_CHUNK 1460 +#define MAX_SKB_SIZE \ + (sizeof(struct ethhdr) + \ + sizeof(struct iphdr) + \ + sizeof(struct udphdr) + \ + MAX_UDP_CHUNK) + struct netpoll { struct net_device *dev; netdevice_tracker dev_tracker; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index b3fe59445f2d..229dde818ab3 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -41,16 +41,9 @@ * message gets out even in extreme OOM situations. */ -#define MAX_UDP_CHUNK 1460 #define MAX_SKBS 32 #define USEC_PER_POLL 50 -#define MAX_SKB_SIZE \ - (sizeof(struct ethhdr) + \ - sizeof(struct iphdr) + \ - sizeof(struct udphdr) + \ - MAX_UDP_CHUNK) - static unsigned int carrier_timeout = 4; module_param(carrier_timeout, uint, 0644);