From: Jakub Kicinski Date: Wed, 24 Jun 2026 18:20:16 +0000 (-0700) Subject: net: add the driver-facing netdev_work scheduling API X-Git-Url: http://git.ipfire.org/gitweb/?a=commitdiff_plain;h=129cdce9da9e44c52d38889e0411be9817bca114;p=thirdparty%2Fkernel%2Fstable.git net: add the driver-facing netdev_work scheduling API With an extra event mask we can easily extend the netdev work to also service driver-defined events. For advanced drivers this is probably not a perfect match, but it makes running deferred work easier in simple cases. Expose the netdev_work facility to drivers. Add helpers to schedule work and a dedicated ndo to perform the driver-scheduled actions. Reviewed-by: Kuniyuki Iwashima Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20260624182018.2445732-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 732506787db3..9981d637f8b5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1131,6 +1131,9 @@ struct netdev_net_notifier { * netdev_hw_addr_list_for_each(ha, uc). Return 0 on success or a * negative errno to request a retry via the core backoff. * + * void (*ndo_work)(struct net_device *dev, unsigned long events); + * Run deferred work scheduled with netdev_work_sched(@events). + * * int (*ndo_set_mac_address)(struct net_device *dev, void *addr); * This function is called when the Media Access Control address * needs to be changed. 
If this interface is not defined, the @@ -1460,6 +1463,8 @@ struct net_device_ops { struct net_device *dev, struct netdev_hw_addr_list *uc, struct netdev_hw_addr_list *mc); + void (*ndo_work)(struct net_device *dev, + unsigned long events); int (*ndo_set_mac_address)(struct net_device *dev, void *addr); int (*ndo_validate_addr)(struct net_device *dev); @@ -1932,6 +1937,8 @@ enum netdev_reg_state { * does not implement ndo_set_rx_mode() * @work_node: List entry for async netdev_work processing * @work_tracker: Refcount tracker for async netdev_work + * @work_pending: Driver-defined pending netdev_work, passed to + * ndo_work() (see netdev_work_sched()) * @work_core_pending: Core-defined pending netdev_work (NETDEV_WORK_*) * @rx_mode_addr_cache: Recycled snapshot entries for rx_mode work * @rx_mode_retry_timer: Timer that re-queues rx_mode work after failure @@ -2329,6 +2336,7 @@ struct net_device { bool uc_promisc; struct list_head work_node; netdevice_tracker work_tracker; + unsigned long work_pending; unsigned long work_core_pending; struct netdev_hw_addr_list rx_mode_addr_cache; struct timer_list rx_mode_retry_timer; @@ -5178,6 +5186,9 @@ void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s, const struct pcpu_sw_netstats __percpu *netstats); void dev_get_tstats64(struct net_device *dev, struct rtnl_link_stats64 *s); +void netdev_work_sched(struct net_device *dev, unsigned long events); +unsigned long netdev_work_cancel(struct net_device *dev, unsigned long mask); + enum { NESTED_SYNC_IMM_BIT, NESTED_SYNC_TODO_BIT, diff --git a/net/core/netdev_work.c b/net/core/netdev_work.c index c121c24dc493..3109fae132ad 100644 --- a/net/core/netdev_work.c +++ b/net/core/netdev_work.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later +#include #include #include #include @@ -16,32 +17,63 @@ static void netdev_work_proc(struct work_struct *work); * - within the list entries (struct net_device fields): * - work_node * - work_tracker + * - work_pending * - 
work_core_pending */ static LIST_HEAD(netdev_work_list); static DEFINE_SPINLOCK(netdev_work_lock); static DECLARE_WORK(netdev_work, netdev_work_proc); -void __netdev_work_core_sched(struct net_device *dev, unsigned long event) +static void netdev_work_enqueue(struct net_device *dev, unsigned long events, + unsigned long core) { + if (!events && !core) + return; + spin_lock_bh(&netdev_work_lock); if (list_empty(&dev->work_node)) { list_add_tail(&dev->work_node, &netdev_work_list); netdev_hold(dev, &dev->work_tracker, GFP_ATOMIC); } - dev->work_core_pending |= event; + dev->work_pending |= events; + dev->work_core_pending |= core; spin_unlock_bh(&netdev_work_lock); schedule_work(&netdev_work); } +static unsigned long +netdev_work_dequeue(struct net_device *dev, unsigned long *pending, + unsigned long mask) +{ + unsigned long events; + + spin_lock_bh(&netdev_work_lock); + events = *pending & mask; + *pending &= ~events; + if (!list_empty(&dev->work_node) && + !dev->work_pending && !dev->work_core_pending) { + list_del_init(&dev->work_node); + netdev_put(dev, &dev->work_tracker); + } + spin_unlock_bh(&netdev_work_lock); + + return events; +} + +void netdev_work_sched(struct net_device *dev, unsigned long events) +{ + netdev_work_enqueue(dev, events, 0); +} +EXPORT_SYMBOL(netdev_work_sched); + /** - * __netdev_work_core_cancel() - cancel selected core work for a netdev + * netdev_work_cancel() - cancel selected work for a netdev * @dev: net_device * @mask: events to cancel * * Clear @mask from the device's work pending mask. If no work is left pending - * the device is dequeued. + * the device is dequeued and its ndo_work won't be called. * * No expectations on locking, but also no guarantees provided. If the caller * wants to touch @dev afterwards (e.g. 
call the work that got canceled) @@ -50,21 +82,33 @@ void __netdev_work_core_sched(struct net_device *dev, unsigned long event) * Returns: the subset of @mask that was actually pending, so the caller can run * those events inline. */ +unsigned long netdev_work_cancel(struct net_device *dev, unsigned long mask) +{ + return netdev_work_dequeue(dev, &dev->work_pending, mask); +} +EXPORT_SYMBOL(netdev_work_cancel); + +void __netdev_work_core_sched(struct net_device *dev, unsigned long events) +{ + netdev_work_enqueue(dev, 0, events); +} + unsigned long __netdev_work_core_cancel(struct net_device *dev, unsigned long mask) { - unsigned long event; + return netdev_work_dequeue(dev, &dev->work_core_pending, mask); +} - spin_lock_bh(&netdev_work_lock); - event = dev->work_core_pending & mask; - dev->work_core_pending &= ~mask; - if (!list_empty(&dev->work_node) && !dev->work_core_pending) { - list_del_init(&dev->work_node); - netdev_put(dev, &dev->work_tracker); - } - spin_unlock_bh(&netdev_work_lock); +static void netdev_work_run(struct net_device *dev, unsigned long events, + unsigned long core) +{ + if (!netif_device_present(dev)) + return; - return event; + if (core & NETDEV_WORK_RX_MODE) + netif_rx_mode_run(dev); + if (events && dev->netdev_ops->ndo_work) + dev->netdev_ops->ndo_work(dev, events); } static void netdev_work_proc(struct work_struct *work) @@ -72,9 +116,9 @@ static void netdev_work_proc(struct work_struct *work) rtnl_lock(); while (true) { + unsigned long events = 0, core = 0; netdevice_tracker tracker; struct net_device *dev; - unsigned long core = 0; spin_lock_bh(&netdev_work_lock); if (list_empty(&netdev_work_list)) { @@ -98,16 +142,17 @@ static void netdev_work_proc(struct work_struct *work) list_del_init(&dev->work_node); core = dev->work_core_pending; dev->work_core_pending = 0; + events = dev->work_pending; + dev->work_pending = 0; /* We took another ref above */ netdev_put(dev, &dev->work_tracker); if (!dev_isalive(dev)) - core = 0; + core = events 
= 0; } spin_unlock_bh(&netdev_work_lock); - if (core & NETDEV_WORK_RX_MODE) - netif_rx_mode_run(dev); + netdev_work_run(dev, events, core); netdev_unlock_ops(dev); netdev_put(dev, &tracker);