net: watchdog: fix refcount tracking races

author Eric Dumazet <edumazet@google.com>

Thu, 11 Jun 2026 15:27:37 +0000 (15:27 +0000)

committer Jakub Kicinski <kuba@kernel.org>

Sat, 13 Jun 2026 00:34:57 +0000 (17:34 -0700)
author Eric Dumazet <edumazet@google.com>
Thu, 11 Jun 2026 15:27:37 +0000 (15:27 +0000)
committer Jakub Kicinski <kuba@kernel.org>
Sat, 13 Jun 2026 00:34:57 +0000 (17:34 -0700)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h

index 0e1e581efc5ac264259b2f0fdfe41c50a6f47239..4a0e83709f29e4bcf12f479e464e6bedecc61c69 100644 (file)
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1980,6 +1980,8 @@ enum netdev_reg_state {
   *     @qdisc_hash:            qdisc hash table
   *     @watchdog_timeo:        Represents the timeout that is used by
   *                             the watchdog (see dev_watchdog())
+ *     @watchdog_lock:         protect watchdog_ref_held
+ *     @watchdog_ref_held:     True if the watchdog device ref is taken.
   *     @watchdog_timer:        List of timers
   *
   *     @proto_down_reason:     reason a netdev interface is held down
@@ -2392,6 +2394,8 @@ struct net_device {
         /* These may be needed for future network-power-down code. */
         struct timer_list       watchdog_timer;
         int                     watchdog_timeo;
+       spinlock_t              watchdog_lock;
+       bool                    watchdog_ref_held;
  
         u32                     proto_down_reason;
  
diff --git a/net/core/dev.c b/net/core/dev.c

index 0c6c270d9f7d115feb824f4ebe6be122c40d745f..731e661d7be6574d5eca4a600e0a5623be4c2485 100644 (file)
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -11217,7 +11217,8 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
  
         netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
         spin_lock_init(&dev->tx_global_lock);
-
+       spin_lock_init(&dev->watchdog_lock);
+       dev->watchdog_ref_held = false;
         return 0;
  }
  
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c

index a93321db8fd75d30c61e146c290bbc139c37c913..6cdf2ccfb0937e45271f8690a0c09d48a24ce769 100644 (file)
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -568,16 +568,24 @@ static void dev_watchdog(struct timer_list *t)
                                 dev->netdev_ops->ndo_tx_timeout(dev, i);
                                 netif_unfreeze_queues(dev);
                         }
-                       if (!mod_timer(&dev->watchdog_timer,
-                                      round_jiffies(oldest_start +
-                                                    dev->watchdog_timeo)))
-                               release = false;
+                       spin_lock(&dev->watchdog_lock);
+                       mod_timer(&dev->watchdog_timer,
+                                 round_jiffies(oldest_start +
+                                               dev->watchdog_timeo));
+                       release = false;
+                       spin_unlock(&dev->watchdog_lock);
                 }
         }
         spin_unlock(&dev->tx_global_lock);
  
-       if (release)
+       spin_lock(&dev->watchdog_lock);
+       if (timer_pending(&dev->watchdog_timer))
+               release = false;
+       if (release && dev->watchdog_ref_held) {
                 netdev_put(dev, &dev->watchdog_dev_tracker);
+               dev->watchdog_ref_held = false;
+       }
+       spin_unlock(&dev->watchdog_lock);
  }
  
  void netdev_watchdog_up(struct net_device *dev)
@@ -586,18 +594,34 @@ void netdev_watchdog_up(struct net_device *dev)
                 return;
         if (dev->watchdog_timeo <= 0)
                 dev->watchdog_timeo = 5*HZ;
+       spin_lock_bh(&dev->tx_global_lock);
+
+       spin_lock(&dev->watchdog_lock);
         if (!mod_timer(&dev->watchdog_timer,
-                      round_jiffies(jiffies + dev->watchdog_timeo)))
-               netdev_hold(dev, &dev->watchdog_dev_tracker,
-                           GFP_ATOMIC);
+                      round_jiffies(jiffies + dev->watchdog_timeo))) {
+               if (!dev->watchdog_ref_held) {
+                       netdev_hold(dev, &dev->watchdog_dev_tracker,
+                                   GFP_ATOMIC);
+                       dev->watchdog_ref_held = true;
+               }
+       }
+       spin_unlock(&dev->watchdog_lock);
+
+       spin_unlock_bh(&dev->tx_global_lock);
  }
  EXPORT_SYMBOL_GPL(netdev_watchdog_up);
  
  static void netdev_watchdog_down(struct net_device *dev)
  {
         netif_tx_lock_bh(dev);
-       if (timer_delete(&dev->watchdog_timer))
+
+       spin_lock(&dev->watchdog_lock);
+       if (timer_delete(&dev->watchdog_timer)) {
                 netdev_put(dev, &dev->watchdog_dev_tracker);
+               dev->watchdog_ref_held = false;
+       }
+       spin_unlock(&dev->watchdog_lock);
+
         netif_tx_unlock_bh(dev);
  }
  
@@ -614,8 +638,6 @@ void netif_carrier_on(struct net_device *dev)
                         return;
                 atomic_inc(&dev->carrier_up_count);
                 linkwatch_fire_event(dev);
-               if (netif_running(dev))
-                       netdev_watchdog_up(dev);
         }
  }
  EXPORT_SYMBOL(netif_carrier_on);
author	Eric Dumazet <edumazet@google.com>
	Thu, 11 Jun 2026 15:27:37 +0000 (15:27 +0000)
committer	Jakub Kicinski <kuba@kernel.org>
	Sat, 13 Jun 2026 00:34:57 +0000 (17:34 -0700)
include/linux/netdevice.h		patch | blob | blame | history
net/core/dev.c		patch | blob | blame | history
net/sched/sch_generic.c		patch | blob | blame | history