]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
net: netconsole: Defer netpoll cleanup to avoid lock release during list traversal
authorBreno Leitao <leitao@debian.org>
Thu, 8 Aug 2024 12:25:11 +0000 (05:25 -0700)
committerPaolo Abeni <pabeni@redhat.com>
Tue, 13 Aug 2024 08:58:58 +0000 (10:58 +0200)
Current issue:
- The `target_list_lock` spinlock is held while iterating over
  target_list() entries.
- Mid-loop, the lock is released to call __netpoll_cleanup(), then
  reacquired.
- This practice compromises the protection provided by
  `target_list_lock`.

Reason for current design:
1. __netpoll_cleanup() may sleep, incompatible with holding a spinlock.
2. target_list_lock must be a spinlock because write_msg() cannot sleep.
   (See commit b5427c27173e ("[NET] netconsole: Support multiple logging
    targets"))

Defer the cleanup of the netpoll structure to outside the
target_list_lock() protected area. Create another list
(target_cleanup_list) to hold the entries that need to be cleaned up,
and clean them using a mutex (target_cleanup_list_lock).

Signed-off-by: Breno Leitao <leitao@debian.org>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
drivers/net/netconsole.c

index 69eeab4a1e26d26727b2caf4432acda64667de99..43c29b15adbfad0f6929fa6c9b84abdb98e753b2 100644 (file)
@@ -37,6 +37,7 @@
 #include <linux/configfs.h>
 #include <linux/etherdevice.h>
 #include <linux/utsname.h>
+#include <linux/rtnetlink.h>
 
 MODULE_AUTHOR("Matt Mackall <mpm@selenic.com>");
 MODULE_DESCRIPTION("Console driver for network interfaces");
@@ -72,9 +73,16 @@ __setup("netconsole=", option_setup);
 
 /* Linked list of all configured targets */
 static LIST_HEAD(target_list);
+/* target_cleanup_list is used to track targets that need to be cleaned outside
+ * of target_list_lock. It should be cleaned in the same function it is
+ * populated.
+ */
+static LIST_HEAD(target_cleanup_list);
 
 /* This needs to be a spinlock because write_msg() cannot sleep */
 static DEFINE_SPINLOCK(target_list_lock);
+/* This needs to be a mutex because netpoll_cleanup might sleep */
+static DEFINE_MUTEX(target_cleanup_list_lock);
 
 /*
  * Console driver for extended netconsoles.  Registered on the first use to
@@ -210,6 +218,33 @@ static struct netconsole_target *alloc_and_init(void)
        return nt;
 }
 
+/* Clean up every target in the cleanup_list and move the clean targets back to
+ * the main target_list.
+ */
+static void netconsole_process_cleanups_core(void)
+{
+       struct netconsole_target *nt, *tmp;
+       unsigned long flags;
+
+       /* The cleanup needs RTNL locked */
+       ASSERT_RTNL();
+
+       mutex_lock(&target_cleanup_list_lock);
+       list_for_each_entry_safe(nt, tmp, &target_cleanup_list, list) {
+               /* all entries in the cleanup_list needs to be disabled */
+               WARN_ON_ONCE(nt->enabled);
+               do_netpoll_cleanup(&nt->np);
+               /* moved the cleaned target to target_list. Need to hold both
+                * locks
+                */
+               spin_lock_irqsave(&target_list_lock, flags);
+               list_move(&nt->list, &target_list);
+               spin_unlock_irqrestore(&target_list_lock, flags);
+       }
+       WARN_ON_ONCE(!list_empty(&target_cleanup_list));
+       mutex_unlock(&target_cleanup_list_lock);
+}
+
 #ifdef CONFIG_NETCONSOLE_DYNAMIC
 
 /*
@@ -246,6 +281,19 @@ static struct netconsole_target *to_target(struct config_item *item)
                            struct netconsole_target, group);
 }
 
+/* Do the list cleanup with the rtnl lock hold.  rtnl lock is necessary because
+ * netdev might be cleaned-up by calling __netpoll_cleanup(),
+ */
+static void netconsole_process_cleanups(void)
+{
+       /* rtnl lock is called here, because it has precedence over
+        * target_cleanup_list_lock mutex and target_cleanup_list
+        */
+       rtnl_lock();
+       netconsole_process_cleanups_core();
+       rtnl_unlock();
+}
+
 /* Get rid of possible trailing newline, returning the new length */
 static void trim_newline(char *s, size_t maxlen)
 {
@@ -376,13 +424,20 @@ static ssize_t enabled_store(struct config_item *item,
                 * otherwise we might end up in write_msg() with
                 * nt->np.dev == NULL and nt->enabled == true
                 */
+               mutex_lock(&target_cleanup_list_lock);
                spin_lock_irqsave(&target_list_lock, flags);
                nt->enabled = false;
+               /* Remove the target from the list, while holding
+                * target_list_lock
+                */
+               list_move(&nt->list, &target_cleanup_list);
                spin_unlock_irqrestore(&target_list_lock, flags);
-               netpoll_cleanup(&nt->np);
+               mutex_unlock(&target_cleanup_list_lock);
        }
 
        ret = strnlen(buf, count);
+       /* Deferred cleanup */
+       netconsole_process_cleanups();
 out_unlock:
        mutex_unlock(&dynamic_netconsole_mutex);
        return ret;
@@ -942,7 +997,7 @@ static int netconsole_netdev_event(struct notifier_block *this,
                                   unsigned long event, void *ptr)
 {
        unsigned long flags;
-       struct netconsole_target *nt;
+       struct netconsole_target *nt, *tmp;
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
        bool stopped = false;
 
@@ -950,9 +1005,9 @@ static int netconsole_netdev_event(struct notifier_block *this,
              event == NETDEV_RELEASE || event == NETDEV_JOIN))
                goto done;
 
+       mutex_lock(&target_cleanup_list_lock);
        spin_lock_irqsave(&target_list_lock, flags);
-restart:
-       list_for_each_entry(nt, &target_list, list) {
+       list_for_each_entry_safe(nt, tmp, &target_list, list) {
                netconsole_target_get(nt);
                if (nt->np.dev == dev) {
                        switch (event) {
@@ -962,25 +1017,16 @@ restart:
                        case NETDEV_RELEASE:
                        case NETDEV_JOIN:
                        case NETDEV_UNREGISTER:
-                               /* rtnl_lock already held
-                                * we might sleep in __netpoll_cleanup()
-                                */
                                nt->enabled = false;
-                               spin_unlock_irqrestore(&target_list_lock, flags);
-
-                               __netpoll_cleanup(&nt->np);
-
-                               spin_lock_irqsave(&target_list_lock, flags);
-                               netdev_put(nt->np.dev, &nt->np.dev_tracker);
-                               nt->np.dev = NULL;
+                               list_move(&nt->list, &target_cleanup_list);
                                stopped = true;
-                               netconsole_target_put(nt);
-                               goto restart;
                        }
                }
                netconsole_target_put(nt);
        }
        spin_unlock_irqrestore(&target_list_lock, flags);
+       mutex_unlock(&target_cleanup_list_lock);
+
        if (stopped) {
                const char *msg = "had an event";
 
@@ -999,6 +1045,11 @@ restart:
                        dev->name, msg);
        }
 
+       /* Process target_cleanup_list entries. By the end, target_cleanup_list
+        * should be empty
+        */
+       netconsole_process_cleanups_core();
+
 done:
        return NOTIFY_DONE;
 }