git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 4.19
authorSasha Levin <sashal@kernel.org>
Mon, 11 Dec 2023 18:11:31 +0000 (13:11 -0500)
committerSasha Levin <sashal@kernel.org>
Mon, 11 Dec 2023 18:11:31 +0000 (13:11 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
queue-4.19/devcoredump-send-uevent-once-devcd-is-ready.patch [new file with mode: 0644]
queue-4.19/devcoredump-serialize-devcd_del-work.patch [new file with mode: 0644]
queue-4.19/series

diff --git a/queue-4.19/devcoredump-send-uevent-once-devcd-is-ready.patch b/queue-4.19/devcoredump-send-uevent-once-devcd-is-ready.patch
new file mode 100644 (file)
index 0000000..48ed731
--- /dev/null
@@ -0,0 +1,59 @@
+From 5829bd2d0869d252dea2f9712d317f30f4655c7a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 17 Nov 2023 20:19:32 +0530
+Subject: devcoredump: Send uevent once devcd is ready
+
+From: Mukesh Ojha <quic_mojha@quicinc.com>
+
+[ Upstream commit af54d778a03853801d681c98c0c2a6c316ef9ca7 ]
+
+dev_coredumpm() creates a devcoredump device and adds it
+to the core kernel framework which eventually end up
+sending uevent to the user space and later creates a
+symbolic link to the failed device. An application
+running in userspace may be interested in this symbolic
+link to get the name of the failed device.
+
+In an issue scenario, once the uevent has been sent to user space,
+it starts reading '/sys/class/devcoredump/devcdX/failing_device'
+to get the actual name of the device, which might not have been
+created yet and may still be in the process of creation.
+
+To fix this, suppress sending the uevent until the failing-device
+symbolic link has been created, and send the uevent once the
+symbolic link has been created successfully.
+
+Fixes: 833c95456a70 ("device coredump: add new device coredump class")
+Signed-off-by: Mukesh Ojha <quic_mojha@quicinc.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/1700232572-25823-1-git-send-email-quic_mojha@quicinc.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/base/devcoredump.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/base/devcoredump.c b/drivers/base/devcoredump.c
+index 1ba4af7f2f21f..b13574cdef69b 100644
+--- a/drivers/base/devcoredump.c
++++ b/drivers/base/devcoredump.c
+@@ -376,6 +376,7 @@ void dev_coredumpm(struct device *dev, struct module *owner,
+       devcd->devcd_dev.class = &devcd_class;
+       mutex_lock(&devcd->mutex);
++      dev_set_uevent_suppress(&devcd->devcd_dev, true);
+       if (device_add(&devcd->devcd_dev))
+               goto put_device;
+@@ -387,6 +388,8 @@ void dev_coredumpm(struct device *dev, struct module *owner,
+                             "devcoredump"))
+               /* nothing - symlink will be missing */;
++      dev_set_uevent_suppress(&devcd->devcd_dev, false);
++      kobject_uevent(&devcd->devcd_dev.kobj, KOBJ_ADD);
+       INIT_DELAYED_WORK(&devcd->del_wk, devcd_del);
+       schedule_delayed_work(&devcd->del_wk, DEVCD_TIMEOUT);
+       mutex_unlock(&devcd->mutex);
+-- 
+2.42.0
+
diff --git a/queue-4.19/devcoredump-serialize-devcd_del-work.patch b/queue-4.19/devcoredump-serialize-devcd_del-work.patch
new file mode 100644 (file)
index 0000000..b0e1203
--- /dev/null
@@ -0,0 +1,208 @@
+From ae23f861bc8694e52492bb9298da4d06812bca16 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 13 Sep 2022 18:20:24 +0530
+Subject: devcoredump : Serialize devcd_del work
+
+From: Mukesh Ojha <quic_mojha@quicinc.com>
+
+[ Upstream commit 01daccf748323dfc61112f474cf2ba81015446b0 ]
+
+In the following scenario (diagram), one thread X running dev_coredumpm()
+adds a devcd device to the framework, which sends a uevent notification to
+userspace; another thread Y reads this uevent and calls into
+devcd_data_write(), which eventually tries to delete a queued timer that
+is not initialized/queued yet.
+
+So, the debug object reports a warning and, in the meantime, the timer is
+initialized and queued from the X path. From the Y path it gets
+reinitialized again, timer->entry.pprev becomes NULL, and
+try_to_grab_pending() gets stuck.
+
+To fix this, introduce mutex and a boolean flag to serialize the behaviour.
+
+       cpu0(X)                                 cpu1(Y)
+
+    dev_coredump() uevent sent to user space
+    device_add()  ======================> user space process Y reads the
+                                          uevents writes to devcd fd
+                                          which results into writes to
+
+                                         devcd_data_write()
+                                           mod_delayed_work()
+                                             try_to_grab_pending()
+                                               del_timer()
+                                                 debug_assert_init()
+   INIT_DELAYED_WORK()
+   schedule_delayed_work()
+                                                   debug_object_fixup()
+                                                     timer_fixup_assert_init()
+                                                       timer_setup()
+                                                         do_init_timer()
+                                                       /*
+                                                        Above call reinitializes
+                                                        the timer to
+                                                        timer->entry.pprev=NULL
+                                                        and this will be checked
+                                                        later in timer_pending() call.
+                                                       */
+                                                 timer_pending()
+                                                  !hlist_unhashed_lockless(&timer->entry)
+                                                    !h->pprev
+                                                /*
+                                                  del_timer() checks h->pprev and finds
+                                                  it to be NULL due to which
+                                                  try_to_grab_pending() stucks.
+                                                */
+
+Link: https://lore.kernel.org/lkml/2e1f81e2-428c-f11f-ce92-eb11048cb271@quicinc.com/
+Signed-off-by: Mukesh Ojha <quic_mojha@quicinc.com>
+Link: https://lore.kernel.org/r/1663073424-13663-1-git-send-email-quic_mojha@quicinc.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: af54d778a038 ("devcoredump: Send uevent once devcd is ready")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/base/devcoredump.c | 83 +++++++++++++++++++++++++++++++++++++-
+ 1 file changed, 81 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/base/devcoredump.c b/drivers/base/devcoredump.c
+index f1a3353f34946..1ba4af7f2f21f 100644
+--- a/drivers/base/devcoredump.c
++++ b/drivers/base/devcoredump.c
+@@ -29,6 +29,47 @@ struct devcd_entry {
+       struct device devcd_dev;
+       void *data;
+       size_t datalen;
++      /*
++       * Here, mutex is required to serialize the calls to del_wk work between
++       * user/kernel space which happens when devcd is added with device_add()
++       * and that sends uevent to user space. User space reads the uevents,
++       * and calls to devcd_data_write() which try to modify the work which is
++       * not even initialized/queued from devcoredump.
++       *
++       *
++       *
++       *        cpu0(X)                                 cpu1(Y)
++       *
++       *        dev_coredump() uevent sent to user space
++       *        device_add()  ======================> user space process Y reads the
++       *                                              uevents writes to devcd fd
++       *                                              which results into writes to
++       *
++       *                                             devcd_data_write()
++       *                                               mod_delayed_work()
++       *                                                 try_to_grab_pending()
++       *                                                   del_timer()
++       *                                                     debug_assert_init()
++       *       INIT_DELAYED_WORK()
++       *       schedule_delayed_work()
++       *
++       *
++       * Also, mutex alone would not be enough to avoid scheduling of
++       * del_wk work after it get flush from a call to devcd_free()
++       * mentioned as below.
++       *
++       *      disabled_store()
++       *        devcd_free()
++       *          mutex_lock()             devcd_data_write()
++       *          flush_delayed_work()
++       *          mutex_unlock()
++       *                                   mutex_lock()
++       *                                   mod_delayed_work()
++       *                                   mutex_unlock()
++       * So, delete_work flag is required.
++       */
++      struct mutex mutex;
++      bool delete_work;
+       struct module *owner;
+       ssize_t (*read)(char *buffer, loff_t offset, size_t count,
+                       void *data, size_t datalen);
+@@ -88,7 +129,12 @@ static ssize_t devcd_data_write(struct file *filp, struct kobject *kobj,
+       struct device *dev = kobj_to_dev(kobj);
+       struct devcd_entry *devcd = dev_to_devcd(dev);
+-      mod_delayed_work(system_wq, &devcd->del_wk, 0);
++      mutex_lock(&devcd->mutex);
++      if (!devcd->delete_work) {
++              devcd->delete_work = true;
++              mod_delayed_work(system_wq, &devcd->del_wk, 0);
++      }
++      mutex_unlock(&devcd->mutex);
+       return count;
+ }
+@@ -116,7 +162,12 @@ static int devcd_free(struct device *dev, void *data)
+ {
+       struct devcd_entry *devcd = dev_to_devcd(dev);
++      mutex_lock(&devcd->mutex);
++      if (!devcd->delete_work)
++              devcd->delete_work = true;
++
+       flush_delayed_work(&devcd->del_wk);
++      mutex_unlock(&devcd->mutex);
+       return 0;
+ }
+@@ -126,6 +177,30 @@ static ssize_t disabled_show(struct class *class, struct class_attribute *attr,
+       return sprintf(buf, "%d\n", devcd_disabled);
+ }
++/*
++ *
++ *    disabled_store()                                        worker()
++ *     class_for_each_device(&devcd_class,
++ *            NULL, NULL, devcd_free)
++ *         ...
++ *         ...
++ *       while ((dev = class_dev_iter_next(&iter))
++ *                                                             devcd_del()
++ *                                                               device_del()
++ *                                                                 put_device() <- last reference
++ *             error = fn(dev, data)                           devcd_dev_release()
++ *             devcd_free(dev, data)                           kfree(devcd)
++ *             mutex_lock(&devcd->mutex);
++ *
++ *
++ * In the above diagram, It looks like disabled_store() would be racing with parallely
++ * running devcd_del() and result in memory abort while acquiring devcd->mutex which
++ * is called after kfree of devcd memory  after dropping its last reference with
++ * put_device(). However, this will not happens as fn(dev, data) runs
++ * with its own reference to device via klist_node so it is not its last reference.
++ * so, above situation would not occur.
++ */
++
+ static ssize_t disabled_store(struct class *class, struct class_attribute *attr,
+                             const char *buf, size_t count)
+ {
+@@ -291,13 +366,16 @@ void dev_coredumpm(struct device *dev, struct module *owner,
+       devcd->read = read;
+       devcd->free = free;
+       devcd->failing_dev = get_device(dev);
++      devcd->delete_work = false;
++      mutex_init(&devcd->mutex);
+       device_initialize(&devcd->devcd_dev);
+       dev_set_name(&devcd->devcd_dev, "devcd%d",
+                    atomic_inc_return(&devcd_count));
+       devcd->devcd_dev.class = &devcd_class;
++      mutex_lock(&devcd->mutex);
+       if (device_add(&devcd->devcd_dev))
+               goto put_device;
+@@ -311,10 +389,11 @@ void dev_coredumpm(struct device *dev, struct module *owner,
+       INIT_DELAYED_WORK(&devcd->del_wk, devcd_del);
+       schedule_delayed_work(&devcd->del_wk, DEVCD_TIMEOUT);
+-
++      mutex_unlock(&devcd->mutex);
+       return;
+  put_device:
+       put_device(&devcd->devcd_dev);
++      mutex_unlock(&devcd->mutex);
+  put_module:
+       module_put(owner);
+  free:
+-- 
+2.42.0
+
index 7acee50769cc4cf0a03848ce15f586d788d277d8..4c45080f8af2bc97383c171757d49784623d8011 100644 (file)
@@ -51,3 +51,5 @@ psample-require-cap_net_admin-when-joining-packets-group.patch
 drop_monitor-require-cap_sys_admin-when-joining-events-group.patch
 tools-headers-uapi-sync-linux-perf_event.h-with-the-kernel-sources.patch
 ib-isert-fix-unaligned-immediate-data-handling.patch
+devcoredump-serialize-devcd_del-work.patch
+devcoredump-send-uevent-once-devcd-is-ready.patch