--- /dev/null
+From 5829bd2d0869d252dea2f9712d317f30f4655c7a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 17 Nov 2023 20:19:32 +0530
+Subject: devcoredump: Send uevent once devcd is ready
+
+From: Mukesh Ojha <quic_mojha@quicinc.com>
+
+[ Upstream commit af54d778a03853801d681c98c0c2a6c316ef9ca7 ]
+
+dev_coredumpm() creates a devcoredump device and adds it
+to the core kernel framework, which eventually ends up
+sending a uevent to user space and only later creates the
+symbolic link to the failed device. An application running
+in user space may rely on this symbolic link to get the
+name of the failed device.
+
+In the failing scenario, as soon as the uevent reaches user
+space, the application reads
+'/sys/class/devcoredump/devcdX/failing_device' to get the
+actual name of the device, but the symbolic link may not
+exist yet because it is still being created.
+
+To fix this, suppress the uevent until the failing-device
+symbolic link has been created, and send the uevent only
+once the link is in place.
+
+Fixes: 833c95456a70 ("device coredump: add new device coredump class")
+Signed-off-by: Mukesh Ojha <quic_mojha@quicinc.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/1700232572-25823-1-git-send-email-quic_mojha@quicinc.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/base/devcoredump.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/base/devcoredump.c b/drivers/base/devcoredump.c
+index 1ba4af7f2f21f..b13574cdef69b 100644
+--- a/drivers/base/devcoredump.c
++++ b/drivers/base/devcoredump.c
+@@ -376,6 +376,7 @@ void dev_coredumpm(struct device *dev, struct module *owner,
+ devcd->devcd_dev.class = &devcd_class;
+
+ mutex_lock(&devcd->mutex);
++ dev_set_uevent_suppress(&devcd->devcd_dev, true);
+ if (device_add(&devcd->devcd_dev))
+ goto put_device;
+
+@@ -387,6 +388,8 @@ void dev_coredumpm(struct device *dev, struct module *owner,
+ "devcoredump"))
+ /* nothing - symlink will be missing */;
+
++ dev_set_uevent_suppress(&devcd->devcd_dev, false);
++ kobject_uevent(&devcd->devcd_dev.kobj, KOBJ_ADD);
+ INIT_DELAYED_WORK(&devcd->del_wk, devcd_del);
+ schedule_delayed_work(&devcd->del_wk, DEVCD_TIMEOUT);
+ mutex_unlock(&devcd->mutex);
+--
+2.42.0
+
--- /dev/null
+From ae23f861bc8694e52492bb9298da4d06812bca16 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 13 Sep 2022 18:20:24 +0530
+Subject: devcoredump: Serialize devcd_del work
+
+From: Mukesh Ojha <quic_mojha@quicinc.com>
+
+[ Upstream commit 01daccf748323dfc61112f474cf2ba81015446b0 ]
+
+In the following scenario (diagram below), one thread X running
+dev_coredumpm() adds the devcd device to the framework, which sends a
+uevent notification to user space; another thread Y reads this uevent
+and calls into devcd_data_write(), which then tries to delete a delayed
+work timer that has not been initialized/queued yet.
+
+As a result, debug objects reports a warning, and in the meantime the
+timer is initialized and queued from the X path. It then gets
+reinitialized again from the Y path, leaving timer->entry.pprev = NULL,
+and try_to_grab_pending() gets stuck.
+
+To fix this, introduce a mutex and a boolean flag to serialize the two
+paths.
+
+ cpu0(X) cpu1(Y)
+
+ dev_coredump() uevent sent to user space
+ device_add() ======================> user space process Y reads the
+ uevents writes to devcd fd
+ which results into writes to
+
+ devcd_data_write()
+ mod_delayed_work()
+ try_to_grab_pending()
+ del_timer()
+ debug_assert_init()
+ INIT_DELAYED_WORK()
+ schedule_delayed_work()
+ debug_object_fixup()
+ timer_fixup_assert_init()
+ timer_setup()
+ do_init_timer()
+ /*
+ Above call reinitializes
+ the timer to
+ timer->entry.pprev=NULL
+ and this will be checked
+ later in timer_pending() call.
+ */
+ timer_pending()
+ !hlist_unhashed_lockless(&timer->entry)
+ !h->pprev
+ /*
+ del_timer() checks h->pprev and finds
+ it to be NULL due to which
+ try_to_grab_pending() stucks.
+ */
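+
+The serialization has two parts: the mutex is held in dev_coredumpm()
+from before device_add() until after schedule_delayed_work(), so a
+racing devcd_data_write() cannot touch the delayed work before it has
+been initialized, and the delete_work flag ensures the work is armed
+for deletion only once and is not re-armed after devcd_free() has
+flushed it. A minimal user-space model of the flag logic follows
+(pthreads stand in for devcd->mutex; all names are illustrative, this
+is a sketch rather than kernel code):
+
+	#include <pthread.h>
+	#include <stdbool.h>
+	#include <stdio.h>
+
+	static pthread_mutex_t devcd_mutex = PTHREAD_MUTEX_INITIALIZER;
+	static bool delete_work;	/* mirrors devcd->delete_work */
+
+	/* stands in for devcd_data_write(): only the first caller arms
+	 * the deletion work, later callers see the flag and back off
+	 */
+	static void *request_delete(void *name)
+	{
+		pthread_mutex_lock(&devcd_mutex);
+		if (!delete_work) {
+			delete_work = true;
+			printf("%s: arms the deletion work\n",
+			       (const char *)name);
+		} else {
+			printf("%s: deletion already armed, skip\n",
+			       (const char *)name);
+		}
+		pthread_mutex_unlock(&devcd_mutex);
+		return NULL;
+	}
+
+	int main(void)
+	{
+		pthread_t x, y;
+
+		pthread_create(&x, NULL, request_delete, "thread X");
+		pthread_create(&y, NULL, request_delete, "thread Y");
+		pthread_join(x, NULL);
+		pthread_join(y, NULL);
+		return 0;
+	}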
+
+Link: https://lore.kernel.org/lkml/2e1f81e2-428c-f11f-ce92-eb11048cb271@quicinc.com/
+Signed-off-by: Mukesh Ojha <quic_mojha@quicinc.com>
+Link: https://lore.kernel.org/r/1663073424-13663-1-git-send-email-quic_mojha@quicinc.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: af54d778a038 ("devcoredump: Send uevent once devcd is ready")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/base/devcoredump.c | 83 +++++++++++++++++++++++++++++++++++++-
+ 1 file changed, 81 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/base/devcoredump.c b/drivers/base/devcoredump.c
+index f1a3353f34946..1ba4af7f2f21f 100644
+--- a/drivers/base/devcoredump.c
++++ b/drivers/base/devcoredump.c
+@@ -29,6 +29,47 @@ struct devcd_entry {
+ struct device devcd_dev;
+ void *data;
+ size_t datalen;
++ /*
++ * Here, mutex is required to serialize the calls to del_wk work between
++ * user/kernel space which happens when devcd is added with device_add()
++ * and that sends uevent to user space. User space reads the uevents,
++ * and calls to devcd_data_write() which try to modify the work which is
++ * not even initialized/queued from devcoredump.
++ *
++ *
++ *
++ * cpu0(X) cpu1(Y)
++ *
++ * dev_coredump() uevent sent to user space
++ * device_add() ======================> user space process Y reads the
++ * uevents writes to devcd fd
++ * which results into writes to
++ *
++ * devcd_data_write()
++ * mod_delayed_work()
++ * try_to_grab_pending()
++ * del_timer()
++ * debug_assert_init()
++ * INIT_DELAYED_WORK()
++ * schedule_delayed_work()
++ *
++ *
++ * Also, mutex alone would not be enough to avoid scheduling of
++ * del_wk work after it get flush from a call to devcd_free()
++ * mentioned as below.
++ *
++ * disabled_store()
++ * devcd_free()
++ * mutex_lock() devcd_data_write()
++ * flush_delayed_work()
++ * mutex_unlock()
++ * mutex_lock()
++ * mod_delayed_work()
++ * mutex_unlock()
++ * So, delete_work flag is required.
++ */
++ struct mutex mutex;
++ bool delete_work;
+ struct module *owner;
+ ssize_t (*read)(char *buffer, loff_t offset, size_t count,
+ void *data, size_t datalen);
+@@ -88,7 +129,12 @@ static ssize_t devcd_data_write(struct file *filp, struct kobject *kobj,
+ struct device *dev = kobj_to_dev(kobj);
+ struct devcd_entry *devcd = dev_to_devcd(dev);
+
+- mod_delayed_work(system_wq, &devcd->del_wk, 0);
++ mutex_lock(&devcd->mutex);
++ if (!devcd->delete_work) {
++ devcd->delete_work = true;
++ mod_delayed_work(system_wq, &devcd->del_wk, 0);
++ }
++ mutex_unlock(&devcd->mutex);
+
+ return count;
+ }
+@@ -116,7 +162,12 @@ static int devcd_free(struct device *dev, void *data)
+ {
+ struct devcd_entry *devcd = dev_to_devcd(dev);
+
++ mutex_lock(&devcd->mutex);
++ if (!devcd->delete_work)
++ devcd->delete_work = true;
++
+ flush_delayed_work(&devcd->del_wk);
++ mutex_unlock(&devcd->mutex);
+ return 0;
+ }
+
+@@ -126,6 +177,30 @@ static ssize_t disabled_show(struct class *class, struct class_attribute *attr,
+ return sprintf(buf, "%d\n", devcd_disabled);
+ }
+
++/*
++ *
++ * disabled_store() worker()
++ * class_for_each_device(&devcd_class,
++ * NULL, NULL, devcd_free)
++ * ...
++ * ...
++ * while ((dev = class_dev_iter_next(&iter))
++ * devcd_del()
++ * device_del()
++ * put_device() <- last reference
++ * error = fn(dev, data) devcd_dev_release()
++ * devcd_free(dev, data) kfree(devcd)
++ * mutex_lock(&devcd->mutex);
++ *
++ *
++ * In the above diagram, It looks like disabled_store() would be racing with parallely
++ * running devcd_del() and result in memory abort while acquiring devcd->mutex which
++ * is called after kfree of devcd memory after dropping its last reference with
++ * put_device(). However, this will not happens as fn(dev, data) runs
++ * with its own reference to device via klist_node so it is not its last reference.
++ * so, above situation would not occur.
++ */
++
+ static ssize_t disabled_store(struct class *class, struct class_attribute *attr,
+ const char *buf, size_t count)
+ {
+@@ -291,13 +366,16 @@ void dev_coredumpm(struct device *dev, struct module *owner,
+ devcd->read = read;
+ devcd->free = free;
+ devcd->failing_dev = get_device(dev);
++ devcd->delete_work = false;
+
++ mutex_init(&devcd->mutex);
+ device_initialize(&devcd->devcd_dev);
+
+ dev_set_name(&devcd->devcd_dev, "devcd%d",
+ atomic_inc_return(&devcd_count));
+ devcd->devcd_dev.class = &devcd_class;
+
++ mutex_lock(&devcd->mutex);
+ if (device_add(&devcd->devcd_dev))
+ goto put_device;
+
+@@ -311,10 +389,11 @@ void dev_coredumpm(struct device *dev, struct module *owner,
+
+ INIT_DELAYED_WORK(&devcd->del_wk, devcd_del);
+ schedule_delayed_work(&devcd->del_wk, DEVCD_TIMEOUT);
+-
++ mutex_unlock(&devcd->mutex);
+ return;
+ put_device:
+ put_device(&devcd->devcd_dev);
++ mutex_unlock(&devcd->mutex);
+ put_module:
+ module_put(owner);
+ free:
+--
+2.42.0
+