From 6234288e7c1ef8bf2cf9413f8da340680f77c91d Mon Sep 17 00:00:00 2001
From: Sasha Levin <sashal@kernel.org>
Date: Mon, 11 Dec 2023 13:11:31 -0500
Subject: [PATCH] Fixes for 4.19

Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 ...dump-send-uevent-once-devcd-is-ready.patch |  59 +++++
 ...devcoredump-serialize-devcd_del-work.patch | 208 ++++++++++++++++++
 queue-4.19/series                             |   2 +
 3 files changed, 269 insertions(+)
 create mode 100644 queue-4.19/devcoredump-send-uevent-once-devcd-is-ready.patch
 create mode 100644 queue-4.19/devcoredump-serialize-devcd_del-work.patch

diff --git a/queue-4.19/devcoredump-send-uevent-once-devcd-is-ready.patch b/queue-4.19/devcoredump-send-uevent-once-devcd-is-ready.patch
new file mode 100644
index 00000000000..48ed7312f87
--- /dev/null
+++ b/queue-4.19/devcoredump-send-uevent-once-devcd-is-ready.patch
@@ -0,0 +1,59 @@
+From 5829bd2d0869d252dea2f9712d317f30f4655c7a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 17 Nov 2023 20:19:32 +0530
+Subject: devcoredump: Send uevent once devcd is ready
+
+From: Mukesh Ojha <quic_mojha@quicinc.com>
+
+[ Upstream commit af54d778a03853801d681c98c0c2a6c316ef9ca7 ]
+
+dev_coredumpm() creates a devcoredump device and adds it
+to the core kernel framework which eventually end up
+sending uevent to the user space and later creates a
+symbolic link to the failed device. An application
+running in userspace may be interested in this symbolic
+link to get the name of the failed device.
+
+In a issue scenario, once uevent sent to the user space
+it start reading '/sys/class/devcoredump/devcdX/failing_device'
+to get the actual name of the device which might not been
+created and it is in its path of creation.
+
+To fix this, suppress sending uevent till the failing device
+symbolic link gets created and send uevent once symbolic
+link is created successfully.
+
+Fixes: 833c95456a70 ("device coredump: add new device coredump class")
+Signed-off-by: Mukesh Ojha <quic_mojha@quicinc.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/1700232572-25823-1-git-send-email-quic_mojha@quicinc.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/base/devcoredump.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/base/devcoredump.c b/drivers/base/devcoredump.c
+index 1ba4af7f2f21f..b13574cdef69b 100644
+--- a/drivers/base/devcoredump.c
++++ b/drivers/base/devcoredump.c
+@@ -376,6 +376,7 @@ void dev_coredumpm(struct device *dev, struct module *owner,
+ 	devcd->devcd_dev.class = &devcd_class;
+ 
+ 	mutex_lock(&devcd->mutex);
++	dev_set_uevent_suppress(&devcd->devcd_dev, true);
+ 	if (device_add(&devcd->devcd_dev))
+ 		goto put_device;
+ 
+@@ -387,6 +388,8 @@ void dev_coredumpm(struct device *dev, struct module *owner,
+ 			      "devcoredump"))
+ 		/* nothing - symlink will be missing */;
+ 
++	dev_set_uevent_suppress(&devcd->devcd_dev, false);
++	kobject_uevent(&devcd->devcd_dev.kobj, KOBJ_ADD);
+ 	INIT_DELAYED_WORK(&devcd->del_wk, devcd_del);
+ 	schedule_delayed_work(&devcd->del_wk, DEVCD_TIMEOUT);
+ 	mutex_unlock(&devcd->mutex);
+-- 
+2.42.0
+
diff --git a/queue-4.19/devcoredump-serialize-devcd_del-work.patch b/queue-4.19/devcoredump-serialize-devcd_del-work.patch
new file mode 100644
index 00000000000..b0e1203426c
--- /dev/null
+++ b/queue-4.19/devcoredump-serialize-devcd_del-work.patch
@@ -0,0 +1,208 @@
+From ae23f861bc8694e52492bb9298da4d06812bca16 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 13 Sep 2022 18:20:24 +0530
+Subject: devcoredump : Serialize devcd_del work
+
+From: Mukesh Ojha <quic_mojha@quicinc.com>
+
+[ Upstream commit 01daccf748323dfc61112f474cf2ba81015446b0 ]
+
+In following scenario(diagram), when one thread X running dev_coredumpm()
+adds devcd device to the framework which sends uevent notification to
+userspace and another thread Y reads this uevent and call to
+devcd_data_write() which eventually try to delete the queued timer that
+is not initialized/queued yet.
+
+So, debug object reports some warning and in the meantime, timer is
+initialized and queued from X path. and from Y path, it gets reinitialized
+again and timer->entry.pprev=NULL and try_to_grab_pending() stucks.
+
+To fix this, introduce mutex and a boolean flag to serialize the behaviour.
+
+ 	cpu0(X)			                cpu1(Y)
+
+    dev_coredump() uevent sent to user space
+    device_add()  ======================> user space process Y reads the
+                                          uevents writes to devcd fd
+                                          which results into writes to
+
+                                         devcd_data_write()
+                                           mod_delayed_work()
+                                             try_to_grab_pending()
+                                               del_timer()
+                                                 debug_assert_init()
+   INIT_DELAYED_WORK()
+   schedule_delayed_work()
+                                                   debug_object_fixup()
+                                                     timer_fixup_assert_init()
+                                                       timer_setup()
+                                                         do_init_timer()
+                                                       /*
+                                                        Above call reinitializes
+                                                        the timer to
+                                                        timer->entry.pprev=NULL
+                                                        and this will be checked
+                                                        later in timer_pending() call.
+                                                       */
+                                                 timer_pending()
+                                                  !hlist_unhashed_lockless(&timer->entry)
+                                                    !h->pprev
+                                                /*
+                                                  del_timer() checks h->pprev and finds
+                                                  it to be NULL due to which
+                                                  try_to_grab_pending() stucks.
+                                                */
+
+Link: https://lore.kernel.org/lkml/2e1f81e2-428c-f11f-ce92-eb11048cb271@quicinc.com/
+Signed-off-by: Mukesh Ojha <quic_mojha@quicinc.com>
+Link: https://lore.kernel.org/r/1663073424-13663-1-git-send-email-quic_mojha@quicinc.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: af54d778a038 ("devcoredump: Send uevent once devcd is ready")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/base/devcoredump.c | 83 +++++++++++++++++++++++++++++++++++++-
+ 1 file changed, 81 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/base/devcoredump.c b/drivers/base/devcoredump.c
+index f1a3353f34946..1ba4af7f2f21f 100644
+--- a/drivers/base/devcoredump.c
++++ b/drivers/base/devcoredump.c
+@@ -29,6 +29,47 @@ struct devcd_entry {
+ 	struct device devcd_dev;
+ 	void *data;
+ 	size_t datalen;
++	/*
++	 * Here, mutex is required to serialize the calls to del_wk work between
++	 * user/kernel space which happens when devcd is added with device_add()
++	 * and that sends uevent to user space. User space reads the uevents,
++	 * and calls to devcd_data_write() which try to modify the work which is
++	 * not even initialized/queued from devcoredump.
++	 *
++	 *
++	 *
++	 *        cpu0(X)                                 cpu1(Y)
++	 *
++	 *        dev_coredump() uevent sent to user space
++	 *        device_add()  ======================> user space process Y reads the
++	 *                                              uevents writes to devcd fd
++	 *                                              which results into writes to
++	 *
++	 *                                             devcd_data_write()
++	 *                                               mod_delayed_work()
++	 *                                                 try_to_grab_pending()
++	 *                                                   del_timer()
++	 *                                                     debug_assert_init()
++	 *       INIT_DELAYED_WORK()
++	 *       schedule_delayed_work()
++	 *
++	 *
++	 * Also, mutex alone would not be enough to avoid scheduling of
++	 * del_wk work after it get flush from a call to devcd_free()
++	 * mentioned as below.
++	 *
++	 *	disabled_store()
++	 *        devcd_free()
++	 *          mutex_lock()             devcd_data_write()
++	 *          flush_delayed_work()
++	 *          mutex_unlock()
++	 *                                   mutex_lock()
++	 *                                   mod_delayed_work()
++	 *                                   mutex_unlock()
++	 * So, delete_work flag is required.
++	 */
++	struct mutex mutex;
++	bool delete_work;
+ 	struct module *owner;
+ 	ssize_t (*read)(char *buffer, loff_t offset, size_t count,
+ 			void *data, size_t datalen);
+@@ -88,7 +129,12 @@ static ssize_t devcd_data_write(struct file *filp, struct kobject *kobj,
+ 	struct device *dev = kobj_to_dev(kobj);
+ 	struct devcd_entry *devcd = dev_to_devcd(dev);
+ 
+-	mod_delayed_work(system_wq, &devcd->del_wk, 0);
++	mutex_lock(&devcd->mutex);
++	if (!devcd->delete_work) {
++		devcd->delete_work = true;
++		mod_delayed_work(system_wq, &devcd->del_wk, 0);
++	}
++	mutex_unlock(&devcd->mutex);
+ 
+ 	return count;
+ }
+@@ -116,7 +162,12 @@ static int devcd_free(struct device *dev, void *data)
+ {
+ 	struct devcd_entry *devcd = dev_to_devcd(dev);
+ 
++	mutex_lock(&devcd->mutex);
++	if (!devcd->delete_work)
++		devcd->delete_work = true;
++
+ 	flush_delayed_work(&devcd->del_wk);
++	mutex_unlock(&devcd->mutex);
+ 	return 0;
+ }
+ 
+@@ -126,6 +177,30 @@ static ssize_t disabled_show(struct class *class, struct class_attribute *attr,
+ 	return sprintf(buf, "%d\n", devcd_disabled);
+ }
+ 
++/*
++ *
++ *	disabled_store()                                	worker()
++ *	 class_for_each_device(&devcd_class,
++ *		NULL, NULL, devcd_free)
++ *         ...
++ *         ...
++ *	   while ((dev = class_dev_iter_next(&iter))
++ *                                                             devcd_del()
++ *                                                               device_del()
++ *                                                                 put_device() <- last reference
++ *             error = fn(dev, data)                           devcd_dev_release()
++ *             devcd_free(dev, data)                           kfree(devcd)
++ *             mutex_lock(&devcd->mutex);
++ *
++ *
++ * In the above diagram, It looks like disabled_store() would be racing with parallely
++ * running devcd_del() and result in memory abort while acquiring devcd->mutex which
++ * is called after kfree of devcd memory  after dropping its last reference with
++ * put_device(). However, this will not happens as fn(dev, data) runs
++ * with its own reference to device via klist_node so it is not its last reference.
++ * so, above situation would not occur.
++ */
++
+ static ssize_t disabled_store(struct class *class, struct class_attribute *attr,
+ 			      const char *buf, size_t count)
+ {
+@@ -291,13 +366,16 @@ void dev_coredumpm(struct device *dev, struct module *owner,
+ 	devcd->read = read;
+ 	devcd->free = free;
+ 	devcd->failing_dev = get_device(dev);
++	devcd->delete_work = false;
+ 
++	mutex_init(&devcd->mutex);
+ 	device_initialize(&devcd->devcd_dev);
+ 
+ 	dev_set_name(&devcd->devcd_dev, "devcd%d",
+ 		     atomic_inc_return(&devcd_count));
+ 	devcd->devcd_dev.class = &devcd_class;
+ 
++	mutex_lock(&devcd->mutex);
+ 	if (device_add(&devcd->devcd_dev))
+ 		goto put_device;
+ 
+@@ -311,10 +389,11 @@ void dev_coredumpm(struct device *dev, struct module *owner,
+ 
+ 	INIT_DELAYED_WORK(&devcd->del_wk, devcd_del);
+ 	schedule_delayed_work(&devcd->del_wk, DEVCD_TIMEOUT);
+-
++	mutex_unlock(&devcd->mutex);
+ 	return;
+  put_device:
+ 	put_device(&devcd->devcd_dev);
++	mutex_unlock(&devcd->mutex);
+  put_module:
+ 	module_put(owner);
+  free:
+-- 
+2.42.0
+
diff --git a/queue-4.19/series b/queue-4.19/series
index 7acee50769c..4c45080f8af 100644
--- a/queue-4.19/series
+++ b/queue-4.19/series
@@ -51,3 +51,5 @@ psample-require-cap_net_admin-when-joining-packets-group.patch
 drop_monitor-require-cap_sys_admin-when-joining-events-group.patch
 tools-headers-uapi-sync-linux-perf_event.h-with-the-kernel-sources.patch
 ib-isert-fix-unaligned-immediate-data-handling.patch
+devcoredump-serialize-devcd_del-work.patch
+devcoredump-send-uevent-once-devcd-is-ready.patch
-- 
2.47.3