From 6234288e7c1ef8bf2cf9413f8da340680f77c91d Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Mon, 11 Dec 2023 13:11:31 -0500 Subject: [PATCH] Fixes for 4.19 Signed-off-by: Sasha Levin --- ...dump-send-uevent-once-devcd-is-ready.patch | 59 +++++ ...devcoredump-serialize-devcd_del-work.patch | 208 ++++++++++++++++++ queue-4.19/series | 2 + 3 files changed, 269 insertions(+) create mode 100644 queue-4.19/devcoredump-send-uevent-once-devcd-is-ready.patch create mode 100644 queue-4.19/devcoredump-serialize-devcd_del-work.patch diff --git a/queue-4.19/devcoredump-send-uevent-once-devcd-is-ready.patch b/queue-4.19/devcoredump-send-uevent-once-devcd-is-ready.patch new file mode 100644 index 00000000000..48ed7312f87 --- /dev/null +++ b/queue-4.19/devcoredump-send-uevent-once-devcd-is-ready.patch @@ -0,0 +1,59 @@ +From 5829bd2d0869d252dea2f9712d317f30f4655c7a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 17 Nov 2023 20:19:32 +0530 +Subject: devcoredump: Send uevent once devcd is ready + +From: Mukesh Ojha + +[ Upstream commit af54d778a03853801d681c98c0c2a6c316ef9ca7 ] + +dev_coredumpm() creates a devcoredump device and adds it +to the core kernel framework which eventually end up +sending uevent to the user space and later creates a +symbolic link to the failed device. An application +running in userspace may be interested in this symbolic +link to get the name of the failed device. + +In a issue scenario, once uevent sent to the user space +it start reading '/sys/class/devcoredump/devcdX/failing_device' +to get the actual name of the device which might not been +created and it is in its path of creation. + +To fix this, suppress sending uevent till the failing device +symbolic link gets created and send uevent once symbolic +link is created successfully. + +Fixes: 833c95456a70 ("device coredump: add new device coredump class") +Signed-off-by: Mukesh Ojha +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/1700232572-25823-1-git-send-email-quic_mojha@quicinc.com +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sasha Levin +--- + drivers/base/devcoredump.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/drivers/base/devcoredump.c b/drivers/base/devcoredump.c +index 1ba4af7f2f21f..b13574cdef69b 100644 +--- a/drivers/base/devcoredump.c ++++ b/drivers/base/devcoredump.c +@@ -376,6 +376,7 @@ void dev_coredumpm(struct device *dev, struct module *owner, + devcd->devcd_dev.class = &devcd_class; + + mutex_lock(&devcd->mutex); ++ dev_set_uevent_suppress(&devcd->devcd_dev, true); + if (device_add(&devcd->devcd_dev)) + goto put_device; + +@@ -387,6 +388,8 @@ void dev_coredumpm(struct device *dev, struct module *owner, + "devcoredump")) + /* nothing - symlink will be missing */; + ++ dev_set_uevent_suppress(&devcd->devcd_dev, false); ++ kobject_uevent(&devcd->devcd_dev.kobj, KOBJ_ADD); + INIT_DELAYED_WORK(&devcd->del_wk, devcd_del); + schedule_delayed_work(&devcd->del_wk, DEVCD_TIMEOUT); + mutex_unlock(&devcd->mutex); +-- +2.42.0 + diff --git a/queue-4.19/devcoredump-serialize-devcd_del-work.patch b/queue-4.19/devcoredump-serialize-devcd_del-work.patch new file mode 100644 index 00000000000..b0e1203426c --- /dev/null +++ b/queue-4.19/devcoredump-serialize-devcd_del-work.patch @@ -0,0 +1,208 @@ +From ae23f861bc8694e52492bb9298da4d06812bca16 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 13 Sep 2022 18:20:24 +0530 +Subject: devcoredump : Serialize devcd_del work + +From: Mukesh Ojha + +[ Upstream commit 01daccf748323dfc61112f474cf2ba81015446b0 ] + +In following scenario(diagram), when one thread X running dev_coredumpm() +adds devcd device to the framework which sends uevent notification to +userspace and another thread Y reads this uevent and call to +devcd_data_write() which eventually try to delete the queued timer that +is not initialized/queued yet. + +So, debug object reports some warning and in the meantime, timer is +initialized and queued from X path. and from Y path, it gets reinitialized +again and timer->entry.pprev=NULL and try_to_grab_pending() stucks. + +To fix this, introduce mutex and a boolean flag to serialize the behaviour. + + cpu0(X) cpu1(Y) + + dev_coredump() uevent sent to user space + device_add() ======================> user space process Y reads the + uevents writes to devcd fd + which results into writes to + + devcd_data_write() + mod_delayed_work() + try_to_grab_pending() + del_timer() + debug_assert_init() + INIT_DELAYED_WORK() + schedule_delayed_work() + debug_object_fixup() + timer_fixup_assert_init() + timer_setup() + do_init_timer() + /* + Above call reinitializes + the timer to + timer->entry.pprev=NULL + and this will be checked + later in timer_pending() call. + */ + timer_pending() + !hlist_unhashed_lockless(&timer->entry) + !h->pprev + /* + del_timer() checks h->pprev and finds + it to be NULL due to which + try_to_grab_pending() stucks. + */ + +Link: https://lore.kernel.org/lkml/2e1f81e2-428c-f11f-ce92-eb11048cb271@quicinc.com/ +Signed-off-by: Mukesh Ojha +Link: https://lore.kernel.org/r/1663073424-13663-1-git-send-email-quic_mojha@quicinc.com +Signed-off-by: Greg Kroah-Hartman +Stable-dep-of: af54d778a038 ("devcoredump: Send uevent once devcd is ready") +Signed-off-by: Sasha Levin +--- + drivers/base/devcoredump.c | 83 +++++++++++++++++++++++++++++++++++++- + 1 file changed, 81 insertions(+), 2 deletions(-) + +diff --git a/drivers/base/devcoredump.c b/drivers/base/devcoredump.c +index f1a3353f34946..1ba4af7f2f21f 100644 +--- a/drivers/base/devcoredump.c ++++ b/drivers/base/devcoredump.c +@@ -29,6 +29,47 @@ struct devcd_entry { + struct device devcd_dev; + void *data; + size_t datalen; ++ /* ++ * Here, mutex is required to serialize the calls to del_wk work between ++ * user/kernel space which happens when devcd is added with device_add() ++ * and that sends uevent to user space. User space reads the uevents, ++ * and calls to devcd_data_write() which try to modify the work which is ++ * not even initialized/queued from devcoredump. ++ * ++ * ++ * ++ * cpu0(X) cpu1(Y) ++ * ++ * dev_coredump() uevent sent to user space ++ * device_add() ======================> user space process Y reads the ++ * uevents writes to devcd fd ++ * which results into writes to ++ * ++ * devcd_data_write() ++ * mod_delayed_work() ++ * try_to_grab_pending() ++ * del_timer() ++ * debug_assert_init() ++ * INIT_DELAYED_WORK() ++ * schedule_delayed_work() ++ * ++ * ++ * Also, mutex alone would not be enough to avoid scheduling of ++ * del_wk work after it get flush from a call to devcd_free() ++ * mentioned as below. ++ * ++ * disabled_store() ++ * devcd_free() ++ * mutex_lock() devcd_data_write() ++ * flush_delayed_work() ++ * mutex_unlock() ++ * mutex_lock() ++ * mod_delayed_work() ++ * mutex_unlock() ++ * So, delete_work flag is required. ++ */ ++ struct mutex mutex; ++ bool delete_work; + struct module *owner; + ssize_t (*read)(char *buffer, loff_t offset, size_t count, + void *data, size_t datalen); +@@ -88,7 +129,12 @@ static ssize_t devcd_data_write(struct file *filp, struct kobject *kobj, + struct device *dev = kobj_to_dev(kobj); + struct devcd_entry *devcd = dev_to_devcd(dev); + +- mod_delayed_work(system_wq, &devcd->del_wk, 0); ++ mutex_lock(&devcd->mutex); ++ if (!devcd->delete_work) { ++ devcd->delete_work = true; ++ mod_delayed_work(system_wq, &devcd->del_wk, 0); ++ } ++ mutex_unlock(&devcd->mutex); + + return count; + } +@@ -116,7 +162,12 @@ static int devcd_free(struct device *dev, void *data) + { + struct devcd_entry *devcd = dev_to_devcd(dev); + ++ mutex_lock(&devcd->mutex); ++ if (!devcd->delete_work) ++ devcd->delete_work = true; ++ + flush_delayed_work(&devcd->del_wk); ++ mutex_unlock(&devcd->mutex); + return 0; + } + +@@ -126,6 +177,30 @@ static ssize_t disabled_show(struct class *class, struct class_attribute *attr, + return sprintf(buf, "%d\n", devcd_disabled); + } + ++/* ++ * ++ * disabled_store() worker() ++ * class_for_each_device(&devcd_class, ++ * NULL, NULL, devcd_free) ++ * ... ++ * ... ++ * while ((dev = class_dev_iter_next(&iter)) ++ * devcd_del() ++ * device_del() ++ * put_device() <- last reference ++ * error = fn(dev, data) devcd_dev_release() ++ * devcd_free(dev, data) kfree(devcd) ++ * mutex_lock(&devcd->mutex); ++ * ++ * ++ * In the above diagram, It looks like disabled_store() would be racing with parallely ++ * running devcd_del() and result in memory abort while acquiring devcd->mutex which ++ * is called after kfree of devcd memory after dropping its last reference with ++ * put_device(). However, this will not happens as fn(dev, data) runs ++ * with its own reference to device via klist_node so it is not its last reference. ++ * so, above situation would not occur. ++ */ ++ + static ssize_t disabled_store(struct class *class, struct class_attribute *attr, + const char *buf, size_t count) + { +@@ -291,13 +366,16 @@ void dev_coredumpm(struct device *dev, struct module *owner, + devcd->read = read; + devcd->free = free; + devcd->failing_dev = get_device(dev); ++ devcd->delete_work = false; + ++ mutex_init(&devcd->mutex); + device_initialize(&devcd->devcd_dev); + + dev_set_name(&devcd->devcd_dev, "devcd%d", + atomic_inc_return(&devcd_count)); + devcd->devcd_dev.class = &devcd_class; + ++ mutex_lock(&devcd->mutex); + if (device_add(&devcd->devcd_dev)) + goto put_device; + +@@ -311,10 +389,11 @@ void dev_coredumpm(struct device *dev, struct module *owner, + + INIT_DELAYED_WORK(&devcd->del_wk, devcd_del); + schedule_delayed_work(&devcd->del_wk, DEVCD_TIMEOUT); +- ++ mutex_unlock(&devcd->mutex); + return; + put_device: + put_device(&devcd->devcd_dev); ++ mutex_unlock(&devcd->mutex); + put_module: + module_put(owner); + free: +-- +2.42.0 + diff --git a/queue-4.19/series b/queue-4.19/series index 7acee50769c..4c45080f8af 100644 --- a/queue-4.19/series +++ b/queue-4.19/series @@ -51,3 +51,5 @@ psample-require-cap_net_admin-when-joining-packets-group.patch drop_monitor-require-cap_sys_admin-when-joining-events-group.patch tools-headers-uapi-sync-linux-perf_event.h-with-the-kernel-sources.patch ib-isert-fix-unaligned-immediate-data-handling.patch +devcoredump-serialize-devcd_del-work.patch +devcoredump-send-uevent-once-devcd-is-ready.patch -- 2.47.3