git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
fixes for 4.19
author Sasha Levin <sashal@kernel.org>
Tue, 6 Aug 2019 23:27:07 +0000 (19:27 -0400)
committer Sasha Levin <sashal@kernel.org>
Tue, 6 Aug 2019 23:27:07 +0000 (19:27 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
queue-4.19/driver-core-establish-order-of-operations-for-device.patch [new file with mode: 0644]
queue-4.19/drivers-base-introduce-kill_device.patch [new file with mode: 0644]
queue-4.19/libnvdimm-bus-fix-wait_nvdimm_bus_probe_idle-abba-de.patch [new file with mode: 0644]
queue-4.19/libnvdimm-bus-prepare-the-nd_ioctl-path-to-be-re-ent.patch [new file with mode: 0644]
queue-4.19/libnvdimm-bus-prevent-duplicate-device_unregister-ca.patch [new file with mode: 0644]
queue-4.19/libnvdimm-region-register-badblocks-before-namespace.patch [new file with mode: 0644]
queue-4.19/series

diff --git a/queue-4.19/driver-core-establish-order-of-operations-for-device.patch b/queue-4.19/driver-core-establish-order-of-operations-for-device.patch
new file mode 100644 (file)
index 0000000..7319430
--- /dev/null
@@ -0,0 +1,138 @@
+From bbc8de07c925f7fe6bb624290e2c607cb76a0051 Mon Sep 17 00:00:00 2001
+From: Alexander Duyck <alexander.h.duyck@linux.intel.com>
+Date: Mon, 5 Aug 2019 18:31:45 -0700
+Subject: driver core: Establish order of operations for device_add and
+ device_del via bitflag
+
+commit 3451a495ef244a88ed6317a035299d835554d579 upstream.
+
+Add an additional bit flag to the device_private struct named "dead".
+
+This additional flag provides a guarantee that when a device_del is
+executed on a given interface an async worker will not attempt to attach
+the driver following the earlier device_del call. Previously this
+guarantee was not present and could result in the device_del call
+attempting to remove a driver from an interface only to have the async
+worker attempt to probe the driver later when it finally completes the
+asynchronous probe call.
+
+One additional change added was that I pulled the check for dev->driver
+out of the __device_attach_driver call and instead placed it in the
+__device_attach_async_helper call. This was motivated by the fact that the
+only other caller of this, __device_attach, had already taken the
+device_lock() and checked for dev->driver. Instead of testing for this
+twice in this path it makes more sense to just consolidate the dev->dead
+and dev->driver checks together into one set of checks.
+
+Reviewed-by: Dan Williams <dan.j.williams@intel.com>
+Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/base/base.h |  4 ++++
+ drivers/base/core.c | 11 +++++++++++
+ drivers/base/dd.c   | 22 +++++++++++-----------
+ 3 files changed, 26 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/base/base.h b/drivers/base/base.h
+index 7a419a7a6235b..559b047de9f75 100644
+--- a/drivers/base/base.h
++++ b/drivers/base/base.h
+@@ -66,6 +66,9 @@ struct driver_private {
+  *    probed first.
+  * @device - pointer back to the struct device that this structure is
+  * associated with.
++ * @dead - This device is currently either in the process of or has been
++ *    removed from the system. Any asynchronous events scheduled for this
++ *    device should exit without taking any action.
+  *
+  * Nothing outside of the driver core should ever touch these fields.
+  */
+@@ -76,6 +79,7 @@ struct device_private {
+       struct klist_node knode_bus;
+       struct list_head deferred_probe;
+       struct device *device;
++      u8 dead:1;
+ };
+ #define to_device_private_parent(obj) \
+       container_of(obj, struct device_private, knode_parent)
+diff --git a/drivers/base/core.c b/drivers/base/core.c
+index 92e2c32c22270..37a90d72f3736 100644
+--- a/drivers/base/core.c
++++ b/drivers/base/core.c
+@@ -2050,6 +2050,17 @@ void device_del(struct device *dev)
+       struct kobject *glue_dir = NULL;
+       struct class_interface *class_intf;
++      /*
++       * Hold the device lock and set the "dead" flag to guarantee that
++       * the update behavior is consistent with the other bitfields near
++       * it and that we cannot have an asynchronous probe routine trying
++       * to run while we are tearing out the bus/class/sysfs from
++       * underneath the device.
++       */
++      device_lock(dev);
++      dev->p->dead = true;
++      device_unlock(dev);
++
+       /* Notify clients of device removal.  This call must come
+        * before dpm_sysfs_remove().
+        */
+diff --git a/drivers/base/dd.c b/drivers/base/dd.c
+index d48b310c47603..11d24a552ee49 100644
+--- a/drivers/base/dd.c
++++ b/drivers/base/dd.c
+@@ -725,15 +725,6 @@ static int __device_attach_driver(struct device_driver *drv, void *_data)
+       bool async_allowed;
+       int ret;
+-      /*
+-       * Check if device has already been claimed. This may
+-       * happen with driver loading, device discovery/registration,
+-       * and deferred probe processing happens all at once with
+-       * multiple threads.
+-       */
+-      if (dev->driver)
+-              return -EBUSY;
+-
+       ret = driver_match_device(drv, dev);
+       if (ret == 0) {
+               /* no match */
+@@ -768,6 +759,15 @@ static void __device_attach_async_helper(void *_dev, async_cookie_t cookie)
+       device_lock(dev);
++      /*
++       * Check if device has already been removed or claimed. This may
++       * happen with driver loading, device discovery/registration,
++       * and deferred probe processing happens all at once with
++       * multiple threads.
++       */
++      if (dev->p->dead || dev->driver)
++              goto out_unlock;
++
+       if (dev->parent)
+               pm_runtime_get_sync(dev->parent);
+@@ -778,7 +778,7 @@ static void __device_attach_async_helper(void *_dev, async_cookie_t cookie)
+       if (dev->parent)
+               pm_runtime_put(dev->parent);
+-
++out_unlock:
+       device_unlock(dev);
+       put_device(dev);
+@@ -891,7 +891,7 @@ static int __driver_attach(struct device *dev, void *data)
+       if (dev->parent && dev->bus->need_parent_lock)
+               device_lock(dev->parent);
+       device_lock(dev);
+-      if (!dev->driver)
++      if (!dev->p->dead && !dev->driver)
+               driver_probe_device(drv, dev);
+       device_unlock(dev);
+       if (dev->parent && dev->bus->need_parent_lock)
+-- 
+2.20.1
+
diff --git a/queue-4.19/drivers-base-introduce-kill_device.patch b/queue-4.19/drivers-base-introduce-kill_device.patch
new file mode 100644 (file)
index 0000000..78672f9
--- /dev/null
@@ -0,0 +1,102 @@
+From cdeef88a8ab68835a38c5accf022d8e87333568b Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Mon, 5 Aug 2019 18:31:51 -0700
+Subject: drivers/base: Introduce kill_device()
+
+commit 00289cd87676e14913d2d8492d1ce05c4baafdae upstream.
+
+The libnvdimm subsystem arranges for devices to be destroyed as a result
+of a sysfs operation. Since device_unregister() cannot be called from
+an actively running sysfs attribute of the same device libnvdimm
+arranges for device_unregister() to be performed in an out-of-line async
+context.
+
+The driver core maintains a 'dead' state for coordinating its own racing
+async registration / de-registration requests. Rather than add local
+'dead' state tracking infrastructure to libnvdimm device objects, export
+the existing state tracking via a new kill_device() helper.
+
+The kill_device() helper simply marks the device as dead, i.e. that it
+is on its way to device_del(), or returns that the device was already
+dead. This can be used in advance of calling device_unregister() for
+subsystems like libnvdimm that might need to handle multiple user
+threads racing to delete a device.
+
+This refactoring does not change any behavior, but it is a pre-requisite
+for follow-on fixes and therefore marked for -stable.
+
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: "Rafael J. Wysocki" <rafael@kernel.org>
+Fixes: 4d88a97aa9e8 ("libnvdimm, nvdimm: dimm driver and base libnvdimm device-driver...")
+Cc: <stable@vger.kernel.org>
+Tested-by: Jane Chu <jane.chu@oracle.com>
+Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Link: https://lore.kernel.org/r/156341207332.292348.14959761496009347574.stgit@dwillia2-desk3.amr.corp.intel.com
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/base/core.c    | 27 +++++++++++++++++++--------
+ include/linux/device.h |  1 +
+ 2 files changed, 20 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/base/core.c b/drivers/base/core.c
+index 37a90d72f3736..e1a8d5c06f65e 100644
+--- a/drivers/base/core.c
++++ b/drivers/base/core.c
+@@ -2031,6 +2031,24 @@ void put_device(struct device *dev)
+ }
+ EXPORT_SYMBOL_GPL(put_device);
++bool kill_device(struct device *dev)
++{
++      /*
++       * Require the device lock and set the "dead" flag to guarantee that
++       * the update behavior is consistent with the other bitfields near
++       * it and that we cannot have an asynchronous probe routine trying
++       * to run while we are tearing out the bus/class/sysfs from
++       * underneath the device.
++       */
++      lockdep_assert_held(&dev->mutex);
++
++      if (dev->p->dead)
++              return false;
++      dev->p->dead = true;
++      return true;
++}
++EXPORT_SYMBOL_GPL(kill_device);
++
+ /**
+  * device_del - delete device from system.
+  * @dev: device.
+@@ -2050,15 +2068,8 @@ void device_del(struct device *dev)
+       struct kobject *glue_dir = NULL;
+       struct class_interface *class_intf;
+-      /*
+-       * Hold the device lock and set the "dead" flag to guarantee that
+-       * the update behavior is consistent with the other bitfields near
+-       * it and that we cannot have an asynchronous probe routine trying
+-       * to run while we are tearing out the bus/class/sysfs from
+-       * underneath the device.
+-       */
+       device_lock(dev);
+-      dev->p->dead = true;
++      kill_device(dev);
+       device_unlock(dev);
+       /* Notify clients of device removal.  This call must come
+diff --git a/include/linux/device.h b/include/linux/device.h
+index 3f1066a9e1c3a..19dd8852602c4 100644
+--- a/include/linux/device.h
++++ b/include/linux/device.h
+@@ -1332,6 +1332,7 @@ extern int (*platform_notify_remove)(struct device *dev);
+  */
+ extern struct device *get_device(struct device *dev);
+ extern void put_device(struct device *dev);
++extern bool kill_device(struct device *dev);
+ #ifdef CONFIG_DEVTMPFS
+ extern int devtmpfs_create_node(struct device *dev);
+-- 
+2.20.1
+
diff --git a/queue-4.19/libnvdimm-bus-fix-wait_nvdimm_bus_probe_idle-abba-de.patch b/queue-4.19/libnvdimm-bus-fix-wait_nvdimm_bus_probe_idle-abba-de.patch
new file mode 100644 (file)
index 0000000..adc1366
--- /dev/null
@@ -0,0 +1,152 @@
+From b872a79514a335730a81434bb7e14ce3e9b04c91 Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Mon, 5 Aug 2019 18:32:13 -0700
+Subject: libnvdimm/bus: Fix wait_nvdimm_bus_probe_idle() ABBA deadlock
+
+commit ca6bf264f6d856f959c4239cda1047b587745c67 upstream.
+
+A multithreaded namespace creation/destruction stress test currently
+deadlocks with the following lockup signature:
+
+    INFO: task ndctl:2924 blocked for more than 122 seconds.
+          Tainted: G           OE     5.2.0-rc4+ #3382
+    "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+    ndctl           D    0  2924   1176 0x00000000
+    Call Trace:
+     ? __schedule+0x27e/0x780
+     schedule+0x30/0xb0
+     wait_nvdimm_bus_probe_idle+0x8a/0xd0 [libnvdimm]
+     ? finish_wait+0x80/0x80
+     uuid_store+0xe6/0x2e0 [libnvdimm]
+     kernfs_fop_write+0xf0/0x1a0
+     vfs_write+0xb7/0x1b0
+     ksys_write+0x5c/0xd0
+     do_syscall_64+0x60/0x240
+
+     INFO: task ndctl:2923 blocked for more than 122 seconds.
+           Tainted: G           OE     5.2.0-rc4+ #3382
+     "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+     ndctl           D    0  2923   1175 0x00000000
+     Call Trace:
+      ? __schedule+0x27e/0x780
+      ? __mutex_lock+0x489/0x910
+      schedule+0x30/0xb0
+      schedule_preempt_disabled+0x11/0x20
+      __mutex_lock+0x48e/0x910
+      ? nvdimm_namespace_common_probe+0x95/0x4d0 [libnvdimm]
+      ? __lock_acquire+0x23f/0x1710
+      ? nvdimm_namespace_common_probe+0x95/0x4d0 [libnvdimm]
+      nvdimm_namespace_common_probe+0x95/0x4d0 [libnvdimm]
+      __dax_pmem_probe+0x5e/0x210 [dax_pmem_core]
+      ? nvdimm_bus_probe+0x1d0/0x2c0 [libnvdimm]
+      dax_pmem_probe+0xc/0x20 [dax_pmem]
+      nvdimm_bus_probe+0x90/0x2c0 [libnvdimm]
+      really_probe+0xef/0x390
+      driver_probe_device+0xb4/0x100
+
+In this sequence an 'nd_dax' device is being probed and trying to take
+the lock on its backing namespace to validate that the 'nd_dax' device
+indeed has exclusive access to the backing namespace. Meanwhile, another
+thread is trying to update the uuid property of that same backing
+namespace. So one thread is in the probe path trying to acquire the
+lock, and the other thread has acquired the lock and tries to flush the
+probe path.
+
+Fix this deadlock by not holding the namespace device_lock over the
+wait_nvdimm_bus_probe_idle() synchronization step. In turn this requires
+the device_lock to be held on entry to wait_nvdimm_bus_probe_idle() and
+subsequently dropped internally to wait_nvdimm_bus_probe_idle().
+
+Cc: <stable@vger.kernel.org>
+Fixes: bf9bccc14c05 ("libnvdimm: pmem label sets and namespace instantiation")
+Cc: Vishal Verma <vishal.l.verma@intel.com>
+Tested-by: Jane Chu <jane.chu@oracle.com>
+Link: https://lore.kernel.org/r/156341210094.292348.2384694131126767789.stgit@dwillia2-desk3.amr.corp.intel.com
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nvdimm/bus.c         | 14 +++++++++-----
+ drivers/nvdimm/region_devs.c |  4 ++++
+ 2 files changed, 13 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
+index 5abcdb4faa644..2ba22cd1331b0 100644
+--- a/drivers/nvdimm/bus.c
++++ b/drivers/nvdimm/bus.c
+@@ -865,10 +865,12 @@ void wait_nvdimm_bus_probe_idle(struct device *dev)
+       do {
+               if (nvdimm_bus->probe_active == 0)
+                       break;
+-              nvdimm_bus_unlock(&nvdimm_bus->dev);
++              nvdimm_bus_unlock(dev);
++              device_unlock(dev);
+               wait_event(nvdimm_bus->wait,
+                               nvdimm_bus->probe_active == 0);
+-              nvdimm_bus_lock(&nvdimm_bus->dev);
++              device_lock(dev);
++              nvdimm_bus_lock(dev);
+       } while (true);
+ }
+@@ -994,7 +996,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
+               case ND_CMD_ARS_START:
+               case ND_CMD_CLEAR_ERROR:
+               case ND_CMD_CALL:
+-                      dev_dbg(&nvdimm_bus->dev, "'%s' command while read-only.\n",
++                      dev_dbg(dev, "'%s' command while read-only.\n",
+                                       nvdimm ? nvdimm_cmd_name(cmd)
+                                       : nvdimm_bus_cmd_name(cmd));
+                       return -EPERM;
+@@ -1083,7 +1085,8 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
+               goto out;
+       }
+-      nvdimm_bus_lock(&nvdimm_bus->dev);
++      device_lock(dev);
++      nvdimm_bus_lock(dev);
+       rc = nd_cmd_clear_to_send(nvdimm_bus, nvdimm, func, buf);
+       if (rc)
+               goto out_unlock;
+@@ -1103,7 +1106,8 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
+               rc = -EFAULT;
+ out_unlock:
+-      nvdimm_bus_unlock(&nvdimm_bus->dev);
++      nvdimm_bus_unlock(dev);
++      device_unlock(dev);
+ out:
+       kfree(in_env);
+       kfree(out_env);
+diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
+index e7377f1028ef6..0303296e6d5b6 100644
+--- a/drivers/nvdimm/region_devs.c
++++ b/drivers/nvdimm/region_devs.c
+@@ -425,10 +425,12 @@ static ssize_t available_size_show(struct device *dev,
+        * memory nvdimm_bus_lock() is dropped, but that's userspace's
+        * problem to not race itself.
+        */
++      device_lock(dev);
+       nvdimm_bus_lock(dev);
+       wait_nvdimm_bus_probe_idle(dev);
+       available = nd_region_available_dpa(nd_region);
+       nvdimm_bus_unlock(dev);
++      device_unlock(dev);
+       return sprintf(buf, "%llu\n", available);
+ }
+@@ -440,10 +442,12 @@ static ssize_t max_available_extent_show(struct device *dev,
+       struct nd_region *nd_region = to_nd_region(dev);
+       unsigned long long available = 0;
++      device_lock(dev);
+       nvdimm_bus_lock(dev);
+       wait_nvdimm_bus_probe_idle(dev);
+       available = nd_region_allocatable_dpa(nd_region);
+       nvdimm_bus_unlock(dev);
++      device_unlock(dev);
+       return sprintf(buf, "%llu\n", available);
+ }
+-- 
+2.20.1
+
diff --git a/queue-4.19/libnvdimm-bus-prepare-the-nd_ioctl-path-to-be-re-ent.patch b/queue-4.19/libnvdimm-bus-prepare-the-nd_ioctl-path-to-be-re-ent.patch
new file mode 100644 (file)
index 0000000..9fc0b04
--- /dev/null
@@ -0,0 +1,164 @@
+From d30d367308d5dbaf26246077168c162938266a4c Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Mon, 5 Aug 2019 18:32:07 -0700
+Subject: libnvdimm/bus: Prepare the nd_ioctl() path to be re-entrant
+
+commit 6de5d06e657acdbcf9637dac37916a4a5309e0f4 upstream.
+
+In preparation for not holding a lock over the execution of nd_ioctl(),
+update the implementation to allow multiple threads to be attempting
+ioctls at the same time. The bus lock still prevents multiple in-flight
+->ndctl() invocations from corrupting each other's state, but static
+global staging buffers are moved to the heap.
+
+Reported-by: Vishal Verma <vishal.l.verma@intel.com>
+Reviewed-by: Vishal Verma <vishal.l.verma@intel.com>
+Tested-by: Vishal Verma <vishal.l.verma@intel.com>
+Link: https://lore.kernel.org/r/156341208947.292348.10560140326807607481.stgit@dwillia2-desk3.amr.corp.intel.com
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nvdimm/bus.c | 59 +++++++++++++++++++++++++++-----------------
+ 1 file changed, 37 insertions(+), 22 deletions(-)
+
+diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
+index 11cfd23e5aff7..5abcdb4faa644 100644
+--- a/drivers/nvdimm/bus.c
++++ b/drivers/nvdimm/bus.c
+@@ -951,20 +951,19 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
+               int read_only, unsigned int ioctl_cmd, unsigned long arg)
+ {
+       struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
+-      static char out_env[ND_CMD_MAX_ENVELOPE];
+-      static char in_env[ND_CMD_MAX_ENVELOPE];
+       const struct nd_cmd_desc *desc = NULL;
+       unsigned int cmd = _IOC_NR(ioctl_cmd);
+       struct device *dev = &nvdimm_bus->dev;
+       void __user *p = (void __user *) arg;
++      char *out_env = NULL, *in_env = NULL;
+       const char *cmd_name, *dimm_name;
+       u32 in_len = 0, out_len = 0;
+       unsigned int func = cmd;
+       unsigned long cmd_mask;
+       struct nd_cmd_pkg pkg;
+       int rc, i, cmd_rc;
++      void *buf = NULL;
+       u64 buf_len = 0;
+-      void *buf;
+       if (nvdimm) {
+               desc = nd_cmd_dimm_desc(cmd);
+@@ -1004,6 +1003,9 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
+               }
+       /* process an input envelope */
++      in_env = kzalloc(ND_CMD_MAX_ENVELOPE, GFP_KERNEL);
++      if (!in_env)
++              return -ENOMEM;
+       for (i = 0; i < desc->in_num; i++) {
+               u32 in_size, copy;
+@@ -1011,14 +1013,17 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
+               if (in_size == UINT_MAX) {
+                       dev_err(dev, "%s:%s unknown input size cmd: %s field: %d\n",
+                                       __func__, dimm_name, cmd_name, i);
+-                      return -ENXIO;
++                      rc = -ENXIO;
++                      goto out;
+               }
+-              if (in_len < sizeof(in_env))
+-                      copy = min_t(u32, sizeof(in_env) - in_len, in_size);
++              if (in_len < ND_CMD_MAX_ENVELOPE)
++                      copy = min_t(u32, ND_CMD_MAX_ENVELOPE - in_len, in_size);
+               else
+                       copy = 0;
+-              if (copy && copy_from_user(&in_env[in_len], p + in_len, copy))
+-                      return -EFAULT;
++              if (copy && copy_from_user(&in_env[in_len], p + in_len, copy)) {
++                      rc = -EFAULT;
++                      goto out;
++              }
+               in_len += in_size;
+       }
+@@ -1030,6 +1035,12 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
+       }
+       /* process an output envelope */
++      out_env = kzalloc(ND_CMD_MAX_ENVELOPE, GFP_KERNEL);
++      if (!out_env) {
++              rc = -ENOMEM;
++              goto out;
++      }
++
+       for (i = 0; i < desc->out_num; i++) {
+               u32 out_size = nd_cmd_out_size(nvdimm, cmd, desc, i,
+                               (u32 *) in_env, (u32 *) out_env, 0);
+@@ -1038,15 +1049,18 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
+               if (out_size == UINT_MAX) {
+                       dev_dbg(dev, "%s unknown output size cmd: %s field: %d\n",
+                                       dimm_name, cmd_name, i);
+-                      return -EFAULT;
++                      rc = -EFAULT;
++                      goto out;
+               }
+-              if (out_len < sizeof(out_env))
+-                      copy = min_t(u32, sizeof(out_env) - out_len, out_size);
++              if (out_len < ND_CMD_MAX_ENVELOPE)
++                      copy = min_t(u32, ND_CMD_MAX_ENVELOPE - out_len, out_size);
+               else
+                       copy = 0;
+               if (copy && copy_from_user(&out_env[out_len],
+-                                      p + in_len + out_len, copy))
+-                      return -EFAULT;
++                                      p + in_len + out_len, copy)) {
++                      rc = -EFAULT;
++                      goto out;
++              }
+               out_len += out_size;
+       }
+@@ -1054,12 +1068,15 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
+       if (buf_len > ND_IOCTL_MAX_BUFLEN) {
+               dev_dbg(dev, "%s cmd: %s buf_len: %llu > %d\n", dimm_name,
+                               cmd_name, buf_len, ND_IOCTL_MAX_BUFLEN);
+-              return -EINVAL;
++              rc = -EINVAL;
++              goto out;
+       }
+       buf = vmalloc(buf_len);
+-      if (!buf)
+-              return -ENOMEM;
++      if (!buf) {
++              rc = -ENOMEM;
++              goto out;
++      }
+       if (copy_from_user(buf, p, buf_len)) {
+               rc = -EFAULT;
+@@ -1081,17 +1098,15 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
+               nvdimm_account_cleared_poison(nvdimm_bus, clear_err->address,
+                               clear_err->cleared);
+       }
+-      nvdimm_bus_unlock(&nvdimm_bus->dev);
+       if (copy_to_user(p, buf, buf_len))
+               rc = -EFAULT;
+-      vfree(buf);
+-      return rc;
+-
+- out_unlock:
++out_unlock:
+       nvdimm_bus_unlock(&nvdimm_bus->dev);
+- out:
++out:
++      kfree(in_env);
++      kfree(out_env);
+       vfree(buf);
+       return rc;
+ }
+-- 
+2.20.1
+
diff --git a/queue-4.19/libnvdimm-bus-prevent-duplicate-device_unregister-ca.patch b/queue-4.19/libnvdimm-bus-prevent-duplicate-device_unregister-ca.patch
new file mode 100644 (file)
index 0000000..9442f36
--- /dev/null
@@ -0,0 +1,95 @@
+From 5a6988df65317014d7183ee56a68dc90a81e1cb0 Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Mon, 5 Aug 2019 18:31:56 -0700
+Subject: libnvdimm/bus: Prevent duplicate device_unregister() calls
+
+commit 8aac0e2338916e273ccbd438a2b7a1e8c61749f5 upstream.
+
+A multithreaded namespace creation/destruction stress test currently
+fails with signatures like the following:
+
+    sysfs group 'power' not found for kobject 'dax1.1'
+    RIP: 0010:sysfs_remove_group+0x76/0x80
+    Call Trace:
+     device_del+0x73/0x370
+     device_unregister+0x16/0x50
+     nd_async_device_unregister+0x1e/0x30 [libnvdimm]
+     async_run_entry_fn+0x39/0x160
+     process_one_work+0x23c/0x5e0
+     worker_thread+0x3c/0x390
+
+    BUG: kernel NULL pointer dereference, address: 0000000000000020
+    RIP: 0010:klist_put+0x1b/0x6c
+    Call Trace:
+     klist_del+0xe/0x10
+     device_del+0x8a/0x2c9
+     ? __switch_to_asm+0x34/0x70
+     ? __switch_to_asm+0x40/0x70
+     device_unregister+0x44/0x4f
+     nd_async_device_unregister+0x22/0x2d [libnvdimm]
+     async_run_entry_fn+0x47/0x15a
+     process_one_work+0x1a2/0x2eb
+     worker_thread+0x1b8/0x26e
+
+Use the kill_device() helper to atomically resolve the race of multiple
+threads issuing kill, device_unregister(), requests.
+
+Reported-by: Jane Chu <jane.chu@oracle.com>
+Reported-by: Erwin Tsaur <erwin.tsaur@oracle.com>
+Fixes: 4d88a97aa9e8 ("libnvdimm, nvdimm: dimm driver and base libnvdimm device-driver...")
+Cc: <stable@vger.kernel.org>
+Link: https://github.com/pmem/ndctl/issues/96
+Tested-by: Jane Chu <jane.chu@oracle.com>
+Link: https://lore.kernel.org/r/156341207846.292348.10435719262819764054.stgit@dwillia2-desk3.amr.corp.intel.com
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nvdimm/bus.c | 25 +++++++++++++++++++++++++
+ 1 file changed, 25 insertions(+)
+
+diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
+index ee39e2c1644ae..11cfd23e5aff7 100644
+--- a/drivers/nvdimm/bus.c
++++ b/drivers/nvdimm/bus.c
+@@ -528,13 +528,38 @@ EXPORT_SYMBOL(nd_device_register);
+ void nd_device_unregister(struct device *dev, enum nd_async_mode mode)
+ {
++      bool killed;
++
+       switch (mode) {
+       case ND_ASYNC:
++              /*
++               * In the async case this is being triggered with the
++               * device lock held and the unregistration work needs to
++               * be moved out of line iff this thread has won the
++               * race to schedule the deletion.
++               */
++              if (!kill_device(dev))
++                      return;
++
+               get_device(dev);
+               async_schedule_domain(nd_async_device_unregister, dev,
+                               &nd_async_domain);
+               break;
+       case ND_SYNC:
++              /*
++               * In the sync case the device is being unregistered due
++               * to a state change of the parent. Claim the kill state
++               * to synchronize against other unregistration requests,
++               * or otherwise let the async path handle it if the
++               * unregistration was already queued.
++               */
++              device_lock(dev);
++              killed = kill_device(dev);
++              device_unlock(dev);
++
++              if (!killed)
++                      return;
++
+               nd_synchronize();
+               device_unregister(dev);
+               break;
+-- 
+2.20.1
+
diff --git a/queue-4.19/libnvdimm-region-register-badblocks-before-namespace.patch b/queue-4.19/libnvdimm-region-register-badblocks-before-namespace.patch
new file mode 100644 (file)
index 0000000..655d753
--- /dev/null
@@ -0,0 +1,93 @@
+From 324de84f263f4115681d84853460e3a72d42ec63 Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Mon, 5 Aug 2019 18:32:02 -0700
+Subject: libnvdimm/region: Register badblocks before namespaces
+
+commit 700cd033a82d466ad8f9615f9985525e45f8960a upstream.
+
+Namespace activation expects to be able to reference region badblocks.
+The following warning sometimes triggers when asynchronous namespace
+activation races in front of the completion of namespace probing. Move
+all possible namespace probing after region badblocks initialization.
+
+Otherwise, lockdep sometimes catches the uninitialized state of the
+badblocks seqlock with stack trace signatures like:
+
+    INFO: trying to register non-static key.
+    pmem2: detected capacity change from 0 to 136365211648
+    the code is fine but needs lockdep annotation.
+    turning off the locking correctness validator.
+    CPU: 9 PID: 358 Comm: kworker/u80:5 Tainted: G           OE     5.2.0-rc4+ #3382
+    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015
+    Workqueue: events_unbound async_run_entry_fn
+    Call Trace:
+     dump_stack+0x85/0xc0
+    pmem1.12: detected capacity change from 0 to 8589934592
+     register_lock_class+0x56a/0x570
+     ? check_object+0x140/0x270
+     __lock_acquire+0x80/0x1710
+     ? __mutex_lock+0x39d/0x910
+     lock_acquire+0x9e/0x180
+     ? nd_pfn_validate+0x28f/0x440 [libnvdimm]
+     badblocks_check+0x93/0x1f0
+     ? nd_pfn_validate+0x28f/0x440 [libnvdimm]
+     nd_pfn_validate+0x28f/0x440 [libnvdimm]
+     ? lockdep_hardirqs_on+0xf0/0x180
+     nd_dax_probe+0x9a/0x120 [libnvdimm]
+     nd_pmem_probe+0x6d/0x180 [nd_pmem]
+     nvdimm_bus_probe+0x90/0x2c0 [libnvdimm]
+
+Fixes: 48af2f7e52f4 ("libnvdimm, pfn: during init, clear errors...")
+Cc: <stable@vger.kernel.org>
+Cc: Vishal Verma <vishal.l.verma@intel.com>
+Reviewed-by: Vishal Verma <vishal.l.verma@intel.com>
+Link: https://lore.kernel.org/r/156341208365.292348.1547528796026249120.stgit@dwillia2-desk3.amr.corp.intel.com
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nvdimm/region.c | 22 +++++++++++-----------
+ 1 file changed, 11 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c
+index b9ca0033cc999..f9130cc157e83 100644
+--- a/drivers/nvdimm/region.c
++++ b/drivers/nvdimm/region.c
+@@ -42,17 +42,6 @@ static int nd_region_probe(struct device *dev)
+       if (rc)
+               return rc;
+-      rc = nd_region_register_namespaces(nd_region, &err);
+-      if (rc < 0)
+-              return rc;
+-
+-      ndrd = dev_get_drvdata(dev);
+-      ndrd->ns_active = rc;
+-      ndrd->ns_count = rc + err;
+-
+-      if (rc && err && rc == err)
+-              return -ENODEV;
+-
+       if (is_nd_pmem(&nd_region->dev)) {
+               struct resource ndr_res;
+@@ -68,6 +57,17 @@ static int nd_region_probe(struct device *dev)
+               nvdimm_badblocks_populate(nd_region, &nd_region->bb, &ndr_res);
+       }
++      rc = nd_region_register_namespaces(nd_region, &err);
++      if (rc < 0)
++              return rc;
++
++      ndrd = dev_get_drvdata(dev);
++      ndrd->ns_active = rc;
++      ndrd->ns_count = rc + err;
++
++      if (rc && err && rc == err)
++              return -ENODEV;
++
+       nd_region->btt_seed = nd_btt_create(nd_region);
+       nd_region->pfn_seed = nd_pfn_create(nd_region);
+       nd_region->dax_seed = nd_dax_create(nd_region);
+-- 
+2.20.1
+
index bfa4408f13ea7acbc3af2344fb2441bf07927d57..0318338816d05beed2947f8de9f3dfbbf55f6c8d 100644 (file)
@@ -1,2 +1,8 @@
 scsi-fcoe-embed-fc_rport_priv-in-fcoe_rport-structure.patch
 gcc-9-don-t-warn-about-uninitialized-variable.patch
+driver-core-establish-order-of-operations-for-device.patch
+drivers-base-introduce-kill_device.patch
+libnvdimm-bus-prevent-duplicate-device_unregister-ca.patch
+libnvdimm-region-register-badblocks-before-namespace.patch
+libnvdimm-bus-prepare-the-nd_ioctl-path-to-be-re-ent.patch
+libnvdimm-bus-fix-wait_nvdimm_bus_probe_idle-abba-de.patch