From: Greg Kroah-Hartman Date: Mon, 26 Jul 2021 09:17:44 +0000 (+0200) Subject: 5.10-stable patches X-Git-Tag: v4.4.277~38 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=dfe36e0857d13f461fe8e07f191f549742f3e30e;p=thirdparty%2Fkernel%2Fstable-queue.git 5.10-stable patches added patches: driver-core-prevent-warning-when-removing-a-device-link-from-unregistered-consumer.patch drm-amdgpu-update-golden-setting-for-sienna_cichlid.patch drm-return-enotty-for-non-drm-ioctls.patch hugetlbfs-fix-mount-mode-command-line-processing.patch memblock-make-for_each_mem_range-traverse-memblock_hotplug-regions.patch misc-eeprom-at24-always-append-device-id-even-if-label-property-is-set.patch nds32-fix-up-stack-guard-gap.patch rbd-always-kick-acquire-on-acquired-and-released-notifications.patch rbd-don-t-hold-lock_rwsem-while-running_list-is-being-drained.patch revert-drm-i915-propagate-errors-on-awaiting-already-signaled-fences.patch --- diff --git a/queue-5.10/driver-core-prevent-warning-when-removing-a-device-link-from-unregistered-consumer.patch b/queue-5.10/driver-core-prevent-warning-when-removing-a-device-link-from-unregistered-consumer.patch new file mode 100644 index 00000000000..b98bf1dcaa9 --- /dev/null +++ b/queue-5.10/driver-core-prevent-warning-when-removing-a-device-link-from-unregistered-consumer.patch @@ -0,0 +1,39 @@ +From e64daad660a0c9ace3acdc57099fffe5ed83f977 Mon Sep 17 00:00:00 2001 +From: Adrian Hunter +Date: Fri, 16 Jul 2021 14:44:07 +0300 +Subject: driver core: Prevent warning when removing a device link from unregistered consumer + +From: Adrian Hunter + +commit e64daad660a0c9ace3acdc57099fffe5ed83f977 upstream. + +sysfs_remove_link() causes a warning if the parent directory does not +exist. That can happen if the device link consumer has not been registered. +So do not attempt sysfs_remove_link() in that case. 
+ +Fixes: 287905e68dd29 ("driver core: Expose device link details in sysfs") +Signed-off-by: Adrian Hunter +Cc: stable@vger.kernel.org # 5.9+ +Reviewed-by: Rafael J. Wysocki +Link: https://lore.kernel.org/r/20210716114408.17320-2-adrian.hunter@intel.com +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Greg Kroah-Hartman +--- + drivers/base/core.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/drivers/base/core.c ++++ b/drivers/base/core.c +@@ -449,8 +449,10 @@ static void devlink_remove_symlinks(stru + return; + } + +- snprintf(buf, len, "supplier:%s:%s", dev_bus_name(sup), dev_name(sup)); +- sysfs_remove_link(&con->kobj, buf); ++ if (device_is_registered(con)) { ++ snprintf(buf, len, "supplier:%s:%s", dev_bus_name(sup), dev_name(sup)); ++ sysfs_remove_link(&con->kobj, buf); ++ } + snprintf(buf, len, "consumer:%s:%s", dev_bus_name(con), dev_name(con)); + sysfs_remove_link(&sup->kobj, buf); + kfree(buf); diff --git a/queue-5.10/drm-amdgpu-update-golden-setting-for-sienna_cichlid.patch b/queue-5.10/drm-amdgpu-update-golden-setting-for-sienna_cichlid.patch new file mode 100644 index 00000000000..63432416b2b --- /dev/null +++ b/queue-5.10/drm-amdgpu-update-golden-setting-for-sienna_cichlid.patch @@ -0,0 +1,30 @@ +From 3e94b5965e624f7e6d8dd18eb8f3bf2bb99ba30d Mon Sep 17 00:00:00 2001 +From: Likun Gao +Date: Thu, 15 Jul 2021 11:08:48 +0800 +Subject: drm/amdgpu: update golden setting for sienna_cichlid + +From: Likun Gao + +commit 3e94b5965e624f7e6d8dd18eb8f3bf2bb99ba30d upstream. + +Update GFX golden setting for sienna_cichlid. 
+ +Signed-off-by: Likun Gao +Reviewed-by: Hawking Zhang +Signed-off-by: Alex Deucher +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +@@ -3137,6 +3137,7 @@ static const struct soc15_reg_golden gol + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER7_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000), ++ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0x00010000, 0x00010020), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffbfffff, 0x00a00000) + }; diff --git a/queue-5.10/drm-return-enotty-for-non-drm-ioctls.patch b/queue-5.10/drm-return-enotty-for-non-drm-ioctls.patch new file mode 100644 index 00000000000..dd13f4f8360 --- /dev/null +++ b/queue-5.10/drm-return-enotty-for-non-drm-ioctls.patch @@ -0,0 +1,56 @@ +From 3abab27c322e0f2acf981595aa8040c9164dc9fb Mon Sep 17 00:00:00 2001 +From: Charles Baylis +Date: Fri, 16 Jul 2021 17:43:12 +0100 +Subject: drm: Return -ENOTTY for non-drm ioctls + +From: Charles Baylis + +commit 3abab27c322e0f2acf981595aa8040c9164dc9fb upstream. + +drm: Return -ENOTTY for non-drm ioctls + +Return -ENOTTY from drm_ioctl() when userspace passes in a cmd number +which doesn't relate to the drm subsystem. + +Glibc uses the TCGETS ioctl to implement isatty(), and without this +change isatty() incorrectly returns true for drm devices. + +To test run this command: +$ if [ -t 0 ]; then echo is a tty; fi < /dev/dri/card0 +which shows "is a tty" without this patch. + +This may also modify memory which the userspace application is not +expecting. 
+ +Signed-off-by: Charles Baylis +Cc: stable@vger.kernel.org +Signed-off-by: Daniel Vetter +Link: https://patchwork.freedesktop.org/patch/msgid/YPG3IBlzaMhfPqCr@stando.fishzet.co.uk +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/drm_ioctl.c | 3 +++ + include/drm/drm_ioctl.h | 1 + + 2 files changed, 4 insertions(+) + +--- a/drivers/gpu/drm/drm_ioctl.c ++++ b/drivers/gpu/drm/drm_ioctl.c +@@ -827,6 +827,9 @@ long drm_ioctl(struct file *filp, + if (drm_dev_is_unplugged(dev)) + return -ENODEV; + ++ if (DRM_IOCTL_TYPE(cmd) != DRM_IOCTL_BASE) ++ return -ENOTTY; ++ + is_driver_ioctl = nr >= DRM_COMMAND_BASE && nr < DRM_COMMAND_END; + + if (is_driver_ioctl) { +--- a/include/drm/drm_ioctl.h ++++ b/include/drm/drm_ioctl.h +@@ -68,6 +68,7 @@ typedef int drm_ioctl_compat_t(struct fi + unsigned long arg); + + #define DRM_IOCTL_NR(n) _IOC_NR(n) ++#define DRM_IOCTL_TYPE(n) _IOC_TYPE(n) + #define DRM_MAJOR 226 + + /** diff --git a/queue-5.10/hugetlbfs-fix-mount-mode-command-line-processing.patch b/queue-5.10/hugetlbfs-fix-mount-mode-command-line-processing.patch new file mode 100644 index 00000000000..0921c4fa433 --- /dev/null +++ b/queue-5.10/hugetlbfs-fix-mount-mode-command-line-processing.patch @@ -0,0 +1,43 @@ +From e0f7e2b2f7e7864238a4eea05cc77ae1be2bf784 Mon Sep 17 00:00:00 2001 +From: Mike Kravetz +Date: Fri, 23 Jul 2021 15:50:44 -0700 +Subject: hugetlbfs: fix mount mode command line processing + +From: Mike Kravetz + +commit e0f7e2b2f7e7864238a4eea05cc77ae1be2bf784 upstream. + +In commit 32021982a324 ("hugetlbfs: Convert to fs_context") processing +of the mount mode string was changed from match_octal() to fsparam_u32. + +This changed existing behavior as match_octal does not require octal +values to have a '0' prefix, but fsparam_u32 does. + +Use fsparam_u32oct which provides the same behavior as match_octal. 
+ +Link: https://lkml.kernel.org/r/20210721183326.102716-1-mike.kravetz@oracle.com +Fixes: 32021982a324 ("hugetlbfs: Convert to fs_context") +Signed-off-by: Mike Kravetz +Reported-by: Dennis Camera +Reviewed-by: Matthew Wilcox (Oracle) +Cc: David Howells +Cc: Al Viro +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + fs/hugetlbfs/inode.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/hugetlbfs/inode.c ++++ b/fs/hugetlbfs/inode.c +@@ -77,7 +77,7 @@ enum hugetlb_param { + static const struct fs_parameter_spec hugetlb_fs_parameters[] = { + fsparam_u32 ("gid", Opt_gid), + fsparam_string("min_size", Opt_min_size), +- fsparam_u32 ("mode", Opt_mode), ++ fsparam_u32oct("mode", Opt_mode), + fsparam_string("nr_inodes", Opt_nr_inodes), + fsparam_string("pagesize", Opt_pagesize), + fsparam_string("size", Opt_size), diff --git a/queue-5.10/memblock-make-for_each_mem_range-traverse-memblock_hotplug-regions.patch b/queue-5.10/memblock-make-for_each_mem_range-traverse-memblock_hotplug-regions.patch new file mode 100644 index 00000000000..581c5c77564 --- /dev/null +++ b/queue-5.10/memblock-make-for_each_mem_range-traverse-memblock_hotplug-regions.patch @@ -0,0 +1,105 @@ +From 79e482e9c3ae86e849c701c846592e72baddda5a Mon Sep 17 00:00:00 2001 +From: Mike Rapoport +Date: Fri, 23 Jul 2021 15:50:26 -0700 +Subject: memblock: make for_each_mem_range() traverse MEMBLOCK_HOTPLUG regions + +From: Mike Rapoport + +commit 79e482e9c3ae86e849c701c846592e72baddda5a upstream. + +Commit b10d6bca8720 ("arch, drivers: replace for_each_membock() with +for_each_mem_range()") didn't take into account that when there is +movable_node parameter in the kernel command line, for_each_mem_range() +would skip ranges marked with MEMBLOCK_HOTPLUG. 
+ +The page table setup code in POWER uses for_each_mem_range() to create +the linear mapping of the physical memory and since the regions marked +as MEMBLOCK_HOTPLUG are skipped, they never make it to the linear map. + +A later access to the memory in those ranges will fail: + + BUG: Unable to handle kernel data access on write at 0xc000000400000000 + Faulting instruction address: 0xc00000000008a3c0 + Oops: Kernel access of bad area, sig: 11 [#1] + LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries + Modules linked in: + CPU: 0 PID: 53 Comm: kworker/u2:0 Not tainted 5.13.0 #7 + NIP: c00000000008a3c0 LR: c0000000003c1ed8 CTR: 0000000000000040 + REGS: c000000008a57770 TRAP: 0300 Not tainted (5.13.0) + MSR: 8000000002009033 CR: 84222202 XER: 20040000 + CFAR: c0000000003c1ed4 DAR: c000000400000000 DSISR: 42000000 IRQMASK: 0 + GPR00: c0000000003c1ed8 c000000008a57a10 c0000000019da700 c000000400000000 + GPR04: 0000000000000280 0000000000000180 0000000000000400 0000000000000200 + GPR08: 0000000000000100 0000000000000080 0000000000000040 0000000000000300 + GPR12: 0000000000000380 c000000001bc0000 c0000000001660c8 c000000006337e00 + GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 + GPR20: 0000000040000000 0000000020000000 c000000001a81990 c000000008c30000 + GPR24: c000000008c20000 c000000001a81998 000fffffffff0000 c000000001a819a0 + GPR28: c000000001a81908 c00c000001000000 c000000008c40000 c000000008a64680 + NIP clear_user_page+0x50/0x80 + LR __handle_mm_fault+0xc88/0x1910 + Call Trace: + __handle_mm_fault+0xc44/0x1910 (unreliable) + handle_mm_fault+0x130/0x2a0 + __get_user_pages+0x248/0x610 + __get_user_pages_remote+0x12c/0x3e0 + get_arg_page+0x54/0xf0 + copy_string_kernel+0x11c/0x210 + kernel_execve+0x16c/0x220 + call_usermodehelper_exec_async+0x1b0/0x2f0 + ret_from_kernel_thread+0x5c/0x70 + Instruction dump: + 79280fa4 79271764 79261f24 794ae8e2 7ca94214 7d683a14 7c893a14 7d893050 + 7d4903a6 60000000 60000000 60000000 <7c001fec> 7c091fec 
7c081fec 7c051fec + ---[ end trace 490b8c67e6075e09 ]--- + +Making for_each_mem_range() include MEMBLOCK_HOTPLUG regions in the +traversal fixes this issue. + +Link: https://bugzilla.redhat.com/show_bug.cgi?id=1976100 +Link: https://lkml.kernel.org/r/20210712071132.20902-1-rppt@kernel.org +Fixes: b10d6bca8720 ("arch, drivers: replace for_each_membock() with for_each_mem_range()") +Signed-off-by: Mike Rapoport +Tested-by: Greg Kurz +Reviewed-by: David Hildenbrand +Cc: [5.10+] +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/memblock.h | 4 ++-- + mm/memblock.c | 3 ++- + 2 files changed, 4 insertions(+), 3 deletions(-) + +--- a/include/linux/memblock.h ++++ b/include/linux/memblock.h +@@ -207,7 +207,7 @@ static inline void __next_physmem_range( + */ + #define for_each_mem_range(i, p_start, p_end) \ + __for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE, \ +- MEMBLOCK_NONE, p_start, p_end, NULL) ++ MEMBLOCK_HOTPLUG, p_start, p_end, NULL) + + /** + * for_each_mem_range_rev - reverse iterate through memblock areas from +@@ -218,7 +218,7 @@ static inline void __next_physmem_range( + */ + #define for_each_mem_range_rev(i, p_start, p_end) \ + __for_each_mem_range_rev(i, &memblock.memory, NULL, NUMA_NO_NODE, \ +- MEMBLOCK_NONE, p_start, p_end, NULL) ++ MEMBLOCK_HOTPLUG, p_start, p_end, NULL) + + /** + * for_each_reserved_mem_range - iterate over all reserved memblock areas +--- a/mm/memblock.c ++++ b/mm/memblock.c +@@ -940,7 +940,8 @@ static bool should_skip_region(struct me + return true; + + /* skip hotpluggable memory regions if needed */ +- if (movable_node_is_enabled() && memblock_is_hotpluggable(m)) ++ if (movable_node_is_enabled() && memblock_is_hotpluggable(m) && ++ !(flags & MEMBLOCK_HOTPLUG)) + return true; + + /* if we want mirror memory skip non-mirror memory regions */ diff --git a/queue-5.10/misc-eeprom-at24-always-append-device-id-even-if-label-property-is-set.patch 
b/queue-5.10/misc-eeprom-at24-always-append-device-id-even-if-label-property-is-set.patch new file mode 100644 index 00000000000..540be17ef89 --- /dev/null +++ b/queue-5.10/misc-eeprom-at24-always-append-device-id-even-if-label-property-is-set.patch @@ -0,0 +1,62 @@ +From c36748ac545421d94a5091c754414c0f3664bf10 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= +Date: Thu, 1 Jul 2021 08:28:25 -0700 +Subject: misc: eeprom: at24: Always append device id even if label property is set. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Jérôme Glisse + +commit c36748ac545421d94a5091c754414c0f3664bf10 upstream. + +We need to append device id even if eeprom have a label property set as some +platform can have multiple eeproms with same label and we can not register +each of those with same label. Failing to register those eeproms trigger +cascade failures on such platform (system is no longer working). + +This fix regression on such platform introduced with 4e302c3b568e + +Reported-by: Alexander Fomichev +Fixes: 4e302c3b568e ("misc: eeprom: at24: fix NVMEM name with custom AT24 device name") +Cc: stable@vger.kernel.org +Signed-off-by: Jérôme Glisse +Signed-off-by: Bartosz Golaszewski +Signed-off-by: Greg Kroah-Hartman +--- + drivers/misc/eeprom/at24.c | 17 +++++++---------- + 1 file changed, 7 insertions(+), 10 deletions(-) + +--- a/drivers/misc/eeprom/at24.c ++++ b/drivers/misc/eeprom/at24.c +@@ -714,23 +714,20 @@ static int at24_probe(struct i2c_client + } + + /* +- * If the 'label' property is not present for the AT24 EEPROM, +- * then nvmem_config.id is initialised to NVMEM_DEVID_AUTO, +- * and this will append the 'devid' to the name of the NVMEM +- * device. This is purely legacy and the AT24 driver has always +- * defaulted to this. 
However, if the 'label' property is +- * present then this means that the name is specified by the +- * firmware and this name should be used verbatim and so it is +- * not necessary to append the 'devid'. ++ * We initialize nvmem_config.id to NVMEM_DEVID_AUTO even if the ++ * label property is set as some platform can have multiple eeproms ++ * with same label and we can not register each of those with same ++ * label. Failing to register those eeproms trigger cascade failure ++ * on such platform. + */ ++ nvmem_config.id = NVMEM_DEVID_AUTO; ++ + if (device_property_present(dev, "label")) { +- nvmem_config.id = NVMEM_DEVID_NONE; + err = device_property_read_string(dev, "label", + &nvmem_config.name); + if (err) + return err; + } else { +- nvmem_config.id = NVMEM_DEVID_AUTO; + nvmem_config.name = dev_name(dev); + } + diff --git a/queue-5.10/nds32-fix-up-stack-guard-gap.patch b/queue-5.10/nds32-fix-up-stack-guard-gap.patch new file mode 100644 index 00000000000..e373002b395 --- /dev/null +++ b/queue-5.10/nds32-fix-up-stack-guard-gap.patch @@ -0,0 +1,42 @@ +From c453db6cd96418c79702eaf38259002755ab23ff Mon Sep 17 00:00:00 2001 +From: Greg Kroah-Hartman +Date: Tue, 29 Jun 2021 12:40:24 +0200 +Subject: nds32: fix up stack guard gap + +From: Greg Kroah-Hartman + +commit c453db6cd96418c79702eaf38259002755ab23ff upstream. + +Commit 1be7107fbe18 ("mm: larger stack guard gap, between vmas") fixed +up all architectures to deal with the stack guard gap. But when nds32 +was added to the tree, it forgot to do the same thing. 
+ +Resolve this by properly fixing up the nds32's version of +arch_get_unmapped_area() + +Cc: Nick Hu +Cc: Greentime Hu +Cc: Vincent Chen +Cc: Michal Hocko +Cc: Hugh Dickins +Cc: Qiang Liu +Cc: stable +Reported-by: iLifetruth +Acked-by: Hugh Dickins +Link: https://lore.kernel.org/r/20210629104024.2293615-1-gregkh@linuxfoundation.org +Signed-off-by: Greg Kroah-Hartman +--- + arch/nds32/mm/mmap.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/nds32/mm/mmap.c ++++ b/arch/nds32/mm/mmap.c +@@ -59,7 +59,7 @@ arch_get_unmapped_area(struct file *filp + + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && +- (!vma || addr + len <= vma->vm_start)) ++ (!vma || addr + len <= vm_start_gap(vma))) + return addr; + } + diff --git a/queue-5.10/rbd-always-kick-acquire-on-acquired-and-released-notifications.patch b/queue-5.10/rbd-always-kick-acquire-on-acquired-and-released-notifications.patch new file mode 100644 index 00000000000..5c1e68066c2 --- /dev/null +++ b/queue-5.10/rbd-always-kick-acquire-on-acquired-and-released-notifications.patch @@ -0,0 +1,71 @@ +From 8798d070d416d18a75770fc19787e96705073f43 Mon Sep 17 00:00:00 2001 +From: Ilya Dryomov +Date: Sat, 3 Jul 2021 11:56:55 +0200 +Subject: rbd: always kick acquire on "acquired" and "released" notifications + +From: Ilya Dryomov + +commit 8798d070d416d18a75770fc19787e96705073f43 upstream. + +Skipping the "lock has been released" notification if the lock owner +is not what we expect based on owner_cid can lead to I/O hangs. +One example is our own notifications: because owner_cid is cleared +in rbd_unlock(), when we get our own notification it is processed as +unexpected/duplicate and maybe_kick_acquire() isn't called. If a peer +that requested the lock then doesn't go through with acquiring it, +I/O requests that came in while the lock was being quiesced would +be stalled until another I/O request is submitted and kicks acquire +from rbd_img_exclusive_lock(). 
+ +This makes the comment in rbd_release_lock() actually true: prior to +this change the canceled work was being requeued in response to the +"lock has been acquired" notification from rbd_handle_acquired_lock(). + +Cc: stable@vger.kernel.org # 5.3+ +Signed-off-by: Ilya Dryomov +Tested-by: Robin Geuze +Signed-off-by: Greg Kroah-Hartman +--- + drivers/block/rbd.c | 20 +++++++------------- + 1 file changed, 7 insertions(+), 13 deletions(-) + +--- a/drivers/block/rbd.c ++++ b/drivers/block/rbd.c +@@ -4248,15 +4248,11 @@ static void rbd_handle_acquired_lock(str + if (!rbd_cid_equal(&cid, &rbd_empty_cid)) { + down_write(&rbd_dev->lock_rwsem); + if (rbd_cid_equal(&cid, &rbd_dev->owner_cid)) { +- /* +- * we already know that the remote client is +- * the owner +- */ +- up_write(&rbd_dev->lock_rwsem); +- return; ++ dout("%s rbd_dev %p cid %llu-%llu == owner_cid\n", ++ __func__, rbd_dev, cid.gid, cid.handle); ++ } else { ++ rbd_set_owner_cid(rbd_dev, &cid); + } +- +- rbd_set_owner_cid(rbd_dev, &cid); + downgrade_write(&rbd_dev->lock_rwsem); + } else { + down_read(&rbd_dev->lock_rwsem); +@@ -4281,14 +4277,12 @@ static void rbd_handle_released_lock(str + if (!rbd_cid_equal(&cid, &rbd_empty_cid)) { + down_write(&rbd_dev->lock_rwsem); + if (!rbd_cid_equal(&cid, &rbd_dev->owner_cid)) { +- dout("%s rbd_dev %p unexpected owner, cid %llu-%llu != owner_cid %llu-%llu\n", ++ dout("%s rbd_dev %p cid %llu-%llu != owner_cid %llu-%llu\n", + __func__, rbd_dev, cid.gid, cid.handle, + rbd_dev->owner_cid.gid, rbd_dev->owner_cid.handle); +- up_write(&rbd_dev->lock_rwsem); +- return; ++ } else { ++ rbd_set_owner_cid(rbd_dev, &rbd_empty_cid); + } +- +- rbd_set_owner_cid(rbd_dev, &rbd_empty_cid); + downgrade_write(&rbd_dev->lock_rwsem); + } else { + down_read(&rbd_dev->lock_rwsem); diff --git a/queue-5.10/rbd-don-t-hold-lock_rwsem-while-running_list-is-being-drained.patch b/queue-5.10/rbd-don-t-hold-lock_rwsem-while-running_list-is-being-drained.patch new file mode 100644 index 
00000000000..d76b4130b30 --- /dev/null +++ b/queue-5.10/rbd-don-t-hold-lock_rwsem-while-running_list-is-being-drained.patch @@ -0,0 +1,75 @@ +From ed9eb71085ecb7ded9a5118cec2ab70667cc7350 Mon Sep 17 00:00:00 2001 +From: Ilya Dryomov +Date: Sat, 3 Jul 2021 11:31:26 +0200 +Subject: rbd: don't hold lock_rwsem while running_list is being drained + +From: Ilya Dryomov + +commit ed9eb71085ecb7ded9a5118cec2ab70667cc7350 upstream. + +Currently rbd_quiesce_lock() holds lock_rwsem for read while blocking +on releasing_wait completion. On the I/O completion side, each image +request also needs to take lock_rwsem for read. Because rw_semaphore +implementation doesn't allow new readers after a writer has indicated +interest in the lock, this can result in a deadlock if something that +needs to take lock_rwsem for write gets involved. For example: + +1. watch error occurs +2. rbd_watch_errcb() takes lock_rwsem for write, clears owner_cid and + releases lock_rwsem +3. after reestablishing the watch, rbd_reregister_watch() takes + lock_rwsem for write and calls rbd_reacquire_lock() +4. rbd_quiesce_lock() downgrades lock_rwsem to for read and blocks on + releasing_wait until running_list becomes empty +5. another watch error occurs +6. rbd_watch_errcb() blocks trying to take lock_rwsem for write +7. no in-flight image request can complete and delete itself from + running_list because lock_rwsem won't be granted anymore + +A similar scenario can occur with "lock has been acquired" and "lock +has been released" notification handlers which also take lock_rwsem for +write to update owner_cid. + +We don't actually get anything useful from sitting on lock_rwsem in +rbd_quiesce_lock() -- owner_cid updates certainly don't need to be +synchronized with. In fact the whole owner_cid tracking logic could +probably be removed from the kernel client because we don't support +proxied maintenance operations. 
+ +Cc: stable@vger.kernel.org # 5.3+ +URL: https://tracker.ceph.com/issues/42757 +Signed-off-by: Ilya Dryomov +Tested-by: Robin Geuze +Signed-off-by: Greg Kroah-Hartman +--- + drivers/block/rbd.c | 12 +++++------- + 1 file changed, 5 insertions(+), 7 deletions(-) + +--- a/drivers/block/rbd.c ++++ b/drivers/block/rbd.c +@@ -4147,8 +4147,6 @@ again: + + static bool rbd_quiesce_lock(struct rbd_device *rbd_dev) + { +- bool need_wait; +- + dout("%s rbd_dev %p\n", __func__, rbd_dev); + lockdep_assert_held_write(&rbd_dev->lock_rwsem); + +@@ -4160,11 +4158,11 @@ static bool rbd_quiesce_lock(struct rbd_ + */ + rbd_dev->lock_state = RBD_LOCK_STATE_RELEASING; + rbd_assert(!completion_done(&rbd_dev->releasing_wait)); +- need_wait = !list_empty(&rbd_dev->running_list); +- downgrade_write(&rbd_dev->lock_rwsem); +- if (need_wait) +- wait_for_completion(&rbd_dev->releasing_wait); +- up_read(&rbd_dev->lock_rwsem); ++ if (list_empty(&rbd_dev->running_list)) ++ return true; ++ ++ up_write(&rbd_dev->lock_rwsem); ++ wait_for_completion(&rbd_dev->releasing_wait); + + down_write(&rbd_dev->lock_rwsem); + if (rbd_dev->lock_state != RBD_LOCK_STATE_RELEASING) diff --git a/queue-5.10/revert-drm-i915-propagate-errors-on-awaiting-already-signaled-fences.patch b/queue-5.10/revert-drm-i915-propagate-errors-on-awaiting-already-signaled-fences.patch new file mode 100644 index 00000000000..54d2a1edf70 --- /dev/null +++ b/queue-5.10/revert-drm-i915-propagate-errors-on-awaiting-already-signaled-fences.patch @@ -0,0 +1,88 @@ +From 3761baae908a7b5012be08d70fa553cc2eb82305 Mon Sep 17 00:00:00 2001 +From: Jason Ekstrand +Date: Wed, 14 Jul 2021 14:34:16 -0500 +Subject: Revert "drm/i915: Propagate errors on awaiting already signaled fences" + +From: Jason Ekstrand + +commit 3761baae908a7b5012be08d70fa553cc2eb82305 upstream. + +This reverts commit 9e31c1fe45d555a948ff66f1f0e3fe1f83ca63f7. Ever +since that commit, we've been having issues where a hang in one client +can propagate to another. 
In particular, a hang in an app can propagate +to the X server which causes the whole desktop to lock up. + +Error propagation along fences sounds like a good idea, but as your bug +shows, it has surprising consequences, since propagating errors across security +boundaries is not a good thing. + +What we do have is track the hangs on the ctx, and report information to +userspace using RESET_STATS. That's how arb_robustness works. Also, if my +understanding is still correct, the EIO from execbuf is when your context +is banned (because not recoverable or too many hangs). And in all these +cases it's up to userspace to figure out what is all impacted and should +be reported to the application, that's not on the kernel to guess and +automatically propagate. + +What's more, we're also building more features on top of ctx error +reporting with RESET_STATS ioctl: Encrypted buffers use the same, and the +userspace fence wait also relies on that mechanism. So it is the path +going forward for reporting gpu hangs and resets to userspace. + +So all together that's why I think we should just bury this idea again as +not quite the direction we want to go to, hence why I think the revert is +the right option here. + +For backporters: Please note that you _must_ have a backport of +https://lore.kernel.org/dri-devel/20210602164149.391653-2-jason@jlekstrand.net/ +for otherwise backporting just this patch opens up a security bug. + +v2: Augment commit message. Also restore Jason's sob that I +accidentally lost. 
+ +v3: Add a note for backporters + +Signed-off-by: Jason Ekstrand +Reported-by: Marcin Slusarz +Cc: # v5.6+ +Cc: Jason Ekstrand +Cc: Marcin Slusarz +Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/3080 +Fixes: 9e31c1fe45d5 ("drm/i915: Propagate errors on awaiting already signaled fences") +Acked-by: Daniel Vetter +Reviewed-by: Jon Bloomfield +Signed-off-by: Daniel Vetter +Link: https://patchwork.freedesktop.org/patch/msgid/20210714193419.1459723-3-jason@jlekstrand.net +(cherry picked from commit 93a2711cddd5760e2f0f901817d71c93183c3b87) +Signed-off-by: Rodrigo Vivi +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/i915/i915_request.c | 8 ++------ + 1 file changed, 2 insertions(+), 6 deletions(-) + +--- a/drivers/gpu/drm/i915/i915_request.c ++++ b/drivers/gpu/drm/i915/i915_request.c +@@ -1285,10 +1285,8 @@ i915_request_await_execution(struct i915 + + do { + fence = *child++; +- if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { +- i915_sw_fence_set_error_once(&rq->submit, fence->error); ++ if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + continue; +- } + + if (fence->context == rq->fence.context) + continue; +@@ -1386,10 +1384,8 @@ i915_request_await_dma_fence(struct i915 + + do { + fence = *child++; +- if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { +- i915_sw_fence_set_error_once(&rq->submit, fence->error); ++ if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + continue; +- } + + /* + * Requests on the same timeline are explicitly ordered, along diff --git a/queue-5.10/series b/queue-5.10/series index ccf5c14b826..86d2bace5bb 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -146,3 +146,13 @@ selftest-use-mmap-instead-of-posix_memalign-to-allocate-memory.patch io_uring-explicitly-count-entries-for-poll-reqs.patch io_uring-remove-double-poll-entry-on-arm-failure.patch userfaultfd-do-not-untag-user-pointers.patch +memblock-make-for_each_mem_range-traverse-memblock_hotplug-regions.patch 
+hugetlbfs-fix-mount-mode-command-line-processing.patch +rbd-don-t-hold-lock_rwsem-while-running_list-is-being-drained.patch +rbd-always-kick-acquire-on-acquired-and-released-notifications.patch +misc-eeprom-at24-always-append-device-id-even-if-label-property-is-set.patch +nds32-fix-up-stack-guard-gap.patch +driver-core-prevent-warning-when-removing-a-device-link-from-unregistered-consumer.patch +revert-drm-i915-propagate-errors-on-awaiting-already-signaled-fences.patch +drm-return-enotty-for-non-drm-ioctls.patch +drm-amdgpu-update-golden-setting-for-sienna_cichlid.patch