From: Sasha Levin Date: Thu, 19 May 2022 13:51:39 +0000 (-0400) Subject: Fixes for 5.17 X-Git-Tag: v4.9.316~57 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=c5f73bf7a2823b7c4eb8156684574bef2fdd6f27;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 5.17 Signed-off-by: Sasha Levin --- diff --git a/queue-5.17/alsa-hda-realtek-enable-headset-mic-on-lenovo-p360.patch b/queue-5.17/alsa-hda-realtek-enable-headset-mic-on-lenovo-p360.patch new file mode 100644 index 00000000000..19f6d580967 --- /dev/null +++ b/queue-5.17/alsa-hda-realtek-enable-headset-mic-on-lenovo-p360.patch @@ -0,0 +1,35 @@ +From 9ba3e693fe3300c24d8e602923f8b68dc73d6aa3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 26 Mar 2022 00:05:00 +0800 +Subject: ALSA: hda/realtek: Enable headset mic on Lenovo P360 + +From: Kai-Heng Feng + +[ Upstream commit 5a8738571747c1e275a40b69a608657603867b7e ] + +Lenovo P360 is another platform equipped with ALC897, and it needs +ALC897_FIXUP_HEADSET_MIC_PIN quirk to make its headset mic work. + +Signed-off-by: Kai-Heng Feng +Link: https://lore.kernel.org/r/20220325160501.705221-1-kai.heng.feng@canonical.com +Signed-off-by: Takashi Iwai +Signed-off-by: Sasha Levin +--- + sound/pci/hda/patch_realtek.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c +index 51c54cf0f312..653b89b2d44b 100644 +--- a/sound/pci/hda/patch_realtek.c ++++ b/sound/pci/hda/patch_realtek.c +@@ -11106,6 +11106,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { + SND_PCI_QUIRK(0x144d, 0xc051, "Samsung R720", ALC662_FIXUP_IDEAPAD), + SND_PCI_QUIRK(0x14cd, 0x5003, "USI", ALC662_FIXUP_USI_HEADSET_MODE), + SND_PCI_QUIRK(0x17aa, 0x1036, "Lenovo P520", ALC662_FIXUP_LENOVO_MULTI_CODECS), ++ SND_PCI_QUIRK(0x17aa, 0x1057, "Lenovo P360", ALC897_FIXUP_HEADSET_MIC_PIN), + SND_PCI_QUIRK(0x17aa, 0x32ca, "Lenovo ThinkCentre M80", ALC897_FIXUP_HEADSET_MIC_PIN), + SND_PCI_QUIRK(0x17aa, 0x32cb, "Lenovo ThinkCentre M70", ALC897_FIXUP_HEADSET_MIC_PIN), + SND_PCI_QUIRK(0x17aa, 0x32cf, "Lenovo ThinkCentre M950", ALC897_FIXUP_HEADSET_MIC_PIN), +-- +2.35.1 + diff --git a/queue-5.17/arm-9191-1-arm-stacktrace-kasan-silence-kasan-warnin.patch b/queue-5.17/arm-9191-1-arm-stacktrace-kasan-silence-kasan-warnin.patch new file mode 100644 index 00000000000..25e1f9d53da --- /dev/null +++ b/queue-5.17/arm-9191-1-arm-stacktrace-kasan-silence-kasan-warnin.patch @@ -0,0 +1,105 @@ +From e345c0983da55bfc9f597e3e247bb4a5ea5f3201 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 1 Apr 2022 10:52:47 +0100 +Subject: ARM: 9191/1: arm/stacktrace, kasan: Silence KASAN warnings in + unwind_frame() + +From: linyujun + +[ Upstream commit 9be4c88bb7924f68f88cfd47d925c2d046f51a73 ] + +The following KASAN warning is detected by QEMU. 
+ +================================================================== +BUG: KASAN: stack-out-of-bounds in unwind_frame+0x508/0x870 +Read of size 4 at addr c36bba90 by task cat/163 + +CPU: 1 PID: 163 Comm: cat Not tainted 5.10.0-rc1 #40 +Hardware name: ARM-Versatile Express +[] (unwind_backtrace) from [] (show_stack+0x10/0x14) +[] (show_stack) from [] (dump_stack+0x98/0xb0) +[] (dump_stack) from [] (print_address_description.constprop.0+0x58/0x4bc) +[] (print_address_description.constprop.0) from [] (kasan_report+0x154/0x170) +[] (kasan_report) from [] (unwind_frame+0x508/0x870) +[] (unwind_frame) from [] (__save_stack_trace+0x110/0x134) +[] (__save_stack_trace) from [] (stack_trace_save+0x8c/0xb4) +[] (stack_trace_save) from [] (kasan_set_track+0x38/0x60) +[] (kasan_set_track) from [] (kasan_set_free_info+0x20/0x2c) +[] (kasan_set_free_info) from [] (__kasan_slab_free+0xec/0x120) +[] (__kasan_slab_free) from [] (kmem_cache_free+0x7c/0x334) +[] (kmem_cache_free) from [] (rcu_core+0x390/0xccc) +[] (rcu_core) from [] (__do_softirq+0x180/0x518) +[] (__do_softirq) from [] (irq_exit+0x9c/0xe0) +[] (irq_exit) from [] (__handle_domain_irq+0xb0/0x110) +[] (__handle_domain_irq) from [] (gic_handle_irq+0xa0/0xb8) +[] (gic_handle_irq) from [] (__irq_svc+0x6c/0x94) +Exception stack(0xc36bb928 to 0xc36bb970) +b920: c36bb9c0 00000000 c0126919 c0101228 c36bb9c0 b76d7730 +b940: c36b8000 c36bb9a0 c3335b00 c01ce0d8 00000003 c36bba3c c36bb940 c36bb978 +b960: c010e298 c011373c 60000013 ffffffff +[] (__irq_svc) from [] (unwind_frame+0x0/0x870) +[] (unwind_frame) from [<00000000>] (0x0) + +The buggy address belongs to the page: +page:(ptrval) refcount:0 mapcount:0 mapping:00000000 index:0x0 pfn:0x636bb +flags: 0x0() +raw: 00000000 00000000 ef867764 00000000 00000000 00000000 ffffffff 00000000 +page dumped because: kasan: bad access detected + +addr c36bba90 is located in stack of task cat/163 at offset 48 in frame: + stack_trace_save+0x0/0xb4 + +this frame has 1 object: + [32, 48) 'trace' + +Memory state around the buggy address: + c36bb980: f1 f1 f1 f1 00 04 f2 f2 00 00 f3 f3 00 00 00 00 + c36bba00: 00 00 00 00 00 00 00 00 00 00 00 00 f1 f1 f1 f1 +>c36bba80: 00 00 f3 f3 00 00 00 00 00 00 00 00 00 00 00 00 + ^ + c36bbb00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + c36bbb80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +================================================================== + +There is a same issue on x86 and has been resolved by the commit f7d27c35ddff +("x86/mm, kasan: Silence KASAN warnings in get_wchan()"). +The solution could be applied to arm architecture too. 
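+
+That solution wraps the unwinder's frame-pointer loads in
+READ_ONCE_NOCHECK(), which performs the access without KASAN
+instrumentation. A minimal sketch of the difference (illustrative only,
+not the patched code):
+
+	unsigned long val;
+
+	val = *(unsigned long *)fp;			/* plain load: KASAN-checked */
+	val = READ_ONCE_NOCHECK(*(unsigned long *)fp);	/* same load, no KASAN check */
+
+Only the unwinder's accesses into other stack frames need this; all
+other loads remain instrumented.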
+ +Signed-off-by: Lin Yujun +Reported-by: He Ying +Signed-off-by: Russell King (Oracle) +Signed-off-by: Sasha Levin +--- + arch/arm/kernel/stacktrace.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/arch/arm/kernel/stacktrace.c b/arch/arm/kernel/stacktrace.c +index 75e905508f27..f0c390e9d3ce 100644 +--- a/arch/arm/kernel/stacktrace.c ++++ b/arch/arm/kernel/stacktrace.c +@@ -54,17 +54,17 @@ int notrace unwind_frame(struct stackframe *frame) + return -EINVAL; + + frame->sp = frame->fp; +- frame->fp = *(unsigned long *)(fp); +- frame->pc = *(unsigned long *)(fp + 4); ++ frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp)); ++ frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 4)); + #else + /* check current frame pointer is within bounds */ + if (fp < low + 12 || fp > high - 4) + return -EINVAL; + + /* restore the registers from the stack frame */ +- frame->fp = *(unsigned long *)(fp - 12); +- frame->sp = *(unsigned long *)(fp - 8); +- frame->pc = *(unsigned long *)(fp - 4); ++ frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp - 12)); ++ frame->sp = READ_ONCE_NOCHECK(*(unsigned long *)(fp - 8)); ++ frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp - 4)); + #endif + #ifdef CONFIG_KRETPROBES + if (is_kretprobe_trampoline(frame->pc)) +-- +2.35.1 + diff --git a/queue-5.17/crypto-stm32-fix-reference-leak-in-stm32_crc_remove.patch b/queue-5.17/crypto-stm32-fix-reference-leak-in-stm32_crc_remove.patch new file mode 100644 index 00000000000..f0e358d3219 --- /dev/null +++ b/queue-5.17/crypto-stm32-fix-reference-leak-in-stm32_crc_remove.patch @@ -0,0 +1,39 @@ +From 78a3843c6272959d46ffab6066b235c5c20b94af Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 17 Mar 2022 13:16:13 +0000 +Subject: crypto: stm32 - fix reference leak in stm32_crc_remove + +From: Zheng Yongjun + +[ Upstream commit e9a36feecee0ee5845f2e0656f50f9942dd0bed3 ] + +pm_runtime_get_sync() will increment pm usage counter even it +failed. Forgetting to call pm_runtime_put_noidle will result +in reference leak in stm32_crc_remove, so we should fix it. 
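+
+The resulting error path follows the usual pm_runtime pattern (sketch
+of the shape, matching the hunk below):
+
+	ret = pm_runtime_get_sync(dev);
+	if (ret < 0) {
+		pm_runtime_put_noidle(dev);	/* balance the usage counter */
+		return ret;
+	}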
+ +Signed-off-by: Zheng Yongjun +Signed-off-by: Herbert Xu +Signed-off-by: Sasha Levin +--- + drivers/crypto/stm32/stm32-crc32.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/crypto/stm32/stm32-crc32.c b/drivers/crypto/stm32/stm32-crc32.c +index be1bf39a317d..90a920e7f664 100644 +--- a/drivers/crypto/stm32/stm32-crc32.c ++++ b/drivers/crypto/stm32/stm32-crc32.c +@@ -384,8 +384,10 @@ static int stm32_crc_remove(struct platform_device *pdev) + struct stm32_crc *crc = platform_get_drvdata(pdev); + int ret = pm_runtime_get_sync(crc->dev); + +- if (ret < 0) ++ if (ret < 0) { ++ pm_runtime_put_noidle(crc->dev); + return ret; ++ } + + spin_lock(&crc_list.lock); + list_del(&crc->list); +-- +2.35.1 + diff --git a/queue-5.17/crypto-x86-chacha20-avoid-spurious-jumps-to-other-fu.patch b/queue-5.17/crypto-x86-chacha20-avoid-spurious-jumps-to-other-fu.patch new file mode 100644 index 00000000000..1c171008d0e --- /dev/null +++ b/queue-5.17/crypto-x86-chacha20-avoid-spurious-jumps-to-other-fu.patch @@ -0,0 +1,52 @@ +From f313a6811238b433a8b95c0905ce432e7d674e58 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 22 Mar 2022 12:48:10 +0100 +Subject: crypto: x86/chacha20 - Avoid spurious jumps to other functions + +From: Peter Zijlstra + +[ Upstream commit 4327d168515fd8b5b92fa1efdf1d219fb6514460 ] + +The chacha_Nblock_xor_avx512vl() functions all have their own, +identical, .LdoneN label, however in one particular spot {2,4} jump to +the 8 version instead of their own. Resulting in: + + arch/x86/crypto/chacha-x86_64.o: warning: objtool: chacha_2block_xor_avx512vl() falls through to next function chacha_8block_xor_avx512vl() + arch/x86/crypto/chacha-x86_64.o: warning: objtool: chacha_4block_xor_avx512vl() falls through to next function chacha_8block_xor_avx512vl() + +Make each function consistently use its own done label. 
+ +Reported-by: Stephen Rothwell +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Martin Willi +Signed-off-by: Herbert Xu +Signed-off-by: Sasha Levin +--- + arch/x86/crypto/chacha-avx512vl-x86_64.S | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/crypto/chacha-avx512vl-x86_64.S b/arch/x86/crypto/chacha-avx512vl-x86_64.S +index 946f74dd6fba..259383e1ad44 100644 +--- a/arch/x86/crypto/chacha-avx512vl-x86_64.S ++++ b/arch/x86/crypto/chacha-avx512vl-x86_64.S +@@ -172,7 +172,7 @@ SYM_FUNC_START(chacha_2block_xor_avx512vl) + # xor remaining bytes from partial register into output + mov %rcx,%rax + and $0xf,%rcx +- jz .Ldone8 ++ jz .Ldone2 + mov %rax,%r9 + and $~0xf,%r9 + +@@ -438,7 +438,7 @@ SYM_FUNC_START(chacha_4block_xor_avx512vl) + # xor remaining bytes from partial register into output + mov %rcx,%rax + and $0xf,%rcx +- jz .Ldone8 ++ jz .Ldone4 + mov %rax,%r9 + and $~0xf,%r9 + +-- +2.35.1 + diff --git a/queue-5.17/drbd-remove-usage-of-list-iterator-variable-after-lo.patch b/queue-5.17/drbd-remove-usage-of-list-iterator-variable-after-lo.patch new file mode 100644 index 00000000000..f8fa62c452d --- /dev/null +++ b/queue-5.17/drbd-remove-usage-of-list-iterator-variable-after-lo.patch @@ -0,0 +1,57 @@ +From a5d82ac5d76e5e9cbb8edd7203e0571569a1d0de Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 1 Apr 2022 00:03:48 +0200 +Subject: drbd: remove usage of list iterator variable after loop + +From: Jakob Koschel + +[ Upstream commit 901aeda62efa21f2eae937bccb71b49ae531be06 ] + +In preparation to limit the scope of a list iterator to the list +traversal loop, use a dedicated pointer to iterate through the list [1]. + +Since that variable should not be used past the loop iteration, a +separate variable is used to 'remember the current location within the +loop'. + +To either continue iterating from that position or skip the iteration +(if the previous iteration was complete) list_prepare_entry() is used. + +Link: https://lore.kernel.org/all/CAHk-=wgRr_D8CB-D9Kg-c=EHreAsk5SqXPwr9Y7k9sA6cWXJ6w@mail.gmail.com/ [1] +Signed-off-by: Jakob Koschel +Link: https://lore.kernel.org/r/20220331220349.885126-1-jakobkoschel@gmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + drivers/block/drbd/drbd_main.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c +index 478ba959362c..416f4f48f69b 100644 +--- a/drivers/block/drbd/drbd_main.c ++++ b/drivers/block/drbd/drbd_main.c +@@ -171,7 +171,7 @@ void tl_release(struct drbd_connection *connection, unsigned int barrier_nr, + unsigned int set_size) + { + struct drbd_request *r; +- struct drbd_request *req = NULL; ++ struct drbd_request *req = NULL, *tmp = NULL; + int expect_epoch = 0; + int expect_size = 0; + +@@ -225,8 +225,11 @@ void tl_release(struct drbd_connection *connection, unsigned int barrier_nr, + * to catch requests being barrier-acked "unexpectedly". + * It usually should find the same req again, or some READ preceding it. 
*/ + list_for_each_entry(req, &connection->transfer_log, tl_requests) +- if (req->epoch == expect_epoch) ++ if (req->epoch == expect_epoch) { ++ tmp = req; + break; ++ } ++ req = list_prepare_entry(tmp, &connection->transfer_log, tl_requests); + list_for_each_entry_safe_from(req, r, &connection->transfer_log, tl_requests) { + if (req->epoch != expect_epoch) + break; +-- +2.35.1 + diff --git a/queue-5.17/fs-fix-an-infinite-loop-in-iomap_fiemap.patch b/queue-5.17/fs-fix-an-infinite-loop-in-iomap_fiemap.patch new file mode 100644 index 00000000000..cbc93538c44 --- /dev/null +++ b/queue-5.17/fs-fix-an-infinite-loop-in-iomap_fiemap.patch @@ -0,0 +1,72 @@ +From 145788c6387a941ff9326af2b324d7a905798e6f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 30 Mar 2022 09:49:28 -0700 +Subject: fs: fix an infinite loop in iomap_fiemap + +From: Guo Xuenan + +[ Upstream commit 49df34221804cfd6384135b28b03c9461a31d024 ] + +when get fiemap starting from MAX_LFS_FILESIZE, (maxbytes - *len) < start +will always true , then *len set zero. because of start offset is beyond +file size, for erofs filesystem it will always return iomap.length with +zero,iomap iterate will enter infinite loop. it is necessary cover this +corner case to avoid this situation. + +------------[ cut here ]------------ +WARNING: CPU: 7 PID: 905 at fs/iomap/iter.c:35 iomap_iter+0x97f/0xc70 +Modules linked in: xfs erofs +CPU: 7 PID: 905 Comm: iomap Tainted: G W 5.17.0-rc8 #27 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 +RIP: 0010:iomap_iter+0x97f/0xc70 +Code: 85 a1 fc ff ff e8 71 be 9c ff 0f 1f 44 00 00 e9 92 fc ff ff e8 62 be 9c ff 0f 0b b8 fb ff ff ff e9 fc f8 ff ff e8 51 be 9c ff <0f> 0b e9 2b fc ff ff e8 45 be 9c ff 0f 0b e9 e1 fb ff ff e8 39 be +RSP: 0018:ffff888060a37ab0 EFLAGS: 00010293 +RAX: 0000000000000000 RBX: ffff888060a37bb0 RCX: 0000000000000000 +RDX: ffff88807e19a900 RSI: ffffffff81a7da7f RDI: ffff888060a37be0 +RBP: 7fffffffffffffff R08: 0000000000000000 R09: ffff888060a37c20 +R10: ffff888060a37c67 R11: ffffed100c146f8c R12: 7fffffffffffffff +R13: 0000000000000000 R14: ffff888060a37bd8 R15: ffff888060a37c20 +FS: 00007fd3cca01540(0000) GS:ffff888108780000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 0000000020010820 CR3: 0000000054b92000 CR4: 00000000000006e0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + + iomap_fiemap+0x1c9/0x2f0 + erofs_fiemap+0x64/0x90 [erofs] + do_vfs_ioctl+0x40d/0x12e0 + __x64_sys_ioctl+0xaa/0x1c0 + do_syscall_64+0x35/0x80 + entry_SYSCALL_64_after_hwframe+0x44/0xae + +---[ end trace 0000000000000000 ]--- +watchdog: BUG: soft lockup - CPU#7 stuck for 26s! [iomap:905] + +Reported-by: Hulk Robot +Signed-off-by: Guo Xuenan +Reviewed-by: Christoph Hellwig +[djwong: fix some typos] +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. 
Wong +Signed-off-by: Sasha Levin +--- + fs/ioctl.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/ioctl.c b/fs/ioctl.c +index 1ed097e94af2..85f7e4ee6924 100644 +--- a/fs/ioctl.c ++++ b/fs/ioctl.c +@@ -173,7 +173,7 @@ int fiemap_prep(struct inode *inode, struct fiemap_extent_info *fieinfo, + + if (*len == 0) + return -EINVAL; +- if (start > maxbytes) ++ if (start >= maxbytes) + return -EFBIG; + + /* +-- +2.35.1 + diff --git a/queue-5.17/gfs2-cancel-timed-out-glock-requests.patch b/queue-5.17/gfs2-cancel-timed-out-glock-requests.patch new file mode 100644 index 00000000000..8e7d295c770 --- /dev/null +++ b/queue-5.17/gfs2-cancel-timed-out-glock-requests.patch @@ -0,0 +1,52 @@ +From ed65b1fe10b2a2c98f4521024cf9ae80ba9f2c4c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 24 Jan 2022 12:23:55 -0500 +Subject: gfs2: cancel timed-out glock requests + +From: Andreas Gruenbacher + +[ Upstream commit 1fc05c8d8426d4085a219c23f8855c4aaf9e3ffb ] + +The gfs2 evict code tries to upgrade the iopen glock from SH to EX. If +the attempt to upgrade times out, gfs2 needs to tell dlm to cancel the +lock request or it can deadlock. We also need to wake up the process +waiting for the lock when dlm sends its AST back to gfs2. + +Signed-off-by: Andreas Gruenbacher +Signed-off-by: Bob Peterson +Signed-off-by: Sasha Levin +--- + fs/gfs2/glock.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c +index 6b23399eaee0..d368d9a2e8f0 100644 +--- a/fs/gfs2/glock.c ++++ b/fs/gfs2/glock.c +@@ -669,6 +669,8 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret) + + /* Check for state != intended state */ + if (unlikely(state != gl->gl_target)) { ++ if (gh && (ret & LM_OUT_CANCELED)) ++ gfs2_holder_wake(gh); + if (gh && !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) { + /* move to back of queue and try next entry */ + if (ret & LM_OUT_CANCELED) { +@@ -1691,6 +1693,14 @@ void gfs2_glock_dq(struct gfs2_holder *gh) + struct gfs2_glock *gl = gh->gh_gl; + + spin_lock(&gl->gl_lockref.lock); ++ if (list_is_first(&gh->gh_list, &gl->gl_holders) && ++ !test_bit(HIF_HOLDER, &gh->gh_iflags)) { ++ spin_unlock(&gl->gl_lockref.lock); ++ gl->gl_name.ln_sbd->sd_lockstruct.ls_ops->lm_cancel(gl); ++ wait_on_bit(&gh->gh_iflags, HIF_WAIT, TASK_UNINTERRUPTIBLE); ++ spin_lock(&gl->gl_lockref.lock); ++ } ++ + __gfs2_glock_dq(gh); + spin_unlock(&gl->gl_lockref.lock); + } +-- +2.35.1 + diff --git a/queue-5.17/gfs2-disable-page-faults-during-lockless-buffered-re.patch b/queue-5.17/gfs2-disable-page-faults-during-lockless-buffered-re.patch new file mode 100644 index 00000000000..f081bd4b167 --- /dev/null +++ b/queue-5.17/gfs2-disable-page-faults-during-lockless-buffered-re.patch @@ -0,0 +1,50 @@ +From ed30b5c3e0530361fb1c6fc02ae7ed75fe4f546e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 14 Mar 2022 18:32:02 +0100 +Subject: gfs2: Disable page faults during lockless buffered reads + +From: Andreas Gruenbacher + +[ Upstream commit 52f3f033a5dbd023307520af1ff551cadfd7f037 ] + +During lockless buffered reads, filemap_read() holds page cache page +references while trying to copy data to the user-space buffer. The +calling process isn't holding the inode glock, but the page references +it holds prevent those pages from being removed from the page cache, and +that prevents the underlying inode glock from being moved to another +node. Thus, we can end up in the same kinds of distributed deadlock +situations as with normal (non-lockless) buffered reads. 
+ +Fix that by disabling page faults during lockless reads as well. + +Signed-off-by: Andreas Gruenbacher +Signed-off-by: Sasha Levin +--- + fs/gfs2/file.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c +index fa071d738c78..c781c19303db 100644 +--- a/fs/gfs2/file.c ++++ b/fs/gfs2/file.c +@@ -956,14 +956,16 @@ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) + return ret; + iocb->ki_flags &= ~IOCB_DIRECT; + } ++ pagefault_disable(); + iocb->ki_flags |= IOCB_NOIO; + ret = generic_file_read_iter(iocb, to); + iocb->ki_flags &= ~IOCB_NOIO; ++ pagefault_enable(); + if (ret >= 0) { + if (!iov_iter_count(to)) + return ret; + written = ret; +- } else { ++ } else if (ret != -EFAULT) { + if (ret != -EAGAIN) + return ret; + if (iocb->ki_flags & IOCB_NOWAIT) +-- +2.35.1 + diff --git a/queue-5.17/gfs2-switch-lock-order-of-inode-and-iopen-glock.patch b/queue-5.17/gfs2-switch-lock-order-of-inode-and-iopen-glock.patch new file mode 100644 index 00000000000..8855699629a --- /dev/null +++ b/queue-5.17/gfs2-switch-lock-order-of-inode-and-iopen-glock.patch @@ -0,0 +1,142 @@ +From 2cba3b0e969fe0203ab73973ef0a9809bd1c2f50 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 24 Jan 2022 12:23:57 -0500 +Subject: gfs2: Switch lock order of inode and iopen glock + +From: Andreas Gruenbacher + +[ Upstream commit 29464ee36bcaaee2691249f49b9592b8d5c97ece ] + +This patch tries to fix the continual ABBA deadlocks we keep having +between the iopen and inode glocks. This switches the lock order in +gfs2_inode_lookup and gfs2_create_inode so the iopen glock is always +locked first. + +Signed-off-by: Andreas Gruenbacher +Signed-off-by: Bob Peterson +Signed-off-by: Sasha Levin +--- + fs/gfs2/inode.c | 49 +++++++++++++++++++++++++++---------------------- + 1 file changed, 27 insertions(+), 22 deletions(-) + +diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c +index 66a123306aec..c8ec876f33ea 100644 +--- a/fs/gfs2/inode.c ++++ b/fs/gfs2/inode.c +@@ -131,7 +131,21 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, + struct gfs2_sbd *sdp = GFS2_SB(inode); + struct gfs2_glock *io_gl; + +- error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); ++ error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, ++ &ip->i_gl); ++ if (unlikely(error)) ++ goto fail; ++ ++ error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, ++ &io_gl); ++ if (unlikely(error)) ++ goto fail; ++ ++ if (blktype != GFS2_BLKST_UNLINKED) ++ gfs2_cancel_delete_work(io_gl); ++ error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, ++ &ip->i_iopen_gh); ++ gfs2_glock_put(io_gl); + if (unlikely(error)) + goto fail; + +@@ -161,16 +175,6 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, + + set_bit(GLF_INSTANTIATE_NEEDED, &ip->i_gl->gl_flags); + +- error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl); +- if (unlikely(error)) +- goto fail; +- if (blktype != GFS2_BLKST_UNLINKED) +- gfs2_cancel_delete_work(io_gl); +- error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); +- gfs2_glock_put(io_gl); +- if (unlikely(error)) +- goto fail; +- + /* Lowest possible timestamp; will be overwritten in gfs2_dinode_in. 
*/ + inode->i_atime.tv_sec = 1LL << (8 * sizeof(inode->i_atime.tv_sec) - 1); + inode->i_atime.tv_nsec = 0; +@@ -716,13 +720,17 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, + error = insert_inode_locked4(inode, ip->i_no_addr, iget_test, &ip->i_no_addr); + BUG_ON(error); + +- error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); ++ error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); + if (error) + goto fail_gunlock2; + ++ error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); ++ if (error) ++ goto fail_gunlock3; ++ + error = gfs2_trans_begin(sdp, blocks, 0); + if (error) +- goto fail_gunlock2; ++ goto fail_gunlock3; + + if (blocks > 1) { + ip->i_eattr = ip->i_no_addr + 1; +@@ -731,10 +739,6 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, + init_dinode(dip, ip, symname); + gfs2_trans_end(sdp); + +- error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); +- if (error) +- goto fail_gunlock2; +- + glock_set_object(ip->i_gl, ip); + glock_set_object(io_gl, ip); + gfs2_set_iop(inode); +@@ -745,14 +749,14 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, + if (default_acl) { + error = __gfs2_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); + if (error) +- goto fail_gunlock3; ++ goto fail_gunlock4; + posix_acl_release(default_acl); + default_acl = NULL; + } + if (acl) { + error = __gfs2_set_acl(inode, acl, ACL_TYPE_ACCESS); + if (error) +- goto fail_gunlock3; ++ goto fail_gunlock4; + posix_acl_release(acl); + acl = NULL; + } +@@ -760,11 +764,11 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, + error = security_inode_init_security(&ip->i_inode, &dip->i_inode, name, + &gfs2_initxattrs, NULL); + if (error) +- goto fail_gunlock3; ++ goto fail_gunlock4; + + error = link_dinode(dip, name, ip, &da); + if (error) +- goto fail_gunlock3; ++ goto fail_gunlock4; + + mark_inode_dirty(inode); + d_instantiate(dentry, inode); +@@ -782,9 +786,10 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, + unlock_new_inode(inode); + return error; + +-fail_gunlock3: ++fail_gunlock4: + glock_clear_object(ip->i_gl, ip); + glock_clear_object(io_gl, ip); ++fail_gunlock3: + gfs2_glock_dq_uninit(&ip->i_iopen_gh); + fail_gunlock2: + gfs2_glock_put(io_gl); +-- +2.35.1 + diff --git a/queue-5.17/input-add-bounds-checking-to-input_set_capability.patch b/queue-5.17/input-add-bounds-checking-to-input_set_capability.patch new file mode 100644 index 00000000000..c751a2f93db --- /dev/null +++ b/queue-5.17/input-add-bounds-checking-to-input_set_capability.patch @@ -0,0 +1,65 @@ +From 7f047dd70843331d424d97fdf30e002694af9575 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 20 Mar 2022 21:55:27 -0700 +Subject: Input: add bounds checking to input_set_capability() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Jeff LaBundy + +[ Upstream commit 409353cbe9fe48f6bc196114c442b1cff05a39bc ] + +Update input_set_capability() to prevent kernel panic in case the +event code exceeds the bitmap for the given event type. 
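+
+For example, a hypothetical driver bug such as
+
+	input_set_capability(dev, EV_KEY, KEY_MAX + 1);
+
+would previously set a bit beyond dev->keybit; with this change the
+call is rejected, logging the bad type/code pair with a stack dump
+instead of corrupting memory.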
+ +Suggested-by: Tomasz Moń +Signed-off-by: Jeff LaBundy +Reviewed-by: Tomasz Moń +Link: https://lore.kernel.org/r/20220320032537.545250-1-jeff@labundy.com +Signed-off-by: Dmitry Torokhov +Signed-off-by: Sasha Levin +--- + drivers/input/input.c | 19 +++++++++++++++++++ + 1 file changed, 19 insertions(+) + +diff --git a/drivers/input/input.c b/drivers/input/input.c +index ccaeb2426385..ba246fabc6c1 100644 +--- a/drivers/input/input.c ++++ b/drivers/input/input.c +@@ -47,6 +47,17 @@ static DEFINE_MUTEX(input_mutex); + + static const struct input_value input_value_sync = { EV_SYN, SYN_REPORT, 1 }; + ++static const unsigned int input_max_code[EV_CNT] = { ++ [EV_KEY] = KEY_MAX, ++ [EV_REL] = REL_MAX, ++ [EV_ABS] = ABS_MAX, ++ [EV_MSC] = MSC_MAX, ++ [EV_SW] = SW_MAX, ++ [EV_LED] = LED_MAX, ++ [EV_SND] = SND_MAX, ++ [EV_FF] = FF_MAX, ++}; ++ + static inline int is_event_supported(unsigned int code, + unsigned long *bm, unsigned int max) + { +@@ -2074,6 +2085,14 @@ EXPORT_SYMBOL(input_get_timestamp); + */ + void input_set_capability(struct input_dev *dev, unsigned int type, unsigned int code) + { ++ if (type < EV_CNT && input_max_code[type] && ++ code > input_max_code[type]) { ++ pr_err("%s: invalid code %u for type %u\n", __func__, code, ++ type); ++ dump_stack(); ++ return; ++ } ++ + switch (type) { + case EV_KEY: + __set_bit(code, dev->keybit); +-- +2.35.1 + diff --git a/queue-5.17/input-stmfts-fix-reference-leak-in-stmfts_input_open.patch b/queue-5.17/input-stmfts-fix-reference-leak-in-stmfts_input_open.patch new file mode 100644 index 00000000000..efb90ea3740 --- /dev/null +++ b/queue-5.17/input-stmfts-fix-reference-leak-in-stmfts_input_open.patch @@ -0,0 +1,53 @@ +From 2a735f64582ea0ac81d2d3b5ab02c23f817e2e31 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 20 Mar 2022 21:56:38 -0700 +Subject: Input: stmfts - fix reference leak in stmfts_input_open + +From: Zheng Yongjun + +[ Upstream commit 26623eea0da3476446909af96c980768df07bbd9 ] + +pm_runtime_get_sync() will increment pm usage counter even it +failed. Forgetting to call pm_runtime_put_noidle will result +in reference leak in stmfts_input_open, so we should fix it. 
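+
+On kernels that have pm_runtime_resume_and_get() (v5.10 and later), the
+same fix could be written without the explicit put, since that helper
+drops the usage counter itself on failure (alternative sketch, not what
+this patch does):
+
+	err = pm_runtime_resume_and_get(&sdata->client->dev);
+	if (err < 0)
+		return err;	/* usage counter already balanced */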
+ +Signed-off-by: Zheng Yongjun +Link: https://lore.kernel.org/r/20220317131604.53538-1-zhengyongjun3@huawei.com +Signed-off-by: Dmitry Torokhov +Signed-off-by: Sasha Levin +--- + drivers/input/touchscreen/stmfts.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/drivers/input/touchscreen/stmfts.c b/drivers/input/touchscreen/stmfts.c +index bc11203c9cf7..72e0b767e1ba 100644 +--- a/drivers/input/touchscreen/stmfts.c ++++ b/drivers/input/touchscreen/stmfts.c +@@ -339,11 +339,11 @@ static int stmfts_input_open(struct input_dev *dev) + + err = pm_runtime_get_sync(&sdata->client->dev); + if (err < 0) +- return err; ++ goto out; + + err = i2c_smbus_write_byte(sdata->client, STMFTS_MS_MT_SENSE_ON); + if (err) +- return err; ++ goto out; + + mutex_lock(&sdata->mutex); + sdata->running = true; +@@ -366,7 +366,9 @@ static int stmfts_input_open(struct input_dev *dev) + "failed to enable touchkey\n"); + } + +- return 0; ++out: ++ pm_runtime_put_noidle(&sdata->client->dev); ++ return err; + } + + static void stmfts_input_close(struct input_dev *dev) +-- +2.35.1 + diff --git a/queue-5.17/kconfig-add-fflush-before-ferror-check.patch b/queue-5.17/kconfig-add-fflush-before-ferror-check.patch new file mode 100644 index 00000000000..32f0fc63e61 --- /dev/null +++ b/queue-5.17/kconfig-add-fflush-before-ferror-check.patch @@ -0,0 +1,42 @@ +From c2aa41964448c89c44f54eff2629a48e03f820f7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 13 Feb 2022 01:18:37 +0900 +Subject: kconfig: add fflush() before ferror() check + +From: Masahiro Yamada + +[ Upstream commit 868653f421cd37e8ec3880da19f0aac93f5c46cc ] + +As David Laight pointed out, there is not much point in calling +ferror() unless you call fflush() first. + +Reported-by: David Laight +Signed-off-by: Masahiro Yamada +Signed-off-by: Sasha Levin +--- + scripts/kconfig/confdata.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c +index d3c3a61308ad..94dcec2cc803 100644 +--- a/scripts/kconfig/confdata.c ++++ b/scripts/kconfig/confdata.c +@@ -979,6 +979,7 @@ static int conf_write_autoconf_cmd(const char *autoconf_name) + + fprintf(out, "\n$(deps_config): ;\n"); + ++ fflush(out); + ret = ferror(out); /* error check for all fprintf() calls */ + fclose(out); + if (ret) +@@ -1097,6 +1098,7 @@ static int __conf_write_autoconf(const char *filename, + if ((sym->flags & SYMBOL_WRITE) && sym->name) + print_symbol(file, sym); + ++ fflush(file); + /* check possible errors in conf_write_heading() and print_symbol() */ + ret = ferror(file); + fclose(file); +-- +2.35.1 + diff --git a/queue-5.17/mips-lantiq-check-the-return-value-of-kzalloc.patch b/queue-5.17/mips-lantiq-check-the-return-value-of-kzalloc.patch new file mode 100644 index 00000000000..c65854f9750 --- /dev/null +++ b/queue-5.17/mips-lantiq-check-the-return-value-of-kzalloc.patch @@ -0,0 +1,135 @@ +From 518e4ea61deaa08c624bca8b79b7c37288a493b2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 25 Mar 2022 19:49:41 +0800 +Subject: MIPS: lantiq: check the return value of kzalloc() + +From: Xiaoke Wang + +[ Upstream commit 34123208bbcc8c884a0489f543a23fe9eebb5514 ] + +kzalloc() is a memory allocation function which can return NULL when +some internal memory errors happen. So it is better to check the +return value of it to prevent potential wrong memory access or +memory leak. 
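+
+As a side note, the kzalloc()+sprintf() pair used for the "clkout%d"
+names in clkdev_add_clkout() could be collapsed into one allocation
+needing only a single NULL check (alternative sketch, not what this
+patch does):
+
+	name = kasprintf(GFP_KERNEL, "clkout%d", i);
+	if (!name)
+		continue;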
+ +Signed-off-by: Xiaoke Wang +Signed-off-by: Thomas Bogendoerfer +Signed-off-by: Sasha Levin +--- + arch/mips/lantiq/falcon/sysctrl.c | 2 ++ + arch/mips/lantiq/xway/gptu.c | 2 ++ + arch/mips/lantiq/xway/sysctrl.c | 46 ++++++++++++++++++++----------- + 3 files changed, 34 insertions(+), 16 deletions(-) + +diff --git a/arch/mips/lantiq/falcon/sysctrl.c b/arch/mips/lantiq/falcon/sysctrl.c +index 64726c670ca6..5204fc6d6d50 100644 +--- a/arch/mips/lantiq/falcon/sysctrl.c ++++ b/arch/mips/lantiq/falcon/sysctrl.c +@@ -167,6 +167,8 @@ static inline void clkdev_add_sys(const char *dev, unsigned int module, + { + struct clk *clk = kzalloc(sizeof(struct clk), GFP_KERNEL); + ++ if (!clk) ++ return; + clk->cl.dev_id = dev; + clk->cl.con_id = NULL; + clk->cl.clk = clk; +diff --git a/arch/mips/lantiq/xway/gptu.c b/arch/mips/lantiq/xway/gptu.c +index 3d5683e75cf1..200fe9ff641d 100644 +--- a/arch/mips/lantiq/xway/gptu.c ++++ b/arch/mips/lantiq/xway/gptu.c +@@ -122,6 +122,8 @@ static inline void clkdev_add_gptu(struct device *dev, const char *con, + { + struct clk *clk = kzalloc(sizeof(struct clk), GFP_KERNEL); + ++ if (!clk) ++ return; + clk->cl.dev_id = dev_name(dev); + clk->cl.con_id = con; + clk->cl.clk = clk; +diff --git a/arch/mips/lantiq/xway/sysctrl.c b/arch/mips/lantiq/xway/sysctrl.c +index 917fac1636b7..084f6caba5f2 100644 +--- a/arch/mips/lantiq/xway/sysctrl.c ++++ b/arch/mips/lantiq/xway/sysctrl.c +@@ -315,6 +315,8 @@ static void clkdev_add_pmu(const char *dev, const char *con, bool deactivate, + { + struct clk *clk = kzalloc(sizeof(struct clk), GFP_KERNEL); + ++ if (!clk) ++ return; + clk->cl.dev_id = dev; + clk->cl.con_id = con; + clk->cl.clk = clk; +@@ -338,6 +340,8 @@ static void clkdev_add_cgu(const char *dev, const char *con, + { + struct clk *clk = kzalloc(sizeof(struct clk), GFP_KERNEL); + ++ if (!clk) ++ return; + clk->cl.dev_id = dev; + clk->cl.con_id = con; + clk->cl.clk = clk; +@@ -356,24 +360,28 @@ static void clkdev_add_pci(void) + struct clk *clk_ext = kzalloc(sizeof(struct clk), GFP_KERNEL); + + /* main pci clock */ +- clk->cl.dev_id = "17000000.pci"; +- clk->cl.con_id = NULL; +- clk->cl.clk = clk; +- clk->rate = CLOCK_33M; +- clk->rates = valid_pci_rates; +- clk->enable = pci_enable; +- clk->disable = pmu_disable; +- clk->module = 0; +- clk->bits = PMU_PCI; +- clkdev_add(&clk->cl); ++ if (clk) { ++ clk->cl.dev_id = "17000000.pci"; ++ clk->cl.con_id = NULL; ++ clk->cl.clk = clk; ++ clk->rate = CLOCK_33M; ++ clk->rates = valid_pci_rates; ++ clk->enable = pci_enable; ++ clk->disable = pmu_disable; ++ clk->module = 0; ++ clk->bits = PMU_PCI; ++ clkdev_add(&clk->cl); ++ } + + /* use internal/external bus clock */ +- clk_ext->cl.dev_id = "17000000.pci"; +- clk_ext->cl.con_id = "external"; +- clk_ext->cl.clk = clk_ext; +- clk_ext->enable = pci_ext_enable; +- clk_ext->disable = pci_ext_disable; +- clkdev_add(&clk_ext->cl); ++ if (clk_ext) { ++ clk_ext->cl.dev_id = "17000000.pci"; ++ clk_ext->cl.con_id = "external"; ++ clk_ext->cl.clk = clk_ext; ++ clk_ext->enable = pci_ext_enable; ++ clk_ext->disable = pci_ext_disable; ++ clkdev_add(&clk_ext->cl); ++ } + } + + /* xway socs can generate clocks on gpio pins */ +@@ -393,9 +401,15 @@ static void clkdev_add_clkout(void) + char *name; + + name = kzalloc(sizeof("clkout0"), GFP_KERNEL); ++ if (!name) ++ continue; + sprintf(name, "clkout%d", i); + + clk = kzalloc(sizeof(struct clk), GFP_KERNEL); ++ if (!clk) { ++ kfree(name); ++ continue; ++ } + clk->cl.dev_id = "1f103000.cgu"; + clk->cl.con_id = name; + clk->cl.clk = clk; +-- +2.35.1 + diff 
--git a/queue-5.17/nilfs2-fix-lockdep-warnings-during-disk-space-reclam.patch b/queue-5.17/nilfs2-fix-lockdep-warnings-during-disk-space-reclam.patch new file mode 100644 index 00000000000..73dd852844b --- /dev/null +++ b/queue-5.17/nilfs2-fix-lockdep-warnings-during-disk-space-reclam.patch @@ -0,0 +1,349 @@ +From b84e87ac05c00b9ef8407dcfc681bd82307d05e3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 1 Apr 2022 11:28:21 -0700 +Subject: nilfs2: fix lockdep warnings during disk space reclamation + +From: Ryusuke Konishi + +[ Upstream commit 6e211930f79aa45d422009a5f2e5467d2369ffe5 ] + +During disk space reclamation, nilfs2 still emits the following lockdep +warning due to page/folio operations on shadowed page caches that nilfs2 +uses to get a snapshot of DAT file in memory: + + WARNING: CPU: 0 PID: 2643 at include/linux/backing-dev.h:272 __folio_mark_dirty+0x645/0x670 + ... + RIP: 0010:__folio_mark_dirty+0x645/0x670 + ... + Call Trace: + filemap_dirty_folio+0x74/0xd0 + __set_page_dirty_nobuffers+0x85/0xb0 + nilfs_copy_dirty_pages+0x288/0x510 [nilfs2] + nilfs_mdt_save_to_shadow_map+0x50/0xe0 [nilfs2] + nilfs_clean_segments+0xee/0x5d0 [nilfs2] + nilfs_ioctl_clean_segments.isra.19+0xb08/0xf40 [nilfs2] + nilfs_ioctl+0xc52/0xfb0 [nilfs2] + __x64_sys_ioctl+0x11d/0x170 + +This fixes the remaining warning by using inode objects to hold those +page caches. + +Link: https://lkml.kernel.org/r/1647867427-30498-3-git-send-email-konishi.ryusuke@gmail.com +Signed-off-by: Ryusuke Konishi +Tested-by: Ryusuke Konishi +Cc: Matthew Wilcox +Cc: David Hildenbrand +Cc: Hao Sun +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Sasha Levin +--- + fs/nilfs2/dat.c | 4 ++- + fs/nilfs2/inode.c | 63 ++++++++++++++++++++++++++++++++++++++++++++--- + fs/nilfs2/mdt.c | 38 +++++++++++++++++++--------- + fs/nilfs2/mdt.h | 6 ++--- + fs/nilfs2/nilfs.h | 2 ++ + 5 files changed, 92 insertions(+), 21 deletions(-) + +diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c +index dc51d3b7a7bf..3b55e239705f 100644 +--- a/fs/nilfs2/dat.c ++++ b/fs/nilfs2/dat.c +@@ -497,7 +497,9 @@ int nilfs_dat_read(struct super_block *sb, size_t entry_size, + di = NILFS_DAT_I(dat); + lockdep_set_class(&di->mi.mi_sem, &dat_lock_key); + nilfs_palloc_setup_cache(dat, &di->palloc_cache); +- nilfs_mdt_setup_shadow_map(dat, &di->shadow); ++ err = nilfs_mdt_setup_shadow_map(dat, &di->shadow); ++ if (err) ++ goto failed; + + err = nilfs_read_inode_common(dat, raw_inode); + if (err) +diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c +index 56b642159e25..d63d4bbad9fe 100644 +--- a/fs/nilfs2/inode.c ++++ b/fs/nilfs2/inode.c +@@ -30,6 +30,7 @@ + * @root: pointer on NILFS root object (mounted checkpoint) + * @for_gc: inode for GC flag + * @for_btnc: inode for B-tree node cache flag ++ * @for_shadow: inode for shadowed page cache flag + */ + struct nilfs_iget_args { + u64 ino; +@@ -37,6 +38,7 @@ struct nilfs_iget_args { + struct nilfs_root *root; + bool for_gc; + bool for_btnc; ++ bool for_shadow; + }; + + static int nilfs_iget_test(struct inode *inode, void *opaque); +@@ -317,7 +319,7 @@ static int nilfs_insert_inode_locked(struct inode *inode, + { + struct nilfs_iget_args args = { + .ino = ino, .root = root, .cno = 0, .for_gc = false, +- .for_btnc = false ++ .for_btnc = false, .for_shadow = false + }; + + return insert_inode_locked4(inode, ino, nilfs_iget_test, &args); +@@ -536,6 +538,12 @@ static int nilfs_iget_test(struct inode *inode, void *opaque) + } else if (args->for_btnc) { + return 0; + } ++ if (test_bit(NILFS_I_SHADOW, 
&ii->i_state)) { ++ if (!args->for_shadow) ++ return 0; ++ } else if (args->for_shadow) { ++ return 0; ++ } + + if (!test_bit(NILFS_I_GCINODE, &ii->i_state)) + return !args->for_gc; +@@ -557,6 +565,8 @@ static int nilfs_iget_set(struct inode *inode, void *opaque) + NILFS_I(inode)->i_state = BIT(NILFS_I_GCINODE); + if (args->for_btnc) + NILFS_I(inode)->i_state |= BIT(NILFS_I_BTNC); ++ if (args->for_shadow) ++ NILFS_I(inode)->i_state |= BIT(NILFS_I_SHADOW); + return 0; + } + +@@ -565,7 +575,7 @@ struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root, + { + struct nilfs_iget_args args = { + .ino = ino, .root = root, .cno = 0, .for_gc = false, +- .for_btnc = false ++ .for_btnc = false, .for_shadow = false + }; + + return ilookup5(sb, ino, nilfs_iget_test, &args); +@@ -576,7 +586,7 @@ struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root, + { + struct nilfs_iget_args args = { + .ino = ino, .root = root, .cno = 0, .for_gc = false, +- .for_btnc = false ++ .for_btnc = false, .for_shadow = false + }; + + return iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args); +@@ -608,7 +618,7 @@ struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, + { + struct nilfs_iget_args args = { + .ino = ino, .root = NULL, .cno = cno, .for_gc = true, +- .for_btnc = false ++ .for_btnc = false, .for_shadow = false + }; + struct inode *inode; + int err; +@@ -655,6 +665,7 @@ int nilfs_attach_btree_node_cache(struct inode *inode) + args.cno = ii->i_cno; + args.for_gc = test_bit(NILFS_I_GCINODE, &ii->i_state) != 0; + args.for_btnc = true; ++ args.for_shadow = test_bit(NILFS_I_SHADOW, &ii->i_state) != 0; + + btnc_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test, + nilfs_iget_set, &args); +@@ -690,6 +701,50 @@ void nilfs_detach_btree_node_cache(struct inode *inode) + } + } + ++/** ++ * nilfs_iget_for_shadow - obtain inode for shadow mapping ++ * @inode: inode object that uses shadow mapping ++ * ++ * nilfs_iget_for_shadow() allocates a pair of inodes that holds page ++ * caches for shadow mapping. The page cache for data pages is set up ++ * in one inode and the one for b-tree node pages is set up in the ++ * other inode, which is attached to the former inode. ++ * ++ * Return Value: On success, a pointer to the inode for data pages is ++ * returned. On errors, one of the following negative error code is returned ++ * in a pointer type. ++ * ++ * %-ENOMEM - Insufficient memory available. 
++ */ ++struct inode *nilfs_iget_for_shadow(struct inode *inode) ++{ ++ struct nilfs_iget_args args = { ++ .ino = inode->i_ino, .root = NULL, .cno = 0, .for_gc = false, ++ .for_btnc = false, .for_shadow = true ++ }; ++ struct inode *s_inode; ++ int err; ++ ++ s_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test, ++ nilfs_iget_set, &args); ++ if (unlikely(!s_inode)) ++ return ERR_PTR(-ENOMEM); ++ if (!(s_inode->i_state & I_NEW)) ++ return inode; ++ ++ NILFS_I(s_inode)->i_flags = 0; ++ memset(NILFS_I(s_inode)->i_bmap, 0, sizeof(struct nilfs_bmap)); ++ mapping_set_gfp_mask(s_inode->i_mapping, GFP_NOFS); ++ ++ err = nilfs_attach_btree_node_cache(s_inode); ++ if (unlikely(err)) { ++ iget_failed(s_inode); ++ return ERR_PTR(err); ++ } ++ unlock_new_inode(s_inode); ++ return s_inode; ++} ++ + void nilfs_write_inode_common(struct inode *inode, + struct nilfs_inode *raw_inode, int has_bmap) + { +diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c +index b26996420401..880b5e8cd3ec 100644 +--- a/fs/nilfs2/mdt.c ++++ b/fs/nilfs2/mdt.c +@@ -470,9 +470,18 @@ int nilfs_mdt_init(struct inode *inode, gfp_t gfp_mask, size_t objsz) + void nilfs_mdt_clear(struct inode *inode) + { + struct nilfs_mdt_info *mdi = NILFS_MDT(inode); ++ struct nilfs_shadow_map *shadow = mdi->mi_shadow; + + if (mdi->mi_palloc_cache) + nilfs_palloc_destroy_cache(inode); ++ ++ if (shadow) { ++ struct inode *s_inode = shadow->inode; ++ ++ shadow->inode = NULL; ++ iput(s_inode); ++ mdi->mi_shadow = NULL; ++ } + } + + /** +@@ -506,12 +515,15 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode, + struct nilfs_shadow_map *shadow) + { + struct nilfs_mdt_info *mi = NILFS_MDT(inode); ++ struct inode *s_inode; + + INIT_LIST_HEAD(&shadow->frozen_buffers); +- address_space_init_once(&shadow->frozen_data); +- nilfs_mapping_init(&shadow->frozen_data, inode); +- address_space_init_once(&shadow->frozen_btnodes); +- nilfs_mapping_init(&shadow->frozen_btnodes, inode); ++ ++ s_inode = nilfs_iget_for_shadow(inode); ++ if (IS_ERR(s_inode)) ++ return PTR_ERR(s_inode); ++ ++ shadow->inode = s_inode; + mi->mi_shadow = shadow; + return 0; + } +@@ -525,13 +537,14 @@ int nilfs_mdt_save_to_shadow_map(struct inode *inode) + struct nilfs_mdt_info *mi = NILFS_MDT(inode); + struct nilfs_inode_info *ii = NILFS_I(inode); + struct nilfs_shadow_map *shadow = mi->mi_shadow; ++ struct inode *s_inode = shadow->inode; + int ret; + +- ret = nilfs_copy_dirty_pages(&shadow->frozen_data, inode->i_mapping); ++ ret = nilfs_copy_dirty_pages(s_inode->i_mapping, inode->i_mapping); + if (ret) + goto out; + +- ret = nilfs_copy_dirty_pages(&shadow->frozen_btnodes, ++ ret = nilfs_copy_dirty_pages(NILFS_I(s_inode)->i_assoc_inode->i_mapping, + ii->i_assoc_inode->i_mapping); + if (ret) + goto out; +@@ -548,7 +561,7 @@ int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh) + struct page *page; + int blkbits = inode->i_blkbits; + +- page = grab_cache_page(&shadow->frozen_data, bh->b_page->index); ++ page = grab_cache_page(shadow->inode->i_mapping, bh->b_page->index); + if (!page) + return -ENOMEM; + +@@ -580,7 +593,7 @@ nilfs_mdt_get_frozen_buffer(struct inode *inode, struct buffer_head *bh) + struct page *page; + int n; + +- page = find_lock_page(&shadow->frozen_data, bh->b_page->index); ++ page = find_lock_page(shadow->inode->i_mapping, bh->b_page->index); + if (page) { + if (page_has_buffers(page)) { + n = bh_offset(bh) >> inode->i_blkbits; +@@ -621,11 +634,11 @@ void nilfs_mdt_restore_from_shadow_map(struct inode *inode) + nilfs_palloc_clear_cache(inode); + + 
nilfs_clear_dirty_pages(inode->i_mapping, true); +- nilfs_copy_back_pages(inode->i_mapping, &shadow->frozen_data); ++ nilfs_copy_back_pages(inode->i_mapping, shadow->inode->i_mapping); + + nilfs_clear_dirty_pages(ii->i_assoc_inode->i_mapping, true); + nilfs_copy_back_pages(ii->i_assoc_inode->i_mapping, +- &shadow->frozen_btnodes); ++ NILFS_I(shadow->inode)->i_assoc_inode->i_mapping); + + nilfs_bmap_restore(ii->i_bmap, &shadow->bmap_store); + +@@ -640,10 +653,11 @@ void nilfs_mdt_clear_shadow_map(struct inode *inode) + { + struct nilfs_mdt_info *mi = NILFS_MDT(inode); + struct nilfs_shadow_map *shadow = mi->mi_shadow; ++ struct inode *shadow_btnc_inode = NILFS_I(shadow->inode)->i_assoc_inode; + + down_write(&mi->mi_sem); + nilfs_release_frozen_buffers(shadow); +- truncate_inode_pages(&shadow->frozen_data, 0); +- truncate_inode_pages(&shadow->frozen_btnodes, 0); ++ truncate_inode_pages(shadow->inode->i_mapping, 0); ++ truncate_inode_pages(shadow_btnc_inode->i_mapping, 0); + up_write(&mi->mi_sem); + } +diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h +index 8f86080a436d..9e23bab3ff12 100644 +--- a/fs/nilfs2/mdt.h ++++ b/fs/nilfs2/mdt.h +@@ -18,14 +18,12 @@ + /** + * struct nilfs_shadow_map - shadow mapping of meta data file + * @bmap_store: shadow copy of bmap state +- * @frozen_data: shadowed dirty data pages +- * @frozen_btnodes: shadowed dirty b-tree nodes' pages ++ * @inode: holder of page caches used in shadow mapping + * @frozen_buffers: list of frozen buffers + */ + struct nilfs_shadow_map { + struct nilfs_bmap_store bmap_store; +- struct address_space frozen_data; +- struct address_space frozen_btnodes; ++ struct inode *inode; + struct list_head frozen_buffers; + }; + +diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h +index 36b048db00b7..1344f7d475d3 100644 +--- a/fs/nilfs2/nilfs.h ++++ b/fs/nilfs2/nilfs.h +@@ -92,6 +92,7 @@ enum { + NILFS_I_BMAP, /* has bmap and btnode_cache */ + NILFS_I_GCINODE, /* inode for GC, on memory only */ + NILFS_I_BTNC, /* inode for btree node cache */ ++ NILFS_I_SHADOW, /* inode for shadowed page cache */ + }; + + /* +@@ -263,6 +264,7 @@ extern struct inode *nilfs_iget_for_gc(struct super_block *sb, + unsigned long ino, __u64 cno); + int nilfs_attach_btree_node_cache(struct inode *inode); + void nilfs_detach_btree_node_cache(struct inode *inode); ++struct inode *nilfs_iget_for_shadow(struct inode *inode); + extern void nilfs_update_inode(struct inode *, struct buffer_head *, int); + extern void nilfs_truncate(struct inode *); + extern void nilfs_evict_inode(struct inode *); +-- +2.35.1 + diff --git a/queue-5.17/nilfs2-fix-lockdep-warnings-in-page-operations-for-b.patch b/queue-5.17/nilfs2-fix-lockdep-warnings-in-page-operations-for-b.patch new file mode 100644 index 00000000000..fe5a26753ed --- /dev/null +++ b/queue-5.17/nilfs2-fix-lockdep-warnings-in-page-operations-for-b.patch @@ -0,0 +1,624 @@ +From b85c63f0e6dcbe9624a7f3da1c4a1cdca2436029 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 1 Apr 2022 11:28:18 -0700 +Subject: nilfs2: fix lockdep warnings in page operations for btree nodes + +From: Ryusuke Konishi + +[ Upstream commit e897be17a441fa637cd166fc3de1445131e57692 ] + +Patch series "nilfs2 lockdep warning fixes". + +The first two are to resolve the lockdep warning issue, and the last one +is the accompanying cleanup and low priority. + +Based on your comment, this series solves the issue by separating inode +object as needed. 
Since I was worried about the impact of the object +composition changes, I tested the series carefully not to cause +regressions especially for delicate functions such like disk space +reclamation and snapshots. + +This patch (of 3): + +If CONFIG_LOCKDEP is enabled, nilfs2 hits lockdep warnings at +inode_to_wb() during page/folio operations for btree nodes: + + WARNING: CPU: 0 PID: 6575 at include/linux/backing-dev.h:269 inode_to_wb include/linux/backing-dev.h:269 [inline] + WARNING: CPU: 0 PID: 6575 at include/linux/backing-dev.h:269 folio_account_dirtied mm/page-writeback.c:2460 [inline] + WARNING: CPU: 0 PID: 6575 at include/linux/backing-dev.h:269 __folio_mark_dirty+0xa7c/0xe30 mm/page-writeback.c:2509 + Modules linked in: + ... + RIP: 0010:inode_to_wb include/linux/backing-dev.h:269 [inline] + RIP: 0010:folio_account_dirtied mm/page-writeback.c:2460 [inline] + RIP: 0010:__folio_mark_dirty+0xa7c/0xe30 mm/page-writeback.c:2509 + ... + Call Trace: + __set_page_dirty include/linux/pagemap.h:834 [inline] + mark_buffer_dirty+0x4e6/0x650 fs/buffer.c:1145 + nilfs_btree_propagate_p fs/nilfs2/btree.c:1889 [inline] + nilfs_btree_propagate+0x4ae/0xea0 fs/nilfs2/btree.c:2085 + nilfs_bmap_propagate+0x73/0x170 fs/nilfs2/bmap.c:337 + nilfs_collect_dat_data+0x45/0xd0 fs/nilfs2/segment.c:625 + nilfs_segctor_apply_buffers+0x14a/0x470 fs/nilfs2/segment.c:1009 + nilfs_segctor_scan_file+0x47a/0x700 fs/nilfs2/segment.c:1048 + nilfs_segctor_collect_blocks fs/nilfs2/segment.c:1224 [inline] + nilfs_segctor_collect fs/nilfs2/segment.c:1494 [inline] + nilfs_segctor_do_construct+0x14f3/0x6c60 fs/nilfs2/segment.c:2036 + nilfs_segctor_construct+0x7a7/0xb30 fs/nilfs2/segment.c:2372 + nilfs_segctor_thread_construct fs/nilfs2/segment.c:2480 [inline] + nilfs_segctor_thread+0x3c3/0xf90 fs/nilfs2/segment.c:2563 + kthread+0x405/0x4f0 kernel/kthread.c:327 + ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295 + +This is because nilfs2 uses two page caches for each inode and +inode->i_mapping never points to one of them, the btree node cache. + +This causes inode_to_wb(inode) to refer to a different page cache than +the caller page/folio operations such like __folio_start_writeback(), +__folio_end_writeback(), or __folio_mark_dirty() acquired the lock. + +This patch resolves the issue by allocating and using an additional +inode to hold the page cache of btree nodes. The inode is attached +one-to-one to the traditional nilfs2 inode if it requires a block +mapping with b-tree. This setup change is in memory only and does not +affect the disk format. 
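+
+For context, the assertion that fires lives in inode_to_wb() in
+include/linux/backing-dev.h; with CONFIG_LOCKDEP it checks (paraphrased
+here, not verbatim) that the caller holds a lock tied to the inode's
+own i_mapping:
+
+	WARN_ON_ONCE(debug_locks &&
+		     !lockdep_is_held(&inode->i_mapping->i_pages.xa_lock) &&
+		     !lockdep_is_held(&inode->i_lock) &&
+		     !lockdep_is_held(&inode->i_wb->list_lock));
+
+B-tree node pages are locked through their own address_space, which
+inode->i_mapping never points at, so none of these locks appear held.
+Hosting that cache in a dedicated inode makes i_mapping and the locked
+mapping agree, which is what the change below arranges.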
+ +Link: https://lkml.kernel.org/r/1647867427-30498-1-git-send-email-konishi.ryusuke@gmail.com +Link: https://lkml.kernel.org/r/1647867427-30498-2-git-send-email-konishi.ryusuke@gmail.com +Link: https://lore.kernel.org/r/YXrYvIo8YRnAOJCj@casper.infradead.org +Link: https://lore.kernel.org/r/9a20b33d-b38f-b4a2-4742-c1eb5b8e4d6c@redhat.com +Signed-off-by: Ryusuke Konishi +Reported-by: syzbot+0d5b462a6f07447991b3@syzkaller.appspotmail.com +Reported-by: syzbot+34ef28bb2aeb28724aa0@syzkaller.appspotmail.com +Reported-by: Hao Sun +Reported-by: David Hildenbrand +Tested-by: Ryusuke Konishi +Cc: Matthew Wilcox +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Sasha Levin +--- + fs/nilfs2/btnode.c | 23 ++++++++-- + fs/nilfs2/btnode.h | 1 + + fs/nilfs2/btree.c | 27 ++++++++---- + fs/nilfs2/gcinode.c | 7 +-- + fs/nilfs2/inode.c | 104 ++++++++++++++++++++++++++++++++++++++------ + fs/nilfs2/mdt.c | 7 +-- + fs/nilfs2/nilfs.h | 14 +++--- + fs/nilfs2/page.c | 7 ++- + fs/nilfs2/segment.c | 9 ++-- + fs/nilfs2/super.c | 5 +-- + 10 files changed, 154 insertions(+), 50 deletions(-) + +diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c +index 66bdaa2cf496..ca611ac09f7c 100644 +--- a/fs/nilfs2/btnode.c ++++ b/fs/nilfs2/btnode.c +@@ -20,6 +20,23 @@ + #include "page.h" + #include "btnode.h" + ++ ++/** ++ * nilfs_init_btnc_inode - initialize B-tree node cache inode ++ * @btnc_inode: inode to be initialized ++ * ++ * nilfs_init_btnc_inode() sets up an inode for B-tree node cache. ++ */ ++void nilfs_init_btnc_inode(struct inode *btnc_inode) ++{ ++ struct nilfs_inode_info *ii = NILFS_I(btnc_inode); ++ ++ btnc_inode->i_mode = S_IFREG; ++ ii->i_flags = 0; ++ memset(&ii->i_bmap_data, 0, sizeof(struct nilfs_bmap)); ++ mapping_set_gfp_mask(btnc_inode->i_mapping, GFP_NOFS); ++} ++ + void nilfs_btnode_cache_clear(struct address_space *btnc) + { + invalidate_mapping_pages(btnc, 0, -1); +@@ -29,7 +46,7 @@ void nilfs_btnode_cache_clear(struct address_space *btnc) + struct buffer_head * + nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr) + { +- struct inode *inode = NILFS_BTNC_I(btnc); ++ struct inode *inode = btnc->host; + struct buffer_head *bh; + + bh = nilfs_grab_buffer(inode, btnc, blocknr, BIT(BH_NILFS_Node)); +@@ -57,7 +74,7 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, + struct buffer_head **pbh, sector_t *submit_ptr) + { + struct buffer_head *bh; +- struct inode *inode = NILFS_BTNC_I(btnc); ++ struct inode *inode = btnc->host; + struct page *page; + int err; + +@@ -157,7 +174,7 @@ int nilfs_btnode_prepare_change_key(struct address_space *btnc, + struct nilfs_btnode_chkey_ctxt *ctxt) + { + struct buffer_head *obh, *nbh; +- struct inode *inode = NILFS_BTNC_I(btnc); ++ struct inode *inode = btnc->host; + __u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey; + int err; + +diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h +index 11663650add7..bd5544e63a01 100644 +--- a/fs/nilfs2/btnode.h ++++ b/fs/nilfs2/btnode.h +@@ -30,6 +30,7 @@ struct nilfs_btnode_chkey_ctxt { + struct buffer_head *newbh; + }; + ++void nilfs_init_btnc_inode(struct inode *btnc_inode); + void nilfs_btnode_cache_clear(struct address_space *); + struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc, + __u64 blocknr); +diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c +index 3594eabe1419..f544c22fff78 100644 +--- a/fs/nilfs2/btree.c ++++ b/fs/nilfs2/btree.c +@@ -58,7 +58,8 @@ static void nilfs_btree_free_path(struct nilfs_btree_path *path) + static int 
nilfs_btree_get_new_block(const struct nilfs_bmap *btree, + __u64 ptr, struct buffer_head **bhp) + { +- struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache; ++ struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; ++ struct address_space *btnc = btnc_inode->i_mapping; + struct buffer_head *bh; + + bh = nilfs_btnode_create_block(btnc, ptr); +@@ -470,7 +471,8 @@ static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, + struct buffer_head **bhp, + const struct nilfs_btree_readahead_info *ra) + { +- struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache; ++ struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; ++ struct address_space *btnc = btnc_inode->i_mapping; + struct buffer_head *bh, *ra_bh; + sector_t submit_ptr = 0; + int ret; +@@ -1741,6 +1743,10 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key, + dat = nilfs_bmap_get_dat(btree); + } + ++ ret = nilfs_attach_btree_node_cache(&NILFS_BMAP_I(btree)->vfs_inode); ++ if (ret < 0) ++ return ret; ++ + ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat); + if (ret < 0) + return ret; +@@ -1913,7 +1919,7 @@ static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree, + path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr; + path[level].bp_ctxt.bh = path[level].bp_bh; + ret = nilfs_btnode_prepare_change_key( +- &NILFS_BMAP_I(btree)->i_btnode_cache, ++ NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, + &path[level].bp_ctxt); + if (ret < 0) { + nilfs_dat_abort_update(dat, +@@ -1939,7 +1945,7 @@ static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree, + + if (buffer_nilfs_node(path[level].bp_bh)) { + nilfs_btnode_commit_change_key( +- &NILFS_BMAP_I(btree)->i_btnode_cache, ++ NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, + &path[level].bp_ctxt); + path[level].bp_bh = path[level].bp_ctxt.bh; + } +@@ -1958,7 +1964,7 @@ static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree, + &path[level].bp_newreq.bpr_req); + if (buffer_nilfs_node(path[level].bp_bh)) + nilfs_btnode_abort_change_key( +- &NILFS_BMAP_I(btree)->i_btnode_cache, ++ NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, + &path[level].bp_ctxt); + } + +@@ -2134,7 +2140,8 @@ static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree, + static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree, + struct list_head *listp) + { +- struct address_space *btcache = &NILFS_BMAP_I(btree)->i_btnode_cache; ++ struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; ++ struct address_space *btcache = btnc_inode->i_mapping; + struct list_head lists[NILFS_BTREE_LEVEL_MAX]; + struct pagevec pvec; + struct buffer_head *bh, *head; +@@ -2188,12 +2195,12 @@ static int nilfs_btree_assign_p(struct nilfs_bmap *btree, + path[level].bp_ctxt.newkey = blocknr; + path[level].bp_ctxt.bh = *bh; + ret = nilfs_btnode_prepare_change_key( +- &NILFS_BMAP_I(btree)->i_btnode_cache, ++ NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, + &path[level].bp_ctxt); + if (ret < 0) + return ret; + nilfs_btnode_commit_change_key( +- &NILFS_BMAP_I(btree)->i_btnode_cache, ++ NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, + &path[level].bp_ctxt); + *bh = path[level].bp_ctxt.bh; + } +@@ -2398,6 +2405,10 @@ int nilfs_btree_init(struct nilfs_bmap *bmap) + + if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), bmap->b_inode)) + ret = -EIO; ++ else ++ ret = nilfs_attach_btree_node_cache( ++ &NILFS_BMAP_I(bmap)->vfs_inode); ++ + return ret; + } + +diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c +index 
a8f5315f01e3..04fdd420eae7 100644 +--- a/fs/nilfs2/gcinode.c ++++ b/fs/nilfs2/gcinode.c +@@ -126,9 +126,10 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff, + int nilfs_gccache_submit_read_node(struct inode *inode, sector_t pbn, + __u64 vbn, struct buffer_head **out_bh) + { ++ struct inode *btnc_inode = NILFS_I(inode)->i_assoc_inode; + int ret; + +- ret = nilfs_btnode_submit_block(&NILFS_I(inode)->i_btnode_cache, ++ ret = nilfs_btnode_submit_block(btnc_inode->i_mapping, + vbn ? : pbn, pbn, REQ_OP_READ, 0, + out_bh, &pbn); + if (ret == -EEXIST) /* internal code (cache hit) */ +@@ -170,7 +171,7 @@ int nilfs_init_gcinode(struct inode *inode) + ii->i_flags = 0; + nilfs_bmap_init_gc(ii->i_bmap); + +- return 0; ++ return nilfs_attach_btree_node_cache(inode); + } + + /** +@@ -185,7 +186,7 @@ void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs) + ii = list_first_entry(head, struct nilfs_inode_info, i_dirty); + list_del_init(&ii->i_dirty); + truncate_inode_pages(&ii->vfs_inode.i_data, 0); +- nilfs_btnode_cache_clear(&ii->i_btnode_cache); ++ nilfs_btnode_cache_clear(ii->i_assoc_inode->i_mapping); + iput(&ii->vfs_inode); + } + } +diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c +index e3d807d5b83a..56b642159e25 100644 +--- a/fs/nilfs2/inode.c ++++ b/fs/nilfs2/inode.c +@@ -29,12 +29,14 @@ + * @cno: checkpoint number + * @root: pointer on NILFS root object (mounted checkpoint) + * @for_gc: inode for GC flag ++ * @for_btnc: inode for B-tree node cache flag + */ + struct nilfs_iget_args { + u64 ino; + __u64 cno; + struct nilfs_root *root; +- int for_gc; ++ bool for_gc; ++ bool for_btnc; + }; + + static int nilfs_iget_test(struct inode *inode, void *opaque); +@@ -314,7 +316,8 @@ static int nilfs_insert_inode_locked(struct inode *inode, + unsigned long ino) + { + struct nilfs_iget_args args = { +- .ino = ino, .root = root, .cno = 0, .for_gc = 0 ++ .ino = ino, .root = root, .cno = 0, .for_gc = false, ++ .for_btnc = false + }; + + return insert_inode_locked4(inode, ino, nilfs_iget_test, &args); +@@ -527,6 +530,13 @@ static int nilfs_iget_test(struct inode *inode, void *opaque) + return 0; + + ii = NILFS_I(inode); ++ if (test_bit(NILFS_I_BTNC, &ii->i_state)) { ++ if (!args->for_btnc) ++ return 0; ++ } else if (args->for_btnc) { ++ return 0; ++ } ++ + if (!test_bit(NILFS_I_GCINODE, &ii->i_state)) + return !args->for_gc; + +@@ -538,15 +548,15 @@ static int nilfs_iget_set(struct inode *inode, void *opaque) + struct nilfs_iget_args *args = opaque; + + inode->i_ino = args->ino; +- if (args->for_gc) { ++ NILFS_I(inode)->i_cno = args->cno; ++ NILFS_I(inode)->i_root = args->root; ++ if (args->root && args->ino == NILFS_ROOT_INO) ++ nilfs_get_root(args->root); ++ ++ if (args->for_gc) + NILFS_I(inode)->i_state = BIT(NILFS_I_GCINODE); +- NILFS_I(inode)->i_cno = args->cno; +- NILFS_I(inode)->i_root = NULL; +- } else { +- if (args->root && args->ino == NILFS_ROOT_INO) +- nilfs_get_root(args->root); +- NILFS_I(inode)->i_root = args->root; +- } ++ if (args->for_btnc) ++ NILFS_I(inode)->i_state |= BIT(NILFS_I_BTNC); + return 0; + } + +@@ -554,7 +564,8 @@ struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root, + unsigned long ino) + { + struct nilfs_iget_args args = { +- .ino = ino, .root = root, .cno = 0, .for_gc = 0 ++ .ino = ino, .root = root, .cno = 0, .for_gc = false, ++ .for_btnc = false + }; + + return ilookup5(sb, ino, nilfs_iget_test, &args); +@@ -564,7 +575,8 @@ struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root, + unsigned long ino) + 
{ + struct nilfs_iget_args args = { +- .ino = ino, .root = root, .cno = 0, .for_gc = 0 ++ .ino = ino, .root = root, .cno = 0, .for_gc = false, ++ .for_btnc = false + }; + + return iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args); +@@ -595,7 +607,8 @@ struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, + __u64 cno) + { + struct nilfs_iget_args args = { +- .ino = ino, .root = NULL, .cno = cno, .for_gc = 1 ++ .ino = ino, .root = NULL, .cno = cno, .for_gc = true, ++ .for_btnc = false + }; + struct inode *inode; + int err; +@@ -615,6 +628,68 @@ struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, + return inode; + } + ++/** ++ * nilfs_attach_btree_node_cache - attach a B-tree node cache to the inode ++ * @inode: inode object ++ * ++ * nilfs_attach_btree_node_cache() attaches a B-tree node cache to @inode, ++ * or does nothing if the inode already has it. This function allocates ++ * an additional inode to maintain page cache of B-tree nodes one-on-one. ++ * ++ * Return Value: On success, 0 is returned. On errors, one of the following ++ * negative error code is returned. ++ * ++ * %-ENOMEM - Insufficient memory available. ++ */ ++int nilfs_attach_btree_node_cache(struct inode *inode) ++{ ++ struct nilfs_inode_info *ii = NILFS_I(inode); ++ struct inode *btnc_inode; ++ struct nilfs_iget_args args; ++ ++ if (ii->i_assoc_inode) ++ return 0; ++ ++ args.ino = inode->i_ino; ++ args.root = ii->i_root; ++ args.cno = ii->i_cno; ++ args.for_gc = test_bit(NILFS_I_GCINODE, &ii->i_state) != 0; ++ args.for_btnc = true; ++ ++ btnc_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test, ++ nilfs_iget_set, &args); ++ if (unlikely(!btnc_inode)) ++ return -ENOMEM; ++ if (btnc_inode->i_state & I_NEW) { ++ nilfs_init_btnc_inode(btnc_inode); ++ unlock_new_inode(btnc_inode); ++ } ++ NILFS_I(btnc_inode)->i_assoc_inode = inode; ++ NILFS_I(btnc_inode)->i_bmap = ii->i_bmap; ++ ii->i_assoc_inode = btnc_inode; ++ ++ return 0; ++} ++ ++/** ++ * nilfs_detach_btree_node_cache - detach the B-tree node cache from the inode ++ * @inode: inode object ++ * ++ * nilfs_detach_btree_node_cache() detaches the B-tree node cache and its ++ * holder inode bound to @inode, or does nothing if @inode doesn't have it. 
++ */ ++void nilfs_detach_btree_node_cache(struct inode *inode) ++{ ++ struct nilfs_inode_info *ii = NILFS_I(inode); ++ struct inode *btnc_inode = ii->i_assoc_inode; ++ ++ if (btnc_inode) { ++ NILFS_I(btnc_inode)->i_assoc_inode = NULL; ++ ii->i_assoc_inode = NULL; ++ iput(btnc_inode); ++ } ++} ++ + void nilfs_write_inode_common(struct inode *inode, + struct nilfs_inode *raw_inode, int has_bmap) + { +@@ -762,7 +837,8 @@ static void nilfs_clear_inode(struct inode *inode) + if (test_bit(NILFS_I_BMAP, &ii->i_state)) + nilfs_bmap_clear(ii->i_bmap); + +- nilfs_btnode_cache_clear(&ii->i_btnode_cache); ++ if (!test_bit(NILFS_I_BTNC, &ii->i_state)) ++ nilfs_detach_btree_node_cache(inode); + + if (ii->i_root && inode->i_ino == NILFS_ROOT_INO) + nilfs_put_root(ii->i_root); +diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c +index 4b3d33cf0041..b26996420401 100644 +--- a/fs/nilfs2/mdt.c ++++ b/fs/nilfs2/mdt.c +@@ -532,7 +532,7 @@ int nilfs_mdt_save_to_shadow_map(struct inode *inode) + goto out; + + ret = nilfs_copy_dirty_pages(&shadow->frozen_btnodes, +- &ii->i_btnode_cache); ++ ii->i_assoc_inode->i_mapping); + if (ret) + goto out; + +@@ -623,8 +623,9 @@ void nilfs_mdt_restore_from_shadow_map(struct inode *inode) + nilfs_clear_dirty_pages(inode->i_mapping, true); + nilfs_copy_back_pages(inode->i_mapping, &shadow->frozen_data); + +- nilfs_clear_dirty_pages(&ii->i_btnode_cache, true); +- nilfs_copy_back_pages(&ii->i_btnode_cache, &shadow->frozen_btnodes); ++ nilfs_clear_dirty_pages(ii->i_assoc_inode->i_mapping, true); ++ nilfs_copy_back_pages(ii->i_assoc_inode->i_mapping, ++ &shadow->frozen_btnodes); + + nilfs_bmap_restore(ii->i_bmap, &shadow->bmap_store); + +diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h +index a7b81755c350..36b048db00b7 100644 +--- a/fs/nilfs2/nilfs.h ++++ b/fs/nilfs2/nilfs.h +@@ -28,7 +28,7 @@ + * @i_xattr: + * @i_dir_start_lookup: page index of last successful search + * @i_cno: checkpoint number for GC inode +- * @i_btnode_cache: cached pages of b-tree nodes ++ * @i_assoc_inode: associated inode (B-tree node cache holder or back pointer) + * @i_dirty: list for connecting dirty files + * @xattr_sem: semaphore for extended attributes processing + * @i_bh: buffer contains disk inode +@@ -43,7 +43,7 @@ struct nilfs_inode_info { + __u64 i_xattr; /* sector_t ??? 
*/ + __u32 i_dir_start_lookup; + __u64 i_cno; /* check point number for GC inode */ +- struct address_space i_btnode_cache; ++ struct inode *i_assoc_inode; + struct list_head i_dirty; /* List for connecting dirty files */ + + #ifdef CONFIG_NILFS_XATTR +@@ -75,13 +75,6 @@ NILFS_BMAP_I(const struct nilfs_bmap *bmap) + return container_of(bmap, struct nilfs_inode_info, i_bmap_data); + } + +-static inline struct inode *NILFS_BTNC_I(struct address_space *btnc) +-{ +- struct nilfs_inode_info *ii = +- container_of(btnc, struct nilfs_inode_info, i_btnode_cache); +- return &ii->vfs_inode; +-} +- + /* + * Dynamic state flags of NILFS on-memory inode (i_state) + */ +@@ -98,6 +91,7 @@ enum { + NILFS_I_INODE_SYNC, /* dsync is not allowed for inode */ + NILFS_I_BMAP, /* has bmap and btnode_cache */ + NILFS_I_GCINODE, /* inode for GC, on memory only */ ++ NILFS_I_BTNC, /* inode for btree node cache */ + }; + + /* +@@ -267,6 +261,8 @@ struct inode *nilfs_iget(struct super_block *sb, struct nilfs_root *root, + unsigned long ino); + extern struct inode *nilfs_iget_for_gc(struct super_block *sb, + unsigned long ino, __u64 cno); ++int nilfs_attach_btree_node_cache(struct inode *inode); ++void nilfs_detach_btree_node_cache(struct inode *inode); + extern void nilfs_update_inode(struct inode *, struct buffer_head *, int); + extern void nilfs_truncate(struct inode *); + extern void nilfs_evict_inode(struct inode *); +diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c +index 063dd16d75b5..45e079295008 100644 +--- a/fs/nilfs2/page.c ++++ b/fs/nilfs2/page.c +@@ -448,10 +448,9 @@ void nilfs_mapping_init(struct address_space *mapping, struct inode *inode) + /* + * NILFS2 needs clear_page_dirty() in the following two cases: + * +- * 1) For B-tree node pages and data pages of the dat/gcdat, NILFS2 clears +- * page dirty flags when it copies back pages from the shadow cache +- * (gcdat->{i_mapping,i_btnode_cache}) to its original cache +- * (dat->{i_mapping,i_btnode_cache}). ++ * 1) For B-tree node pages and data pages of DAT file, NILFS2 clears dirty ++ * flag of pages when it copies back pages from shadow cache to the ++ * original cache. + * + * 2) Some B-tree operations like insertion or deletion may dispose buffers + * in dirty state, and this needs to cancel the dirty state of their pages. 
+diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
+index 85a853334771..0afe0832c754 100644
+--- a/fs/nilfs2/segment.c
++++ b/fs/nilfs2/segment.c
+@@ -733,15 +733,18 @@ static void nilfs_lookup_dirty_node_buffers(struct inode *inode,
+ struct list_head *listp)
+ {
+ struct nilfs_inode_info *ii = NILFS_I(inode);
+- struct address_space *mapping = &ii->i_btnode_cache;
++ struct inode *btnc_inode = ii->i_assoc_inode;
+ struct pagevec pvec;
+ struct buffer_head *bh, *head;
+ unsigned int i;
+ pgoff_t index = 0;
+
++ if (!btnc_inode)
++ return;
++
+ pagevec_init(&pvec);
+
+- while (pagevec_lookup_tag(&pvec, mapping, &index,
++ while (pagevec_lookup_tag(&pvec, btnc_inode->i_mapping, &index,
+ PAGECACHE_TAG_DIRTY)) {
+ for (i = 0; i < pagevec_count(&pvec); i++) {
+ bh = head = page_buffers(pvec.pages[i]);
+@@ -2410,7 +2413,7 @@ nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head)
+ continue;
+ list_del_init(&ii->i_dirty);
+ truncate_inode_pages(&ii->vfs_inode.i_data, 0);
+- nilfs_btnode_cache_clear(&ii->i_btnode_cache);
++ nilfs_btnode_cache_clear(ii->i_assoc_inode->i_mapping);
+ iput(&ii->vfs_inode);
+ }
+ }
+diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
+index 63e5fa74016c..c4c6578185d5 100644
+--- a/fs/nilfs2/super.c
++++ b/fs/nilfs2/super.c
+@@ -157,7 +157,8 @@ struct inode *nilfs_alloc_inode(struct super_block *sb)
+ ii->i_bh = NULL;
+ ii->i_state = 0;
+ ii->i_cno = 0;
+- nilfs_mapping_init(&ii->i_btnode_cache, &ii->vfs_inode);
++ ii->i_assoc_inode = NULL;
++ ii->i_bmap = &ii->i_bmap_data;
+ return &ii->vfs_inode;
+ }
+
+@@ -1377,8 +1378,6 @@ static void nilfs_inode_init_once(void *obj)
+ #ifdef CONFIG_NILFS_XATTR
+ init_rwsem(&ii->xattr_sem);
+ #endif
+- address_space_init_once(&ii->i_btnode_cache);
+- ii->i_bmap = &ii->i_bmap_data;
+ inode_init_once(&ii->vfs_inode);
+ }
+
+--
+2.35.1
+
diff --git a/queue-5.17/nvme-multipath-fix-hang-when-disk-goes-live-over-rec.patch b/queue-5.17/nvme-multipath-fix-hang-when-disk-goes-live-over-rec.patch
new file mode 100644
index 00000000000..582f2bfb0e1
--- /dev/null
+++ b/queue-5.17/nvme-multipath-fix-hang-when-disk-goes-live-over-rec.patch
@@ -0,0 +1,165 @@
+From aa769a7ea32f2cbdfeb9c28d4fd83ea2a21a0c84 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Thu, 24 Mar 2022 13:05:11 -0600
+Subject: nvme-multipath: fix hang when disk goes live over reconnect
+
+From: Anton Eidelman
+
+[ Upstream commit a4a6f3c8f61c3cfbda4998ad94596059ad7e4332 ]
+
+nvme_mpath_init_identify() invoked from nvme_init_identify() fetches a
+fresh ANA log from the ctrl. This is essential to have up-to-date
+path states for both existing namespaces and for those scan_work may
+discover once the ctrl is up.
+
+This happens in the following cases:
+ 1) A new ctrl is being connected.
+ 2) An existing ctrl is successfully reconnected.
+ 3) An existing ctrl is being reset.
+
+While in (1) ctrl->namespaces is empty, (2 & 3) may have namespaces, and
+nvme_read_ana_log() may call nvme_update_ns_ana_state().
+
+This results in a hang when the ANA state of an existing namespace changes
+and makes the disk live: nvme_mpath_set_live() issues IO to the namespace
+through the ctrl, which does NOT have IO queues yet.
+
+See sample hang below.
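+
+The crux of the fix is a controller-state check in
+nvme_update_ns_ana_state(); a minimal sketch (the full hunk is in the
+diff below):
+
+	if (nvme_state_is_live(ns->ana_state) &&
+	    ns->ctrl->state == NVME_CTRL_LIVE) /* don't set_live before the ctrl is up */
+		nvme_mpath_set_live(ns);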
+
+Solution:
+- nvme_update_ns_ana_state() to call set_live only if ctrl is live
+- nvme_read_ana_log() call from nvme_mpath_init_identify()
+ therefore only fetches and parses the ANA log;
+ any errors in this process will fail the ctrl setup as appropriate;
+- a separate function nvme_mpath_update()
+ is called in nvme_start_ctrl();
+ this parses the ANA log without fetching it.
+ At this point the ctrl is live,
+ therefore, disks can be set live normally.
+
+Sample failure:
+ nvme nvme0: starting error recovery
+ nvme nvme0: Reconnecting in 10 seconds...
+ block nvme0n6: no usable path - requeuing I/O
+ INFO: task kworker/u8:3:312 blocked for more than 122 seconds.
+ Tainted: G E 5.14.5-1.el7.elrepo.x86_64 #1
+ Workqueue: nvme-wq nvme_tcp_reconnect_ctrl_work [nvme_tcp]
+ Call Trace:
+ __schedule+0x2a2/0x7e0
+ schedule+0x4e/0xb0
+ io_schedule+0x16/0x40
+ wait_on_page_bit_common+0x15c/0x3e0
+ do_read_cache_page+0x1e0/0x410
+ read_cache_page+0x12/0x20
+ read_part_sector+0x46/0x100
+ read_lba+0x121/0x240
+ efi_partition+0x1d2/0x6a0
+ bdev_disk_changed.part.0+0x1df/0x430
+ bdev_disk_changed+0x18/0x20
+ blkdev_get_whole+0x77/0xe0
+ blkdev_get_by_dev+0xd2/0x3a0
+ __device_add_disk+0x1ed/0x310
+ device_add_disk+0x13/0x20
+ nvme_mpath_set_live+0x138/0x1b0 [nvme_core]
+ nvme_update_ns_ana_state+0x2b/0x30 [nvme_core]
+ nvme_update_ana_state+0xca/0xe0 [nvme_core]
+ nvme_parse_ana_log+0xac/0x170 [nvme_core]
+ nvme_read_ana_log+0x7d/0xe0 [nvme_core]
+ nvme_mpath_init_identify+0x105/0x150 [nvme_core]
+ nvme_init_identify+0x2df/0x4d0 [nvme_core]
+ nvme_init_ctrl_finish+0x8d/0x3b0 [nvme_core]
+ nvme_tcp_setup_ctrl+0x337/0x390 [nvme_tcp]
+ nvme_tcp_reconnect_ctrl_work+0x24/0x40 [nvme_tcp]
+ process_one_work+0x1bd/0x360
+ worker_thread+0x50/0x3d0
+
+Signed-off-by: Anton Eidelman
+Reviewed-by: Sagi Grimberg
+Signed-off-by: Christoph Hellwig
+Signed-off-by: Sasha Levin
+---
+ drivers/nvme/host/core.c | 1 +
+ drivers/nvme/host/multipath.c | 25 +++++++++++++++++++++++--
+ drivers/nvme/host/nvme.h | 4 ++++
+ 3 files changed, 28 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
+index 10f7c79caac2..0abd772c57f0 100644
+--- a/drivers/nvme/host/core.c
++++ b/drivers/nvme/host/core.c
+@@ -4422,6 +4422,7 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl)
+ if (ctrl->queue_count > 1) {
+ nvme_queue_scan(ctrl);
+ nvme_start_queues(ctrl);
++ nvme_mpath_update(ctrl);
+ }
+ }
+ EXPORT_SYMBOL_GPL(nvme_start_ctrl);
+diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
+index a703f1f5fb64..189175fff7e4 100644
+--- a/drivers/nvme/host/multipath.c
++++ b/drivers/nvme/host/multipath.c
+@@ -635,8 +635,17 @@ static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc,
+ ns->ana_grpid = le32_to_cpu(desc->grpid);
+ ns->ana_state = desc->state;
+ clear_bit(NVME_NS_ANA_PENDING, &ns->flags);
+-
+- if (nvme_state_is_live(ns->ana_state))
++ /*
++ * nvme_mpath_set_live() will trigger I/O to the multipath path device
++ * and in turn to this path device. However we cannot accept this I/O
++ * if the controller is not live. This may deadlock if called from
++ * nvme_mpath_init_identify() and the ctrl will never complete
++ * initialization, preventing I/O from completing. For this case we
++ * will reprocess the ANA log page in nvme_mpath_update() once the
++ * controller is ready.
++ */ ++ if (nvme_state_is_live(ns->ana_state) && ++ ns->ctrl->state == NVME_CTRL_LIVE) + nvme_mpath_set_live(ns); + } + +@@ -723,6 +732,18 @@ static void nvme_ana_work(struct work_struct *work) + nvme_read_ana_log(ctrl); + } + ++void nvme_mpath_update(struct nvme_ctrl *ctrl) ++{ ++ u32 nr_change_groups = 0; ++ ++ if (!ctrl->ana_log_buf) ++ return; ++ ++ mutex_lock(&ctrl->ana_lock); ++ nvme_parse_ana_log(ctrl, &nr_change_groups, nvme_update_ana_state); ++ mutex_unlock(&ctrl->ana_lock); ++} ++ + static void nvme_anatt_timeout(struct timer_list *t) + { + struct nvme_ctrl *ctrl = from_timer(ctrl, t, anatt_timer); +diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h +index 68c42e831117..85f3f55c71c5 100644 +--- a/drivers/nvme/host/nvme.h ++++ b/drivers/nvme/host/nvme.h +@@ -800,6 +800,7 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id); + void nvme_mpath_remove_disk(struct nvme_ns_head *head); + int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id); + void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl); ++void nvme_mpath_update(struct nvme_ctrl *ctrl); + void nvme_mpath_uninit(struct nvme_ctrl *ctrl); + void nvme_mpath_stop(struct nvme_ctrl *ctrl); + bool nvme_mpath_clear_current_path(struct nvme_ns *ns); +@@ -874,6 +875,9 @@ static inline int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, + "Please enable CONFIG_NVME_MULTIPATH for full support of multi-port devices.\n"); + return 0; + } ++static inline void nvme_mpath_update(struct nvme_ctrl *ctrl) ++{ ++} + static inline void nvme_mpath_uninit(struct nvme_ctrl *ctrl) + { + } +-- +2.35.1 + diff --git a/queue-5.17/nvme-pci-add-quirks-for-samsung-x5-ssds.patch b/queue-5.17/nvme-pci-add-quirks-for-samsung-x5-ssds.patch new file mode 100644 index 00000000000..7509f7316ea --- /dev/null +++ b/queue-5.17/nvme-pci-add-quirks-for-samsung-x5-ssds.patch @@ -0,0 +1,38 @@ +From 10f91534acc744fa4e1c6f95f2d63d9536a112ff Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 16 Mar 2022 13:24:49 +0530 +Subject: nvme-pci: add quirks for Samsung X5 SSDs + +From: Monish Kumar R + +[ Upstream commit bc360b0b1611566e1bd47384daf49af6a1c51837 ] + +Add quirks to not fail the initialization and to have quick resume +latency after cold/warm reboot. 
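+
+A sketch of the resulting ID-table entry, with a non-authoritative
+reading of what each quirk addresses:
+
+	{ PCI_DEVICE(0x144d, 0xa808),   /* Samsung X5 */
+		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY |  /* wait before the ready check */
+			NVME_QUIRK_NO_DEEPEST_PS |        /* skip the deepest power state */
+			NVME_QUIRK_IGNORE_DEV_SUBNQN, },  /* don't trust the reported SUBNQN */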
+ +Signed-off-by: Monish Kumar R +Signed-off-by: Christoph Hellwig +Signed-off-by: Sasha Levin +--- + drivers/nvme/host/pci.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c +index e4b79bee6206..94a0b933b133 100644 +--- a/drivers/nvme/host/pci.c ++++ b/drivers/nvme/host/pci.c +@@ -3470,7 +3470,10 @@ static const struct pci_device_id nvme_id_table[] = { + NVME_QUIRK_128_BYTES_SQES | + NVME_QUIRK_SHARED_TAGS | + NVME_QUIRK_SKIP_CID_GEN }, +- ++ { PCI_DEVICE(0x144d, 0xa808), /* Samsung X5 */ ++ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY| ++ NVME_QUIRK_NO_DEEPEST_PS | ++ NVME_QUIRK_IGNORE_DEV_SUBNQN, }, + { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, + { 0, } + }; +-- +2.35.1 + diff --git a/queue-5.17/nvmet-use-a-private-workqueue-instead-of-the-system-.patch b/queue-5.17/nvmet-use-a-private-workqueue-instead-of-the-system-.patch new file mode 100644 index 00000000000..2c865a0b201 --- /dev/null +++ b/queue-5.17/nvmet-use-a-private-workqueue-instead-of-the-system-.patch @@ -0,0 +1,453 @@ +From 2b70b3f5c3764df536ca2f0c46bd183242967ad5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 21 Mar 2022 13:57:27 +0200 +Subject: nvmet: use a private workqueue instead of the system workqueue + +From: Sagi Grimberg + +[ Upstream commit 8832cf922151e9dfa2821736beb0ae2dd3968b6e ] + +Any attempt to flush kernel-global WQs has possibility of deadlock +so we should simply stop using them, instead introduce nvmet_wq +which is the generic nvmet workqueue for work elements that +don't explicitly require a dedicated workqueue (by the mere fact +that they are using the system_wq). + +Changes were done using the following replaces: + + - s/schedule_work(/queue_work(nvmet_wq, /g + - s/schedule_delayed_work(/queue_delayed_work(nvmet_wq, /g + - s/flush_scheduled_work()/flush_workqueue(nvmet_wq)/g + +Reported-by: Tetsuo Handa +Signed-off-by: Sagi Grimberg +Reviewed-by: Chaitanya Kulkarni +Signed-off-by: Christoph Hellwig +Signed-off-by: Sasha Levin +--- + drivers/nvme/target/admin-cmd.c | 2 +- + drivers/nvme/target/configfs.c | 2 +- + drivers/nvme/target/core.c | 24 ++++++++++++++++++------ + drivers/nvme/target/fc.c | 8 ++++---- + drivers/nvme/target/fcloop.c | 16 ++++++++-------- + drivers/nvme/target/io-cmd-file.c | 6 +++--- + drivers/nvme/target/loop.c | 4 ++-- + drivers/nvme/target/nvmet.h | 1 + + drivers/nvme/target/passthru.c | 2 +- + drivers/nvme/target/rdma.c | 12 ++++++------ + drivers/nvme/target/tcp.c | 10 +++++----- + 11 files changed, 50 insertions(+), 37 deletions(-) + +diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c +index 6fb24746de06..c3a9df5545cf 100644 +--- a/drivers/nvme/target/admin-cmd.c ++++ b/drivers/nvme/target/admin-cmd.c +@@ -984,7 +984,7 @@ void nvmet_execute_async_event(struct nvmet_req *req) + ctrl->async_event_cmds[ctrl->nr_async_event_cmds++] = req; + mutex_unlock(&ctrl->lock); + +- schedule_work(&ctrl->async_event_work); ++ queue_work(nvmet_wq, &ctrl->async_event_work); + } + + void nvmet_execute_keep_alive(struct nvmet_req *req) +diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c +index 496d775c6770..cea30e4f5053 100644 +--- a/drivers/nvme/target/configfs.c ++++ b/drivers/nvme/target/configfs.c +@@ -1554,7 +1554,7 @@ static void nvmet_port_release(struct config_item *item) + struct nvmet_port *port = to_nvmet_port(item); + + /* Let inflight controllers teardown complete */ +- flush_scheduled_work(); ++ flush_workqueue(nvmet_wq); + 
list_del(&port->global_entry); + + kfree(port->ana_state); +diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c +index 626caf6f1e4b..1c026a21f218 100644 +--- a/drivers/nvme/target/core.c ++++ b/drivers/nvme/target/core.c +@@ -20,6 +20,9 @@ struct workqueue_struct *zbd_wq; + static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX]; + static DEFINE_IDA(cntlid_ida); + ++struct workqueue_struct *nvmet_wq; ++EXPORT_SYMBOL_GPL(nvmet_wq); ++ + /* + * This read/write semaphore is used to synchronize access to configuration + * information on a target system that will result in discovery log page +@@ -205,7 +208,7 @@ void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type, + list_add_tail(&aen->entry, &ctrl->async_events); + mutex_unlock(&ctrl->lock); + +- schedule_work(&ctrl->async_event_work); ++ queue_work(nvmet_wq, &ctrl->async_event_work); + } + + static void nvmet_add_to_changed_ns_log(struct nvmet_ctrl *ctrl, __le32 nsid) +@@ -385,7 +388,7 @@ static void nvmet_keep_alive_timer(struct work_struct *work) + if (reset_tbkas) { + pr_debug("ctrl %d reschedule traffic based keep-alive timer\n", + ctrl->cntlid); +- schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ); ++ queue_delayed_work(nvmet_wq, &ctrl->ka_work, ctrl->kato * HZ); + return; + } + +@@ -403,7 +406,7 @@ void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl) + pr_debug("ctrl %d start keep-alive timer for %d secs\n", + ctrl->cntlid, ctrl->kato); + +- schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ); ++ queue_delayed_work(nvmet_wq, &ctrl->ka_work, ctrl->kato * HZ); + } + + void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl) +@@ -1479,7 +1482,7 @@ void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl) + mutex_lock(&ctrl->lock); + if (!(ctrl->csts & NVME_CSTS_CFS)) { + ctrl->csts |= NVME_CSTS_CFS; +- schedule_work(&ctrl->fatal_err_work); ++ queue_work(nvmet_wq, &ctrl->fatal_err_work); + } + mutex_unlock(&ctrl->lock); + } +@@ -1620,9 +1623,15 @@ static int __init nvmet_init(void) + goto out_free_zbd_work_queue; + } + ++ nvmet_wq = alloc_workqueue("nvmet-wq", WQ_MEM_RECLAIM, 0); ++ if (!nvmet_wq) { ++ error = -ENOMEM; ++ goto out_free_buffered_work_queue; ++ } ++ + error = nvmet_init_discovery(); + if (error) +- goto out_free_work_queue; ++ goto out_free_nvmet_work_queue; + + error = nvmet_init_configfs(); + if (error) +@@ -1631,7 +1640,9 @@ static int __init nvmet_init(void) + + out_exit_discovery: + nvmet_exit_discovery(); +-out_free_work_queue: ++out_free_nvmet_work_queue: ++ destroy_workqueue(nvmet_wq); ++out_free_buffered_work_queue: + destroy_workqueue(buffered_io_wq); + out_free_zbd_work_queue: + destroy_workqueue(zbd_wq); +@@ -1643,6 +1654,7 @@ static void __exit nvmet_exit(void) + nvmet_exit_configfs(); + nvmet_exit_discovery(); + ida_destroy(&cntlid_ida); ++ destroy_workqueue(nvmet_wq); + destroy_workqueue(buffered_io_wq); + destroy_workqueue(zbd_wq); + +diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c +index 22b5108168a6..c43bc5e1c7a2 100644 +--- a/drivers/nvme/target/fc.c ++++ b/drivers/nvme/target/fc.c +@@ -1491,7 +1491,7 @@ __nvmet_fc_free_assocs(struct nvmet_fc_tgtport *tgtport) + list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) { + if (!nvmet_fc_tgt_a_get(assoc)) + continue; +- if (!schedule_work(&assoc->del_work)) ++ if (!queue_work(nvmet_wq, &assoc->del_work)) + /* already deleting - release local reference */ + nvmet_fc_tgt_a_put(assoc); + } +@@ -1546,7 +1546,7 @@ nvmet_fc_invalidate_host(struct nvmet_fc_target_port *target_port, + 
continue; + assoc->hostport->invalid = 1; + noassoc = false; +- if (!schedule_work(&assoc->del_work)) ++ if (!queue_work(nvmet_wq, &assoc->del_work)) + /* already deleting - release local reference */ + nvmet_fc_tgt_a_put(assoc); + } +@@ -1592,7 +1592,7 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) + nvmet_fc_tgtport_put(tgtport); + + if (found_ctrl) { +- if (!schedule_work(&assoc->del_work)) ++ if (!queue_work(nvmet_wq, &assoc->del_work)) + /* already deleting - release local reference */ + nvmet_fc_tgt_a_put(assoc); + return; +@@ -2060,7 +2060,7 @@ nvmet_fc_rcv_ls_req(struct nvmet_fc_target_port *target_port, + iod->rqstdatalen = lsreqbuf_len; + iod->hosthandle = hosthandle; + +- schedule_work(&iod->work); ++ queue_work(nvmet_wq, &iod->work); + + return 0; + } +diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c +index 54606f1872b4..5c16372f3b53 100644 +--- a/drivers/nvme/target/fcloop.c ++++ b/drivers/nvme/target/fcloop.c +@@ -360,7 +360,7 @@ fcloop_h2t_ls_req(struct nvme_fc_local_port *localport, + spin_lock(&rport->lock); + list_add_tail(&rport->ls_list, &tls_req->ls_list); + spin_unlock(&rport->lock); +- schedule_work(&rport->ls_work); ++ queue_work(nvmet_wq, &rport->ls_work); + return ret; + } + +@@ -393,7 +393,7 @@ fcloop_h2t_xmt_ls_rsp(struct nvmet_fc_target_port *targetport, + spin_lock(&rport->lock); + list_add_tail(&rport->ls_list, &tls_req->ls_list); + spin_unlock(&rport->lock); +- schedule_work(&rport->ls_work); ++ queue_work(nvmet_wq, &rport->ls_work); + } + + return 0; +@@ -448,7 +448,7 @@ fcloop_t2h_ls_req(struct nvmet_fc_target_port *targetport, void *hosthandle, + spin_lock(&tport->lock); + list_add_tail(&tport->ls_list, &tls_req->ls_list); + spin_unlock(&tport->lock); +- schedule_work(&tport->ls_work); ++ queue_work(nvmet_wq, &tport->ls_work); + return ret; + } + +@@ -480,7 +480,7 @@ fcloop_t2h_xmt_ls_rsp(struct nvme_fc_local_port *localport, + spin_lock(&tport->lock); + list_add_tail(&tport->ls_list, &tls_req->ls_list); + spin_unlock(&tport->lock); +- schedule_work(&tport->ls_work); ++ queue_work(nvmet_wq, &tport->ls_work); + } + + return 0; +@@ -520,7 +520,7 @@ fcloop_tgt_discovery_evt(struct nvmet_fc_target_port *tgtport) + tgt_rscn->tport = tgtport->private; + INIT_WORK(&tgt_rscn->work, fcloop_tgt_rscn_work); + +- schedule_work(&tgt_rscn->work); ++ queue_work(nvmet_wq, &tgt_rscn->work); + } + + static void +@@ -739,7 +739,7 @@ fcloop_fcp_req(struct nvme_fc_local_port *localport, + INIT_WORK(&tfcp_req->tio_done_work, fcloop_tgt_fcprqst_done_work); + kref_init(&tfcp_req->ref); + +- schedule_work(&tfcp_req->fcp_rcv_work); ++ queue_work(nvmet_wq, &tfcp_req->fcp_rcv_work); + + return 0; + } +@@ -921,7 +921,7 @@ fcloop_fcp_req_release(struct nvmet_fc_target_port *tgtport, + { + struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq); + +- schedule_work(&tfcp_req->tio_done_work); ++ queue_work(nvmet_wq, &tfcp_req->tio_done_work); + } + + static void +@@ -976,7 +976,7 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport, + + if (abortio) + /* leave the reference while the work item is scheduled */ +- WARN_ON(!schedule_work(&tfcp_req->abort_rcv_work)); ++ WARN_ON(!queue_work(nvmet_wq, &tfcp_req->abort_rcv_work)); + else { + /* + * as the io has already had the done callback made, +diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c +index 6be6e59d273b..80f079a7015d 100644 +--- a/drivers/nvme/target/io-cmd-file.c ++++ b/drivers/nvme/target/io-cmd-file.c +@@ -292,7 +292,7 @@ static void 
nvmet_file_execute_flush(struct nvmet_req *req) + if (!nvmet_check_transfer_len(req, 0)) + return; + INIT_WORK(&req->f.work, nvmet_file_flush_work); +- schedule_work(&req->f.work); ++ queue_work(nvmet_wq, &req->f.work); + } + + static void nvmet_file_execute_discard(struct nvmet_req *req) +@@ -352,7 +352,7 @@ static void nvmet_file_execute_dsm(struct nvmet_req *req) + if (!nvmet_check_data_len_lte(req, nvmet_dsm_len(req))) + return; + INIT_WORK(&req->f.work, nvmet_file_dsm_work); +- schedule_work(&req->f.work); ++ queue_work(nvmet_wq, &req->f.work); + } + + static void nvmet_file_write_zeroes_work(struct work_struct *w) +@@ -382,7 +382,7 @@ static void nvmet_file_execute_write_zeroes(struct nvmet_req *req) + if (!nvmet_check_transfer_len(req, 0)) + return; + INIT_WORK(&req->f.work, nvmet_file_write_zeroes_work); +- schedule_work(&req->f.work); ++ queue_work(nvmet_wq, &req->f.work); + } + + u16 nvmet_file_parse_io_cmd(struct nvmet_req *req) +diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c +index eb1094254c82..2a968eeddda3 100644 +--- a/drivers/nvme/target/loop.c ++++ b/drivers/nvme/target/loop.c +@@ -166,7 +166,7 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, + iod->req.transfer_len = blk_rq_payload_bytes(req); + } + +- schedule_work(&iod->work); ++ queue_work(nvmet_wq, &iod->work); + return BLK_STS_OK; + } + +@@ -187,7 +187,7 @@ static void nvme_loop_submit_async_event(struct nvme_ctrl *arg) + return; + } + +- schedule_work(&iod->work); ++ queue_work(nvmet_wq, &iod->work); + } + + static int nvme_loop_init_iod(struct nvme_loop_ctrl *ctrl, +diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h +index af193423c10b..ff26dbde8c1e 100644 +--- a/drivers/nvme/target/nvmet.h ++++ b/drivers/nvme/target/nvmet.h +@@ -366,6 +366,7 @@ struct nvmet_req { + + extern struct workqueue_struct *buffered_io_wq; + extern struct workqueue_struct *zbd_wq; ++extern struct workqueue_struct *nvmet_wq; + + static inline void nvmet_set_result(struct nvmet_req *req, u32 result) + { +diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c +index 9e5b89ae29df..2b5031b646e9 100644 +--- a/drivers/nvme/target/passthru.c ++++ b/drivers/nvme/target/passthru.c +@@ -281,7 +281,7 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req) + if (req->p.use_workqueue || effects) { + INIT_WORK(&req->p.work, nvmet_passthru_execute_cmd_work); + req->p.rq = rq; +- schedule_work(&req->p.work); ++ queue_work(nvmet_wq, &req->p.work); + } else { + rq->end_io_data = req; + blk_execute_rq_nowait(rq, false, nvmet_passthru_req_done); +diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c +index 1deb4043e242..0ebfe2191165 100644 +--- a/drivers/nvme/target/rdma.c ++++ b/drivers/nvme/target/rdma.c +@@ -1584,7 +1584,7 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, + + if (queue->host_qid == 0) { + /* Let inflight controller teardown complete */ +- flush_scheduled_work(); ++ flush_workqueue(nvmet_wq); + } + + ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn); +@@ -1669,7 +1669,7 @@ static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue) + + if (disconnect) { + rdma_disconnect(queue->cm_id); +- schedule_work(&queue->release_work); ++ queue_work(nvmet_wq, &queue->release_work); + } + } + +@@ -1699,7 +1699,7 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id, + mutex_unlock(&nvmet_rdma_queue_mutex); + + pr_err("failed to connect queue %d\n", queue->idx); +- 
schedule_work(&queue->release_work);
++ queue_work(nvmet_wq, &queue->release_work);
+ }
+
+ /**
+@@ -1773,7 +1773,7 @@ static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id,
+ if (!queue) {
+ struct nvmet_rdma_port *port = cm_id->context;
+
+- schedule_delayed_work(&port->repair_work, 0);
++ queue_delayed_work(nvmet_wq, &port->repair_work, 0);
+ break;
+ }
+ fallthrough;
+@@ -1903,7 +1903,7 @@ static void nvmet_rdma_repair_port_work(struct work_struct *w)
+ nvmet_rdma_disable_port(port);
+ ret = nvmet_rdma_enable_port(port);
+ if (ret)
+- schedule_delayed_work(&port->repair_work, 5 * HZ);
++ queue_delayed_work(nvmet_wq, &port->repair_work, 5 * HZ);
+ }
+
+ static int nvmet_rdma_add_port(struct nvmet_port *nport)
+@@ -2053,7 +2053,7 @@ static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data
+ }
+ mutex_unlock(&nvmet_rdma_queue_mutex);
+
+- flush_scheduled_work();
++ flush_workqueue(nvmet_wq);
+ }
+
+ static struct ib_client nvmet_rdma_ib_client = {
+diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
+index 7c1c43ce466b..31bab7477d53 100644
+--- a/drivers/nvme/target/tcp.c
++++ b/drivers/nvme/target/tcp.c
+@@ -1269,7 +1269,7 @@ static void nvmet_tcp_schedule_release_queue(struct nvmet_tcp_queue *queue)
+ spin_lock(&queue->state_lock);
+ if (queue->state != NVMET_TCP_Q_DISCONNECTING) {
+ queue->state = NVMET_TCP_Q_DISCONNECTING;
+- schedule_work(&queue->release_work);
++ queue_work(nvmet_wq, &queue->release_work);
+ }
+ spin_unlock(&queue->state_lock);
+ }
+@@ -1684,7 +1684,7 @@ static void nvmet_tcp_listen_data_ready(struct sock *sk)
+ goto out;
+
+ if (sk->sk_state == TCP_LISTEN)
+- schedule_work(&port->accept_work);
++ queue_work(nvmet_wq, &port->accept_work);
+ out:
+ read_unlock_bh(&sk->sk_callback_lock);
+ }
+@@ -1815,7 +1815,7 @@ static u16 nvmet_tcp_install_queue(struct nvmet_sq *sq)
+
+ if (sq->qid == 0) {
+ /* Let inflight controller teardown complete */
+- flush_scheduled_work();
++ flush_workqueue(nvmet_wq);
+ }
+
+ queue->nr_cmds = sq->size * 2;
+@@ -1876,12 +1876,12 @@ static void __exit nvmet_tcp_exit(void)
+
+ nvmet_unregister_transport(&nvmet_tcp_ops);
+
+- flush_scheduled_work();
++ flush_workqueue(nvmet_wq);
+ mutex_lock(&nvmet_tcp_queue_mutex);
+ list_for_each_entry(queue, &nvmet_tcp_queue_list, queue_list)
+ kernel_sock_shutdown(queue->sock, SHUT_RDWR);
+ mutex_unlock(&nvmet_tcp_queue_mutex);
+- flush_scheduled_work();
++ flush_workqueue(nvmet_wq);
+
+ destroy_workqueue(nvmet_tcp_wq);
+ }
+--
+2.35.1
+
diff --git a/queue-5.17/platform-chrome-cros_ec_debugfs-detach-log-reader-wq.patch b/queue-5.17/platform-chrome-cros_ec_debugfs-detach-log-reader-wq.patch
new file mode 100644
index 00000000000..e886dca4081
--- /dev/null
+++ b/queue-5.17/platform-chrome-cros_ec_debugfs-detach-log-reader-wq.patch
@@ -0,0 +1,120 @@
+From cbfcc1e2f322132efa11ecfa42dd2585b4e67ff4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Wed, 9 Feb 2022 13:11:30 +0800
+Subject: platform/chrome: cros_ec_debugfs: detach log reader wq from devm
+
+From: Tzung-Bi Shih
+
+[ Upstream commit 0e8eb5e8acbad19ac2e1856b2fb2320184299b33 ]
+
+Debugfs console_log uses devm memory (e.g. debug_info in
+cros_ec_console_log_poll()). However, lifecycles of device and debugfs
+are independent. A use-after-free issue is observed if a userland
+program operates the debugfs after the memory has been freed.
+
+The call trace:
+ do_raw_spin_lock
+ _raw_spin_lock_irqsave
+ remove_wait_queue
+ ep_unregister_pollwait
+ ep_remove
+ do_epoll_ctl
+
+A Python example to reproduce the issue:
+... import select
+... p = select.epoll()
+... f = open('/sys/kernel/debug/cros_scp/console_log')
+... p.register(f, select.POLLIN)
+... p.poll(1)
+[(4, 1)] # 4=fd, 1=select.POLLIN
+
+[ shutdown cros_scp at the point ]
+
+... p.poll(1)
+[(4, 16)] # 4=fd, 16=select.POLLHUP
+... p.unregister(f)
+
+A use-after-free issue arises here. The call reached epoll_ctl with
+EPOLL_CTL_DEL, which in turn used the wait queue in the devm memory
+(i.e. log_wq).
+
+Detach the log reader's wait queue from devm to make sure it stays
+valid even after the device has been removed.
+
+Signed-off-by: Tzung-Bi Shih
+Reviewed-by: Guenter Roeck
+Link: https://lore.kernel.org/r/20220209051130.386175-1-tzungbi@google.com
+Signed-off-by: Benson Leung
+Signed-off-by: Sasha Levin
+---
+ drivers/platform/chrome/cros_ec_debugfs.c | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/platform/chrome/cros_ec_debugfs.c b/drivers/platform/chrome/cros_ec_debugfs.c
+index 272c89837d74..0dbceee87a4b 100644
+--- a/drivers/platform/chrome/cros_ec_debugfs.c
++++ b/drivers/platform/chrome/cros_ec_debugfs.c
+@@ -25,6 +25,9 @@
+
+ #define CIRC_ADD(idx, size, value) (((idx) + (value)) & ((size) - 1))
+
++/* waitqueue for log readers */
++static DECLARE_WAIT_QUEUE_HEAD(cros_ec_debugfs_log_wq);
++
+ /**
+ * struct cros_ec_debugfs - EC debugging information.
+ *
+ * @log_buffer: circular buffer for console log information
+ * @read_msg: preallocated EC command and buffer to read console log
+ * @log_mutex: mutex to protect circular buffer
+- * @log_wq: waitqueue for log readers
+ * @log_poll_work: recurring task to poll EC for new console log data
+ * @panicinfo_blob: panicinfo debugfs blob
+ */
+@@ -44,7 +46,6 @@ struct cros_ec_debugfs {
+ struct circ_buf log_buffer;
+ struct cros_ec_command *read_msg;
+ struct mutex log_mutex;
+- wait_queue_head_t log_wq;
+ struct delayed_work log_poll_work;
+ /* EC panicinfo */
+ struct debugfs_blob_wrapper panicinfo_blob;
+@@ -107,7 +108,7 @@ static void cros_ec_console_log_work(struct work_struct *__work)
+ buf_space--;
+ }
+
+- wake_up(&debug_info->log_wq);
++ wake_up(&cros_ec_debugfs_log_wq);
+ }
+
+ mutex_unlock(&debug_info->log_mutex);
+@@ -141,7 +142,7 @@ static ssize_t cros_ec_console_log_read(struct file *file, char __user *buf,
+
+ mutex_unlock(&debug_info->log_mutex);
+
+- ret = wait_event_interruptible(debug_info->log_wq,
++ ret = wait_event_interruptible(cros_ec_debugfs_log_wq,
+ CIRC_CNT(cb->head, cb->tail, LOG_SIZE));
+ if (ret < 0)
+ return ret;
+@@ -173,7 +174,7 @@ static __poll_t cros_ec_console_log_poll(struct file *file,
+ struct cros_ec_debugfs *debug_info = file->private_data;
+ __poll_t mask = 0;
+
+- poll_wait(file, &debug_info->log_wq, wait);
++ poll_wait(file, &cros_ec_debugfs_log_wq, wait);
+
+ mutex_lock(&debug_info->log_mutex);
+ if (CIRC_CNT(debug_info->log_buffer.head,
+@@ -377,7 +378,6 @@ static int cros_ec_create_console_log(struct cros_ec_debugfs *debug_info)
+ debug_info->log_buffer.tail = 0;
+
+ mutex_init(&debug_info->log_mutex);
+- init_waitqueue_head(&debug_info->log_wq);
+
+ debugfs_create_file("console_log", S_IFREG | 0444, debug_info->dir,
+ debug_info, &cros_ec_console_log_fops);
+--
+2.35.1
+
diff --git a/queue-5.17/rtc-fix-use-after-free-on-device-removal.patch b/queue-5.17/rtc-fix-use-after-free-on-device-removal.patch
new file mode 100644
index 00000000000..3aa72b88743 --- /dev/null +++ b/queue-5.17/rtc-fix-use-after-free-on-device-removal.patch @@ -0,0 +1,80 @@ +From 5202a7f9fe9e741851523815c8f30ec706c123d1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 10 Dec 2021 17:09:51 +0100 +Subject: rtc: fix use-after-free on device removal + +From: Vincent Whitchurch + +[ Upstream commit c8fa17d9f08a448184f03d352145099b5beb618e ] + +If the irqwork is still scheduled or running while the RTC device is +removed, a use-after-free occurs in rtc_timer_do_work(). Cleanup the +timerqueue and ensure the work is stopped to fix this. + + BUG: KASAN: use-after-free in mutex_lock+0x94/0x110 + Write of size 8 at addr ffffff801d846338 by task kworker/3:1/41 + + Workqueue: events rtc_timer_do_work + Call trace: + mutex_lock+0x94/0x110 + rtc_timer_do_work+0xec/0x630 + process_one_work+0x5fc/0x1344 + ... + + Allocated by task 551: + kmem_cache_alloc_trace+0x384/0x6e0 + devm_rtc_allocate_device+0xf0/0x574 + devm_rtc_device_register+0x2c/0x12c + ... + + Freed by task 572: + kfree+0x114/0x4d0 + rtc_device_release+0x64/0x80 + device_release+0x8c/0x1f4 + kobject_put+0x1c4/0x4b0 + put_device+0x20/0x30 + devm_rtc_release_device+0x1c/0x30 + devm_action_release+0x54/0x90 + release_nodes+0x124/0x310 + devres_release_group+0x170/0x240 + i2c_device_remove+0xd8/0x314 + ... + + Last potentially related work creation: + insert_work+0x5c/0x330 + queue_work_on+0xcc/0x154 + rtc_set_time+0x188/0x5bc + rtc_dev_ioctl+0x2ac/0xbd0 + ... + +Signed-off-by: Vincent Whitchurch +Signed-off-by: Alexandre Belloni +Link: https://lore.kernel.org/r/20211210160951.7718-1-vincent.whitchurch@axis.com +Signed-off-by: Sasha Levin +--- + drivers/rtc/class.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c +index 4b460c61f1d8..40d504dac1a9 100644 +--- a/drivers/rtc/class.c ++++ b/drivers/rtc/class.c +@@ -26,6 +26,15 @@ struct class *rtc_class; + static void rtc_device_release(struct device *dev) + { + struct rtc_device *rtc = to_rtc_device(dev); ++ struct timerqueue_head *head = &rtc->timerqueue; ++ struct timerqueue_node *node; ++ ++ mutex_lock(&rtc->ops_lock); ++ while ((node = timerqueue_getnext(head))) ++ timerqueue_del(head, node); ++ mutex_unlock(&rtc->ops_lock); ++ ++ cancel_work_sync(&rtc->irqwork); + + ida_simple_remove(&rtc_ida, rtc->id); + mutex_destroy(&rtc->ops_lock); +-- +2.35.1 + diff --git a/queue-5.17/rtc-mc146818-lib-fix-the-altcentury-for-amd-platform.patch b/queue-5.17/rtc-mc146818-lib-fix-the-altcentury-for-amd-platform.patch new file mode 100644 index 00000000000..593975ee10d --- /dev/null +++ b/queue-5.17/rtc-mc146818-lib-fix-the-altcentury-for-amd-platform.patch @@ -0,0 +1,109 @@ +From 4e284f85f7828b3c0d999667aed6f4c9aa8e1323 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 11 Jan 2022 16:57:50 -0600 +Subject: rtc: mc146818-lib: Fix the AltCentury for AMD platforms + +From: Mario Limonciello + +[ Upstream commit 3ae8fd41573af4fb3a490c9ed947fc936ba87190 ] + +Setting the century forward has been failing on AMD platforms. +There was a previous attempt at fixing this for family 0x17 as part of +commit 7ad295d5196a ("rtc: Fix the AltCentury value on AMD/Hygon +platform") but this was later reverted due to some problems reported +that appeared to stem from an FW bug on a family 0x17 desktop system. + +The same comments mentioned in the previous commit continue to apply +to the newer platforms as well. 
+ +``` +MC146818 driver use function mc146818_set_time() to set register +RTC_FREQ_SELECT(RTC_REG_A)'s bit4-bit6 field which means divider stage +reset value on Intel platform to 0x7. + +While AMD/Hygon RTC_REG_A(0Ah)'s bit4 is defined as DV0 [Reference]: +DV0 = 0 selects Bank 0, DV0 = 1 selects Bank 1. Bit5-bit6 is defined +as reserved. + +DV0 is set to 1, it will select Bank 1, which will disable AltCentury +register(0x32) access. As UEFI pass acpi_gbl_FADT.century 0x32 +(AltCentury), the CMOS write will be failed on code: +CMOS_WRITE(century, acpi_gbl_FADT.century). + +Correct RTC_REG_A bank select bit(DV0) to 0 on AMD/Hygon CPUs, it will +enable AltCentury(0x32) register writing and finally setup century as +expected. +``` + +However in closer examination the change previously submitted was also +modifying bits 5 & 6 which are declared reserved in the AMD documentation. +So instead modify just the DV0 bank selection bit. + +Being cognizant that there was a failure reported before, split the code +change out to a static function that can also be used for exclusions if +any regressions such as Mikhail's pop up again. + +Cc: Jinke Fan +Cc: Mikhail Gavrilov +Link: https://lore.kernel.org/all/CABXGCsMLob0DC25JS8wwAYydnDoHBSoMh2_YLPfqm3TTvDE-Zw@mail.gmail.com/ +Link: https://www.amd.com/system/files/TechDocs/51192_Bolton_FCH_RRG.pdf +Signed-off-by: Raul E Rangel +Signed-off-by: Mario Limonciello +Signed-off-by: Alexandre Belloni +Link: https://lore.kernel.org/r/20220111225750.1699-1-mario.limonciello@amd.com +Signed-off-by: Sasha Levin +--- + drivers/rtc/rtc-mc146818-lib.c | 16 +++++++++++++++- + include/linux/mc146818rtc.h | 2 ++ + 2 files changed, 17 insertions(+), 1 deletion(-) + +diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c +index 562f99b664a2..522449b25921 100644 +--- a/drivers/rtc/rtc-mc146818-lib.c ++++ b/drivers/rtc/rtc-mc146818-lib.c +@@ -176,6 +176,17 @@ int mc146818_get_time(struct rtc_time *time) + } + EXPORT_SYMBOL_GPL(mc146818_get_time); + ++/* AMD systems don't allow access to AltCentury with DV1 */ ++static bool apply_amd_register_a_behavior(void) ++{ ++#ifdef CONFIG_X86 ++ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || ++ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) ++ return true; ++#endif ++ return false; ++} ++ + /* Set the current date and time in the real time clock. */ + int mc146818_set_time(struct rtc_time *time) + { +@@ -249,7 +260,10 @@ int mc146818_set_time(struct rtc_time *time) + save_control = CMOS_READ(RTC_CONTROL); + CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); + save_freq_select = CMOS_READ(RTC_FREQ_SELECT); +- CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); ++ if (apply_amd_register_a_behavior()) ++ CMOS_WRITE((save_freq_select & ~RTC_AMD_BANK_SELECT), RTC_FREQ_SELECT); ++ else ++ CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); + + #ifdef CONFIG_MACH_DECSTATION + CMOS_WRITE(real_yrs, RTC_DEC_YEAR); +diff --git a/include/linux/mc146818rtc.h b/include/linux/mc146818rtc.h +index 808bb4cee230..b0da04fe087b 100644 +--- a/include/linux/mc146818rtc.h ++++ b/include/linux/mc146818rtc.h +@@ -86,6 +86,8 @@ struct cmos_rtc_board_info { + /* 2 values for divider stage reset, others for "testing purposes only" */ + # define RTC_DIV_RESET1 0x60 + # define RTC_DIV_RESET2 0x70 ++ /* In AMD BKDG bit 5 and 6 are reserved, bit 4 is for select dv0 bank */ ++# define RTC_AMD_BANK_SELECT 0x10 + /* Periodic intr. / Square wave rate select. 0=none, 1=32.8kHz,... 
15=2Hz */ + # define RTC_RATE_SELECT 0x0F + +-- +2.35.1 + diff --git a/queue-5.17/rtc-pcf2127-fix-bug-when-reading-alarm-registers.patch b/queue-5.17/rtc-pcf2127-fix-bug-when-reading-alarm-registers.patch new file mode 100644 index 00000000000..58660163fe5 --- /dev/null +++ b/queue-5.17/rtc-pcf2127-fix-bug-when-reading-alarm-registers.patch @@ -0,0 +1,45 @@ +From 0b5ce9af262fc260b1f6c3bd06e830c5ebc10718 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 8 Feb 2022 11:29:07 -0500 +Subject: rtc: pcf2127: fix bug when reading alarm registers + +From: Hugo Villeneuve + +[ Upstream commit 73ce05302007eece23a6acb7dc124c92a2209087 ] + +The first bug is that reading the 5 alarm registers results in a read +operation of 20 bytes. The reason is because the destination buffer is +defined as an array of "unsigned int", and we use the sizeof() +operator on this array to define the bulk read count. + +The second bug is that the read value is invalid, because we are +indexing the destination buffer as integers (4 bytes), instead of +indexing it as u8. + +Changing the destination buffer type to u8 fixes both problems. + +Signed-off-by: Hugo Villeneuve +Signed-off-by: Alexandre Belloni +Link: https://lore.kernel.org/r/20220208162908.3182581-1-hugo@hugovil.com +Signed-off-by: Sasha Levin +--- + drivers/rtc/rtc-pcf2127.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c +index 81a5b1f2e68c..6c9d8de41e7b 100644 +--- a/drivers/rtc/rtc-pcf2127.c ++++ b/drivers/rtc/rtc-pcf2127.c +@@ -374,7 +374,8 @@ static int pcf2127_watchdog_init(struct device *dev, struct pcf2127 *pcf2127) + static int pcf2127_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) + { + struct pcf2127 *pcf2127 = dev_get_drvdata(dev); +- unsigned int buf[5], ctrl2; ++ u8 buf[5]; ++ unsigned int ctrl2; + int ret; + + ret = regmap_read(pcf2127->regmap, PCF2127_REG_CTRL2, &ctrl2); +-- +2.35.1 + diff --git a/queue-5.17/rtc-sun6i-fix-time-overflow-handling.patch b/queue-5.17/rtc-sun6i-fix-time-overflow-handling.patch new file mode 100644 index 00000000000..8350f3b23f7 --- /dev/null +++ b/queue-5.17/rtc-sun6i-fix-time-overflow-handling.patch @@ -0,0 +1,80 @@ +From 6e0ede398a412570d63274e03c5a0c30ead16c58 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 11 Feb 2022 12:26:28 +0000 +Subject: rtc: sun6i: Fix time overflow handling + +From: Andre Przywara + +[ Upstream commit 9f6cd82eca7e91a0d0311242a87c6aa3c2737968 ] + +Using "unsigned long" for UNIX timestamps is never a good idea, and +comparing the value of such a variable against U32_MAX does not do +anything useful on 32-bit systems. + +Use the proper time64_t type when dealing with timestamps, and avoid +cutting down the time range unnecessarily. This also fixes the flawed +check for the alarm time being too far into the future. + +The check for this condition is actually somewhat theoretical, as the +RTC counts till 2033 only anyways, and 2^32 seconds from now is not +before the year 2157 - at which point I hope nobody will be using this +hardware anymore. 
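+
+A minimal sketch of the flawed pattern, using the names from the
+driver (illustrative only):
+
+	unsigned long time_gap = time_set - time_now; /* 32 bits on arm32 */
+
+	if (time_gap > U32_MAX) /* can never be true if unsigned long is 32-bit */
+		return -EINVAL;
+
+With time64_t operands the subtraction is done in 64 bits, so the
+comparison against U32_MAX really does catch alarms that are more than
+2^32 seconds away.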
+ +Signed-off-by: Andre Przywara +Reviewed-by: Jernej Skrabec +Signed-off-by: Alexandre Belloni +Link: https://lore.kernel.org/r/20220211122643.1343315-4-andre.przywara@arm.com +Signed-off-by: Sasha Levin +--- + drivers/rtc/rtc-sun6i.c | 14 +++++--------- + 1 file changed, 5 insertions(+), 9 deletions(-) + +diff --git a/drivers/rtc/rtc-sun6i.c b/drivers/rtc/rtc-sun6i.c +index 711832c758ae..bcc0c2ce4b4e 100644 +--- a/drivers/rtc/rtc-sun6i.c ++++ b/drivers/rtc/rtc-sun6i.c +@@ -138,7 +138,7 @@ struct sun6i_rtc_dev { + const struct sun6i_rtc_clk_data *data; + void __iomem *base; + int irq; +- unsigned long alarm; ++ time64_t alarm; + + struct clk_hw hw; + struct clk_hw *int_osc; +@@ -510,10 +510,8 @@ static int sun6i_rtc_setalarm(struct device *dev, struct rtc_wkalrm *wkalrm) + struct sun6i_rtc_dev *chip = dev_get_drvdata(dev); + struct rtc_time *alrm_tm = &wkalrm->time; + struct rtc_time tm_now; +- unsigned long time_now = 0; +- unsigned long time_set = 0; +- unsigned long time_gap = 0; +- int ret = 0; ++ time64_t time_now, time_set; ++ int ret; + + ret = sun6i_rtc_gettime(dev, &tm_now); + if (ret < 0) { +@@ -528,9 +526,7 @@ static int sun6i_rtc_setalarm(struct device *dev, struct rtc_wkalrm *wkalrm) + return -EINVAL; + } + +- time_gap = time_set - time_now; +- +- if (time_gap > U32_MAX) { ++ if ((time_set - time_now) > U32_MAX) { + dev_err(dev, "Date too far in the future\n"); + return -EINVAL; + } +@@ -539,7 +535,7 @@ static int sun6i_rtc_setalarm(struct device *dev, struct rtc_wkalrm *wkalrm) + writel(0, chip->base + SUN6I_ALRM_COUNTER); + usleep_range(100, 300); + +- writel(time_gap, chip->base + SUN6I_ALRM_COUNTER); ++ writel(time_set - time_now, chip->base + SUN6I_ALRM_COUNTER); + chip->alarm = time_set; + + sun6i_rtc_setaie(wkalrm->enabled, chip); +-- +2.35.1 + diff --git a/queue-5.17/s390-pci-improve-zpci_dev-reference-counting.patch b/queue-5.17/s390-pci-improve-zpci_dev-reference-counting.patch new file mode 100644 index 00000000000..db6a0366253 --- /dev/null +++ b/queue-5.17/s390-pci-improve-zpci_dev-reference-counting.patch @@ -0,0 +1,139 @@ +From e3ba0bb36a975f6b85fef3e4d750b4500563bfcb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 20 Sep 2021 09:32:21 +0200 +Subject: s390/pci: improve zpci_dev reference counting + +From: Niklas Schnelle + +[ Upstream commit c122383d221dfa2f41cfe5e672540595de986fde ] + +Currently zpci_dev uses kref based reference counting but only accounts +for one original reference plus one reference from an added pci_dev to +its underlying zpci_dev. Counting just the original reference worked +until the pci_dev reference was added in commit 2a671f77ee49 ("s390/pci: +fix use after free of zpci_dev") because once a zpci_dev goes away, i.e. +enters the reserved state, it would immediately get released. However +with the pci_dev reference this is no longer the case and the zpci_dev +may still appear in multiple availability events indicating that it was +reserved. This was solved by detecting when the zpci_dev is already on +its way out but still hanging around. This has however shown some light +on how unusual our zpci_dev reference counting is. + +Improve upon this by modelling zpci_dev reference counting on pci_dev. +Analogous to pci_get_slot() increment the reference count in +get_zdev_by_fid(). Thus all users of get_zdev_by_fid() must drop the +reference once they are done with the zpci_dev. 
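+
+A minimal sketch of the resulting caller pattern (illustrative only;
+both helpers appear in the diff below):
+
+	struct zpci_dev *zdev = get_zdev_by_fid(fid); /* takes a reference */
+
+	if (zdev) {
+		/* ... use zdev ... */
+		zpci_zdev_put(zdev); /* drop the reference when done */
+	}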
+ +Similar to pci_scan_single_device(), zpci_create_device() returns the +device with an initial count of 1 and the device added to the zpci_list +(analogous to the PCI bus' device_list). In turn users of +zpci_create_device() must only drop the reference once the device is +gone from the point of view of the zPCI subsystem, it might still be +referenced by the common PCI subsystem though. + +Reviewed-by: Matthew Rosato +Signed-off-by: Niklas Schnelle +Signed-off-by: Vasily Gorbik +Signed-off-by: Sasha Levin +--- + arch/s390/pci/pci.c | 1 + + arch/s390/pci/pci_bus.h | 3 ++- + arch/s390/pci/pci_clp.c | 9 +++++++-- + arch/s390/pci/pci_event.c | 7 ++++++- + 4 files changed, 16 insertions(+), 4 deletions(-) + +diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c +index 792f8e0f2178..5bcd9228db5f 100644 +--- a/arch/s390/pci/pci.c ++++ b/arch/s390/pci/pci.c +@@ -69,6 +69,7 @@ struct zpci_dev *get_zdev_by_fid(u32 fid) + list_for_each_entry(tmp, &zpci_list, entry) { + if (tmp->fid == fid) { + zdev = tmp; ++ zpci_zdev_get(zdev); + break; + } + } +diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h +index e359d2686178..ecef3a9e16c0 100644 +--- a/arch/s390/pci/pci_bus.h ++++ b/arch/s390/pci/pci_bus.h +@@ -19,7 +19,8 @@ void zpci_bus_remove_device(struct zpci_dev *zdev, bool set_error); + void zpci_release_device(struct kref *kref); + static inline void zpci_zdev_put(struct zpci_dev *zdev) + { +- kref_put(&zdev->kref, zpci_release_device); ++ if (zdev) ++ kref_put(&zdev->kref, zpci_release_device); + } + + static inline void zpci_zdev_get(struct zpci_dev *zdev) +diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c +index be077b39da33..5011d27461fd 100644 +--- a/arch/s390/pci/pci_clp.c ++++ b/arch/s390/pci/pci_clp.c +@@ -22,6 +22,8 @@ + #include + #include + ++#include "pci_bus.h" ++ + bool zpci_unique_uid; + + void update_uid_checking(bool new) +@@ -403,8 +405,11 @@ static void __clp_add(struct clp_fh_list_entry *entry, void *data) + return; + + zdev = get_zdev_by_fid(entry->fid); +- if (!zdev) +- zpci_create_device(entry->fid, entry->fh, entry->config_state); ++ if (zdev) { ++ zpci_zdev_put(zdev); ++ return; ++ } ++ zpci_create_device(entry->fid, entry->fh, entry->config_state); + } + + int clp_scan_pci_devices(void) +diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c +index 2e3e5b278925..ea9db5cea64e 100644 +--- a/arch/s390/pci/pci_event.c ++++ b/arch/s390/pci/pci_event.c +@@ -269,7 +269,7 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf) + pdev ? 
pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid); + + if (!pdev) +- return; ++ goto no_pdev; + + switch (ccdf->pec) { + case 0x003a: /* Service Action or Error Recovery Successful */ +@@ -286,6 +286,8 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf) + break; + } + pci_dev_put(pdev); ++no_pdev: ++ zpci_zdev_put(zdev); + } + + void zpci_event_error(void *data) +@@ -314,6 +316,7 @@ static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh) + static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) + { + struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); ++ bool existing_zdev = !!zdev; + enum zpci_state state; + + zpci_dbg(3, "avl fid:%x, fh:%x, pec:%x\n", +@@ -378,6 +381,8 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) + default: + break; + } ++ if (existing_zdev) ++ zpci_zdev_put(zdev); + } + + void zpci_event_availability(void *data) +-- +2.35.1 + diff --git a/queue-5.17/s390-traps-improve-panic-message-for-translation-spe.patch b/queue-5.17/s390-traps-improve-panic-message-for-translation-spe.patch new file mode 100644 index 00000000000..7e24b515eef --- /dev/null +++ b/queue-5.17/s390-traps-improve-panic-message-for-translation-spe.patch @@ -0,0 +1,52 @@ +From 556364e3b1061458ca728653061b4b157877f6d8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 16 Mar 2022 19:13:20 +0100 +Subject: s390/traps: improve panic message for translation-specification + exception + +From: Heiko Carstens + +[ Upstream commit f09354ffd84eef3c88efa8ba6df05efe50cfd16a ] + +There are many different types of translation exceptions but only a +translation-specification exception leads to a kernel panic since it +indicates corrupted page tables, which must never happen. + +Improve the panic message so it is a bit more obvious what this is about. + +Signed-off-by: Heiko Carstens +Signed-off-by: Vasily Gorbik +Signed-off-by: Sasha Levin +--- + arch/s390/kernel/traps.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c +index 2b780786fc68..ead721965b9f 100644 +--- a/arch/s390/kernel/traps.c ++++ b/arch/s390/kernel/traps.c +@@ -142,10 +142,10 @@ static inline void do_fp_trap(struct pt_regs *regs, __u32 fpc) + do_trap(regs, SIGFPE, si_code, "floating point exception"); + } + +-static void translation_exception(struct pt_regs *regs) ++static void translation_specification_exception(struct pt_regs *regs) + { + /* May never happen. 
*/ +- panic("Translation exception"); ++ panic("Translation-Specification Exception"); + } + + static void illegal_op(struct pt_regs *regs) +@@ -374,7 +374,7 @@ static void (*pgm_check_table[128])(struct pt_regs *regs) = { + [0x0f] = hfp_divide_exception, + [0x10] = do_dat_exception, + [0x11] = do_dat_exception, +- [0x12] = translation_exception, ++ [0x12] = translation_specification_exception, + [0x13] = special_op_exception, + [0x14] = default_trap_handler, + [0x15] = operand_exception, +-- +2.35.1 + diff --git a/queue-5.17/series b/queue-5.17/series index 80224e60fcf..214ed8a83cb 100644 --- a/queue-5.17/series +++ b/queue-5.17/series @@ -14,3 +14,33 @@ watchdog-sp5100_tco-refactor-mmio-base-address-initialization.patch watchdog-sp5100_tco-add-initialization-using-efch-mmio.patch watchdog-sp5100_tco-enable-family-17h-cpus.patch revert-drm-i915-opregion-check-port-number-bounds-for-swsci-display-power-state.patch +gfs2-cancel-timed-out-glock-requests.patch +gfs2-switch-lock-order-of-inode-and-iopen-glock.patch +rtc-fix-use-after-free-on-device-removal.patch +rtc-pcf2127-fix-bug-when-reading-alarm-registers.patch +kconfig-add-fflush-before-ferror-check.patch +um-cleanup-syscall_handler_t-definition-cast-fix-war.patch +um-port_user-improve-error-handling-when-port-helper.patch +input-add-bounds-checking-to-input_set_capability.patch +input-stmfts-fix-reference-leak-in-stmfts_input_open.patch +nvme-pci-add-quirks-for-samsung-x5-ssds.patch +gfs2-disable-page-faults-during-lockless-buffered-re.patch +rtc-sun6i-fix-time-overflow-handling.patch +crypto-stm32-fix-reference-leak-in-stm32_crc_remove.patch +crypto-x86-chacha20-avoid-spurious-jumps-to-other-fu.patch +alsa-hda-realtek-enable-headset-mic-on-lenovo-p360.patch +s390-traps-improve-panic-message-for-translation-spe.patch +s390-pci-improve-zpci_dev-reference-counting.patch +vhost_vdpa-don-t-setup-irq-offloading-when-irq_num-0.patch +tools-virtio-compile-with-pthread.patch +smb3-cleanup-and-clarify-status-of-tree-connections.patch +nvmet-use-a-private-workqueue-instead-of-the-system-.patch +nvme-multipath-fix-hang-when-disk-goes-live-over-rec.patch +rtc-mc146818-lib-fix-the-altcentury-for-amd-platform.patch +fs-fix-an-infinite-loop-in-iomap_fiemap.patch +mips-lantiq-check-the-return-value-of-kzalloc.patch +drbd-remove-usage-of-list-iterator-variable-after-lo.patch +platform-chrome-cros_ec_debugfs-detach-log-reader-wq.patch +arm-9191-1-arm-stacktrace-kasan-silence-kasan-warnin.patch +nilfs2-fix-lockdep-warnings-in-page-operations-for-b.patch +nilfs2-fix-lockdep-warnings-during-disk-space-reclam.patch diff --git a/queue-5.17/smb3-cleanup-and-clarify-status-of-tree-connections.patch b/queue-5.17/smb3-cleanup-and-clarify-status-of-tree-connections.patch new file mode 100644 index 00000000000..5db81e2c353 --- /dev/null +++ b/queue-5.17/smb3-cleanup-and-clarify-status-of-tree-connections.patch @@ -0,0 +1,285 @@ +From 5cf4d2dd5addceeaf343fc7b104022492390152a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 27 Mar 2022 16:07:30 -0500 +Subject: smb3: cleanup and clarify status of tree connections + +From: Steve French + +[ Upstream commit fdf59eb548e51bce81382c39f1a5fd4cb9403b78 ] + +Currently the way the tid (tree connection) status is tracked +is confusing. The same enum is used for structs cifs_tcon +and cifs_ses and TCP_Server_info, but each of these three has +different states that they transition among. The current +code also unnecessarily uses camelCase. + +Convert from use of statusEnum to a new tid_status_enum for +tree connections. 
The valid states for a tid are: + + TID_NEW = 0, + TID_GOOD, + TID_EXITING, + TID_NEED_RECON, + TID_NEED_TCON, + TID_IN_TCON, + TID_NEED_FILES_INVALIDATE, /* unused, considering removing in future */ + TID_IN_FILES_INVALIDATE + +It also removes CifsNeedTcon, CifsInTcon, CifsNeedFilesInvalidate and +CifsInFilesInvalidate from the statusEnum used for session and +TCP_Server_Info since they are not relevant for those. + +A follow on patch will fix the places where we use the +tcon->need_reconnect flag to be more consistent with the tid->status. + +Also fixes a bug that was: +Reported-by: kernel test robot +Reviewed-by: Shyam Prasad N +Reviewed-by: Ronnie Sahlberg +Signed-off-by: Steve French +Signed-off-by: Sasha Levin +--- + fs/cifs/cifs_debug.c | 2 +- + fs/cifs/cifsfs.c | 4 ++-- + fs/cifs/cifsglob.h | 18 +++++++++++++----- + fs/cifs/cifssmb.c | 11 +++++------ + fs/cifs/connect.c | 32 ++++++++++++++++---------------- + fs/cifs/misc.c | 2 +- + fs/cifs/smb2pdu.c | 4 ++-- + 7 files changed, 40 insertions(+), 33 deletions(-) + +diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c +index ea00e1a91250..9d334816eac0 100644 +--- a/fs/cifs/cifs_debug.c ++++ b/fs/cifs/cifs_debug.c +@@ -94,7 +94,7 @@ static void cifs_debug_tcon(struct seq_file *m, struct cifs_tcon *tcon) + le32_to_cpu(tcon->fsDevInfo.DeviceCharacteristics), + le32_to_cpu(tcon->fsAttrInfo.Attributes), + le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength), +- tcon->tidStatus); ++ tcon->status); + if (dev_type == FILE_DEVICE_DISK) + seq_puts(m, " type: DISK "); + else if (dev_type == FILE_DEVICE_CD_ROM) +diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c +index 10aa0fb94613..59d22261e082 100644 +--- a/fs/cifs/cifsfs.c ++++ b/fs/cifs/cifsfs.c +@@ -701,14 +701,14 @@ static void cifs_umount_begin(struct super_block *sb) + tcon = cifs_sb_master_tcon(cifs_sb); + + spin_lock(&cifs_tcp_ses_lock); +- if ((tcon->tc_count > 1) || (tcon->tidStatus == CifsExiting)) { ++ if ((tcon->tc_count > 1) || (tcon->status == TID_EXITING)) { + /* we have other mounts to same share or we have + already tried to force umount this and woken up + all waiting network requests, nothing to do */ + spin_unlock(&cifs_tcp_ses_lock); + return; + } else if (tcon->tc_count == 1) +- tcon->tidStatus = CifsExiting; ++ tcon->status = TID_EXITING; + spin_unlock(&cifs_tcp_ses_lock); + + /* cancel_brl_requests(tcon); */ /* BB mark all brl mids as exiting */ +diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h +index 48b343d03430..560ecc4ad87d 100644 +--- a/fs/cifs/cifsglob.h ++++ b/fs/cifs/cifsglob.h +@@ -115,10 +115,18 @@ enum statusEnum { + CifsInNegotiate, + CifsNeedSessSetup, + CifsInSessSetup, +- CifsNeedTcon, +- CifsInTcon, +- CifsNeedFilesInvalidate, +- CifsInFilesInvalidate ++}; ++ ++/* associated with each tree connection to the server */ ++enum tid_status_enum { ++ TID_NEW = 0, ++ TID_GOOD, ++ TID_EXITING, ++ TID_NEED_RECON, ++ TID_NEED_TCON, ++ TID_IN_TCON, ++ TID_NEED_FILES_INVALIDATE, /* currently unused */ ++ TID_IN_FILES_INVALIDATE + }; + + enum securityEnum { +@@ -1038,7 +1046,7 @@ struct cifs_tcon { + char *password; /* for share-level security */ + __u32 tid; /* The 4 byte tree id */ + __u16 Flags; /* optional support bits */ +- enum statusEnum tidStatus; ++ enum tid_status_enum status; + atomic_t num_smbs_sent; + union { + struct { +diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c +index 071e2f21a7db..aca9338b0877 100644 +--- a/fs/cifs/cifssmb.c ++++ b/fs/cifs/cifssmb.c +@@ -75,12 +75,11 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon) + + /* 
only send once per connect */ + spin_lock(&cifs_tcp_ses_lock); +- if (tcon->ses->status != CifsGood || +- tcon->tidStatus != CifsNeedReconnect) { ++ if ((tcon->ses->status != CifsGood) || (tcon->status != TID_NEED_RECON)) { + spin_unlock(&cifs_tcp_ses_lock); + return; + } +- tcon->tidStatus = CifsInFilesInvalidate; ++ tcon->status = TID_IN_FILES_INVALIDATE; + spin_unlock(&cifs_tcp_ses_lock); + + /* list all files open on tree connection and mark them invalid */ +@@ -100,8 +99,8 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon) + mutex_unlock(&tcon->crfid.fid_mutex); + + spin_lock(&cifs_tcp_ses_lock); +- if (tcon->tidStatus == CifsInFilesInvalidate) +- tcon->tidStatus = CifsNeedTcon; ++ if (tcon->status == TID_IN_FILES_INVALIDATE) ++ tcon->status = TID_NEED_TCON; + spin_unlock(&cifs_tcp_ses_lock); + + /* +@@ -136,7 +135,7 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) + * have tcon) are allowed as we start force umount + */ + spin_lock(&cifs_tcp_ses_lock); +- if (tcon->tidStatus == CifsExiting) { ++ if (tcon->status == TID_EXITING) { + if (smb_command != SMB_COM_WRITE_ANDX && + smb_command != SMB_COM_OPEN_ANDX && + smb_command != SMB_COM_TREE_DISCONNECT) { +diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c +index 532770c30415..c3a26f06fdaa 100644 +--- a/fs/cifs/connect.c ++++ b/fs/cifs/connect.c +@@ -245,7 +245,7 @@ cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server, + + list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { + tcon->need_reconnect = true; +- tcon->tidStatus = CifsNeedReconnect; ++ tcon->status = TID_NEED_RECON; + } + if (ses->tcon_ipc) + ses->tcon_ipc->need_reconnect = true; +@@ -2217,7 +2217,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) + + static int match_tcon(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) + { +- if (tcon->tidStatus == CifsExiting) ++ if (tcon->status == TID_EXITING) + return 0; + if (strncmp(tcon->treeName, ctx->UNC, MAX_TREE_SIZE)) + return 0; +@@ -4498,12 +4498,12 @@ int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const stru + /* only send once per connect */ + spin_lock(&cifs_tcp_ses_lock); + if (tcon->ses->status != CifsGood || +- (tcon->tidStatus != CifsNew && +- tcon->tidStatus != CifsNeedTcon)) { ++ (tcon->status != TID_NEW && ++ tcon->status != TID_NEED_TCON)) { + spin_unlock(&cifs_tcp_ses_lock); + return 0; + } +- tcon->tidStatus = CifsInTcon; ++ tcon->status = TID_IN_TCON; + spin_unlock(&cifs_tcp_ses_lock); + + tree = kzalloc(MAX_TREE_SIZE, GFP_KERNEL); +@@ -4544,13 +4544,13 @@ int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const stru + + if (rc) { + spin_lock(&cifs_tcp_ses_lock); +- if (tcon->tidStatus == CifsInTcon) +- tcon->tidStatus = CifsNeedTcon; ++ if (tcon->status == TID_IN_TCON) ++ tcon->status = TID_NEED_TCON; + spin_unlock(&cifs_tcp_ses_lock); + } else { + spin_lock(&cifs_tcp_ses_lock); +- if (tcon->tidStatus == CifsInTcon) +- tcon->tidStatus = CifsGood; ++ if (tcon->status == TID_IN_TCON) ++ tcon->status = TID_GOOD; + spin_unlock(&cifs_tcp_ses_lock); + tcon->need_reconnect = false; + } +@@ -4566,24 +4566,24 @@ int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const stru + /* only send once per connect */ + spin_lock(&cifs_tcp_ses_lock); + if (tcon->ses->status != CifsGood || +- (tcon->tidStatus != CifsNew && +- tcon->tidStatus != CifsNeedTcon)) { ++ (tcon->status != TID_NEW && ++ tcon->status != TID_NEED_TCON)) { + spin_unlock(&cifs_tcp_ses_lock); + return 0; + } +- tcon->tidStatus = 
CifsInTcon; ++ tcon->status = TID_IN_TCON; + spin_unlock(&cifs_tcp_ses_lock); + + rc = ops->tree_connect(xid, tcon->ses, tcon->treeName, tcon, nlsc); + if (rc) { + spin_lock(&cifs_tcp_ses_lock); +- if (tcon->tidStatus == CifsInTcon) +- tcon->tidStatus = CifsNeedTcon; ++ if (tcon->status == TID_IN_TCON) ++ tcon->status = TID_NEED_TCON; + spin_unlock(&cifs_tcp_ses_lock); + } else { + spin_lock(&cifs_tcp_ses_lock); +- if (tcon->tidStatus == CifsInTcon) +- tcon->tidStatus = CifsGood; ++ if (tcon->status == TID_IN_TCON) ++ tcon->status = TID_GOOD; + spin_unlock(&cifs_tcp_ses_lock); + tcon->need_reconnect = false; + } +diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c +index 56598f7dbe00..afaf59c22193 100644 +--- a/fs/cifs/misc.c ++++ b/fs/cifs/misc.c +@@ -116,7 +116,7 @@ tconInfoAlloc(void) + } + + atomic_inc(&tconInfoAllocCount); +- ret_buf->tidStatus = CifsNew; ++ ret_buf->status = TID_NEW; + ++ret_buf->tc_count; + INIT_LIST_HEAD(&ret_buf->openFileList); + INIT_LIST_HEAD(&ret_buf->tcon_list); +diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c +index f82d6fcb5c64..1704fd358b85 100644 +--- a/fs/cifs/smb2pdu.c ++++ b/fs/cifs/smb2pdu.c +@@ -163,7 +163,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, + return 0; + + spin_lock(&cifs_tcp_ses_lock); +- if (tcon->tidStatus == CifsExiting) { ++ if (tcon->status == TID_EXITING) { + /* + * only tree disconnect, open, and write, + * (and ulogoff which does not have tcon) +@@ -3865,7 +3865,7 @@ void smb2_reconnect_server(struct work_struct *work) + goto done; + } + +- tcon->tidStatus = CifsGood; ++ tcon->status = TID_GOOD; + tcon->retry = false; + tcon->need_reconnect = false; + +-- +2.35.1 + diff --git a/queue-5.17/tools-virtio-compile-with-pthread.patch b/queue-5.17/tools-virtio-compile-with-pthread.patch new file mode 100644 index 00000000000..efbd5de9fbd --- /dev/null +++ b/queue-5.17/tools-virtio-compile-with-pthread.patch @@ -0,0 +1,38 @@ +From 36093cdf65242553ac848df42360a482eddb2e55 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 20 Mar 2022 07:02:14 -0400 +Subject: tools/virtio: compile with -pthread + +From: Michael S. Tsirkin + +[ Upstream commit f03560a57c1f60db6ac23ffd9714e1c69e2f95c7 ] + +When using pthreads, one has to compile and link with -lpthread, +otherwise e.g. glibc is not guaranteed to be reentrant. + +This replaces -lpthread. + +Reported-by: Matthew Wilcox +Signed-off-by: Michael S. Tsirkin +Signed-off-by: Sasha Levin +--- + tools/virtio/Makefile | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/tools/virtio/Makefile b/tools/virtio/Makefile +index 0d7bbe49359d..1b25cc7c64bb 100644 +--- a/tools/virtio/Makefile ++++ b/tools/virtio/Makefile +@@ -5,7 +5,8 @@ virtio_test: virtio_ring.o virtio_test.o + vringh_test: vringh_test.o vringh.o virtio_ring.o + + CFLAGS += -g -O2 -Werror -Wno-maybe-uninitialized -Wall -I. -I../include/ -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -include ../../include/linux/kconfig.h +-LDFLAGS += -lpthread ++CFLAGS += -pthread ++LDFLAGS += -pthread + vpath %.c ../../drivers/virtio ../../drivers/vhost + mod: + ${MAKE} -C `pwd`/../.. 
M=`pwd`/vhost_test V=${V} +-- +2.35.1 + diff --git a/queue-5.17/um-cleanup-syscall_handler_t-definition-cast-fix-war.patch b/queue-5.17/um-cleanup-syscall_handler_t-definition-cast-fix-war.patch new file mode 100644 index 00000000000..5e716f4bf3d --- /dev/null +++ b/queue-5.17/um-cleanup-syscall_handler_t-definition-cast-fix-war.patch @@ -0,0 +1,70 @@ +From 4fb219ad8b4ab6baf5a8100a15fb5d1bf8c0bf8b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 10 Feb 2022 11:43:53 +0800 +Subject: um: Cleanup syscall_handler_t definition/cast, fix warning +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: David Gow + +[ Upstream commit f4f03f299a56ce4d73c5431e0327b3b6cb55ebb9 ] + +The syscall_handler_t type for x86_64 was defined as 'long (*)(void)', +but always cast to 'long (*)(long, long, long, long, long, long)' before +use. This now triggers a warning (see below). + +Define syscall_handler_t as the latter instead, and remove the cast. +This simplifies the code, and fixes the warning. + +Warning: +In file included from ../arch/um/include/asm/processor-generic.h:13 + from ../arch/x86/um/asm/processor.h:41 + from ../include/linux/rcupdate.h:30 + from ../include/linux/rculist.h:11 + from ../include/linux/pid.h:5 + from ../include/linux/sched.h:14 + from ../include/linux/ptrace.h:6 + from ../arch/um/kernel/skas/syscall.c:7: +../arch/um/kernel/skas/syscall.c: In function ‘handle_syscall’: +../arch/x86/um/shared/sysdep/syscalls_64.h:18:11: warning: cast between incompatible function types from ‘long int (*)(void)’ to ‘long int (*)(long int, long int, long int, long int, long int, long int)’ [ +-Wcast-function-type] + 18 | (((long (*)(long, long, long, long, long, long)) \ + | ^ +../arch/x86/um/asm/ptrace.h:36:62: note: in definition of macro ‘PT_REGS_SET_SYSCALL_RETURN’ + 36 | #define PT_REGS_SET_SYSCALL_RETURN(r, res) (PT_REGS_AX(r) = (res)) + | ^~~ +../arch/um/kernel/skas/syscall.c:46:33: note: in expansion of macro ‘EXECUTE_SYSCALL’ + 46 | EXECUTE_SYSCALL(syscall, regs)); + | ^~~~~~~~~~~~~~~ + +Signed-off-by: David Gow +Signed-off-by: Richard Weinberger +Signed-off-by: Sasha Levin +--- + arch/x86/um/shared/sysdep/syscalls_64.h | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/arch/x86/um/shared/sysdep/syscalls_64.h b/arch/x86/um/shared/sysdep/syscalls_64.h +index 48d6cd12f8a5..b6b997225841 100644 +--- a/arch/x86/um/shared/sysdep/syscalls_64.h ++++ b/arch/x86/um/shared/sysdep/syscalls_64.h +@@ -10,13 +10,12 @@ + #include + #include + +-typedef long syscall_handler_t(void); ++typedef long syscall_handler_t(long, long, long, long, long, long); + + extern syscall_handler_t *sys_call_table[]; + + #define EXECUTE_SYSCALL(syscall, regs) \ +- (((long (*)(long, long, long, long, long, long)) \ +- (*sys_call_table[syscall]))(UPT_SYSCALL_ARG1(®s->regs), \ ++ (((*sys_call_table[syscall]))(UPT_SYSCALL_ARG1(®s->regs), \ + UPT_SYSCALL_ARG2(®s->regs), \ + UPT_SYSCALL_ARG3(®s->regs), \ + UPT_SYSCALL_ARG4(®s->regs), \ +-- +2.35.1 + diff --git a/queue-5.17/um-port_user-improve-error-handling-when-port-helper.patch b/queue-5.17/um-port_user-improve-error-handling-when-port-helper.patch new file mode 100644 index 00000000000..eb9467d2ebb --- /dev/null +++ b/queue-5.17/um-port_user-improve-error-handling-when-port-helper.patch @@ -0,0 +1,55 @@ +From efc324ad7e7e1c92a8862bd71b2f5f8f15513304 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 3 Mar 2022 01:53:32 -0600 +Subject: um: port_user: Improve error handling when port-helper is not 
found + +From: Glenn Washburn + +[ Upstream commit 3cb5a7f167c620a8b0e38b0446df2e024d2243dc ] + +Check if port-helper exists and is executable. If not, write an error +message to the kernel log with information to help the user diagnose the +issue and exit with an error. If UML_PORT_HELPER was not set, write a +message suggesting that the user set it. This makes it easier to understand +why telneting to the UML instance is failing and what can be done to fix it. + +Signed-off-by: Glenn Washburn +Signed-off-by: Richard Weinberger +Signed-off-by: Sasha Levin +--- + arch/um/drivers/port_user.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/arch/um/drivers/port_user.c b/arch/um/drivers/port_user.c +index 5b5b64cb1071..133ca7bf2d91 100644 +--- a/arch/um/drivers/port_user.c ++++ b/arch/um/drivers/port_user.c +@@ -5,6 +5,7 @@ + + #include + #include ++#include + #include + #include + #include +@@ -175,6 +176,17 @@ int port_connection(int fd, int *socket, int *pid_out) + if (new < 0) + return -errno; + ++ err = os_access(argv[2], X_OK); ++ if (err < 0) { ++ printk(UM_KERN_ERR "port_connection : error accessing port-helper " ++ "executable at %s: %s\n", argv[2], strerror(-err)); ++ if (env == NULL) ++ printk(UM_KERN_ERR "Set UML_PORT_HELPER environment " ++ "variable to path to uml-utilities port-helper " ++ "binary\n"); ++ goto out_close; ++ } ++ + err = os_pipe(socket, 0, 0); + if (err < 0) + goto out_close; +-- +2.35.1 + diff --git a/queue-5.17/vhost_vdpa-don-t-setup-irq-offloading-when-irq_num-0.patch b/queue-5.17/vhost_vdpa-don-t-setup-irq-offloading-when-irq_num-0.patch new file mode 100644 index 00000000000..f5f5605324d --- /dev/null +++ b/queue-5.17/vhost_vdpa-don-t-setup-irq-offloading-when-irq_num-0.patch @@ -0,0 +1,41 @@ +From 963bba98e39dac79c697f124f66f7b61e9d161ff Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 22 Feb 2022 19:54:25 +0800 +Subject: vhost_vdpa: don't setup irq offloading when irq_num < 0 + +From: Zhu Lingshan + +[ Upstream commit cce0ab2b2a39072d81f98017f7b076f3410ef740 ] + +When irq number is negative(e.g., -EINVAL), the virtqueue +may be disabled or the virtqueues are sharing a device irq. +In such case, we should not setup irq offloading for a virtqueue. + +Signed-off-by: Zhu Lingshan +Link: https://lore.kernel.org/r/20220222115428.998334-3-lingshan.zhu@intel.com +Signed-off-by: Michael S. Tsirkin +Signed-off-by: Sasha Levin +--- + drivers/vhost/vdpa.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c +index ec5249e8c32d..05f5fd2af58f 100644 +--- a/drivers/vhost/vdpa.c ++++ b/drivers/vhost/vdpa.c +@@ -97,8 +97,11 @@ static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid) + return; + + irq = ops->get_vq_irq(vdpa, qid); ++ if (irq < 0) ++ return; ++ + irq_bypass_unregister_producer(&vq->call_ctx.producer); +- if (!vq->call_ctx.ctx || irq < 0) ++ if (!vq->call_ctx.ctx) + return; + + vq->call_ctx.producer.token = vq->call_ctx.ctx; +-- +2.35.1 +
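
The s390/pci patch earlier in this series leans on a convention worth spelling out: a lookup helper such as get_zdev_by_fid() takes a reference under the list lock before returning, and every caller owns exactly one put. Below is a minimal userspace sketch of that lookup-takes-a-reference pattern; the names (zdev, fid, zdev_put) mirror the kernel code for readability, but this is an illustration using C11 atomics standing in for the kernel's kref, not the actual zPCI API, and the fixed device array stands in for zpci_list.

```c
/*
 * Sketch of the "lookup returns a counted reference" pattern from the
 * s390/pci patch above. Illustrative only: plain C11 atomics replace
 * the kernel's kref, and a fixed array replaces the locked zpci_list.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct zdev {
	unsigned int fid;
	atomic_int refcount;	/* creation sets this to 1 */
};

static struct zdev *devices[4];	/* stand-in for zpci_list */

/* Lookup must take its reference before the caller sees the pointer. */
static struct zdev *get_zdev_by_fid(unsigned int fid)
{
	for (int i = 0; i < 4; i++) {
		struct zdev *z = devices[i];
		if (z && z->fid == fid) {
			atomic_fetch_add(&z->refcount, 1);
			return z;
		}
	}
	return NULL;
}

/* Each owner drops exactly one reference; the last put frees. */
static void zdev_put(struct zdev *z)
{
	if (z && atomic_fetch_sub(&z->refcount, 1) == 1)
		free(z);
}

int main(void)
{
	struct zdev *z = calloc(1, sizeof(*z));

	z->fid = 0x42;
	atomic_init(&z->refcount, 1);	/* creation reference */
	devices[0] = z;

	struct zdev *found = get_zdev_by_fid(0x42);
	if (found)
		printf("fid %#x: refcount now 2\n", found->fid);
	zdev_put(found);	/* drop the lookup reference */

	devices[0] = NULL;	/* device gone from the subsystem's view */
	zdev_put(z);		/* drop the creation reference; frees */
	return 0;
}
```

This is why the patch adds zpci_zdev_get() inside the fid-match loop and a matching zpci_zdev_put() on every path that obtained the pointer (__clp_add, both event handlers): without the get under the lock, the device could be freed between lookup and use.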