From: Greg Kroah-Hartman Date: Sun, 23 Jul 2023 13:57:27 +0000 (+0200) Subject: 6.4-stable patches X-Git-Tag: v6.1.41~47 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=7f10cbf82f8ecfe9a35ab03c0e9b38841ce2ee3a;p=thirdparty%2Fkernel%2Fstable-queue.git 6.4-stable patches added patches: accel-qaic-fix-a-leak-in-map_user_pages.patch btrfs-fix-double-iput-on-inode-after-an-error-during-orphan-cleanup.patch btrfs-fix-iput-on-error-pointer-after-error-during-orphan-cleanup.patch btrfs-fix-race-between-balance-and-cancel-pause.patch btrfs-fix-warning-when-putting-transaction-with-qgroups-enabled-after-abort.patch btrfs-raid56-always-verify-the-p-q-contents-for-scrub.patch btrfs-set_page_extent_mapped-after-read_folio-in-btrfs_cont_expand.patch btrfs-zoned-fix-memory-leak-after-finding-block-group-with-super-blocks.patch fuse-add-feature-flag-for-expire-only.patch fuse-apply-flags2-only-when-userspace-set-the-fuse_init_ext.patch fuse-ioctl-translate-enosys-in-outarg.patch fuse-revalidate-don-t-invalidate-if-interrupted.patch keys-fix-linking-a-duplicate-key-to-a-keyring-s-assoc_array.patch maple_tree-fix-node-allocation-testing-on-32-bit.patch maple_tree-set-the-node-limit-when-creating-a-new-root-node.patch mm-mlock-fix-vma-iterator-conversion-of-apply_vma_lock_flags.patch perf-probe-add-test-for-regression-introduced-by-switch-to-die_get_decl_file.patch perf-probe-read-dwarf-files-from-the-correct-cu.patch prctl-move-pr_get_auxv-out-of-pr_mce_kill.patch selftests-mm-mkdirty-fix-incorrect-position-of-endif.patch selftests-tc-add-ct-action-kconfig-dep.patch selftests-tc-set-timeout-to-15-minutes.patch --- diff --git a/queue-6.4/accel-qaic-fix-a-leak-in-map_user_pages.patch b/queue-6.4/accel-qaic-fix-a-leak-in-map_user_pages.patch new file mode 100644 index 00000000000..0c67f9f16c0 --- /dev/null +++ b/queue-6.4/accel-qaic-fix-a-leak-in-map_user_pages.patch @@ -0,0 +1,43 @@ +From 73274c33d961f4aa0f968f763e2c9f4210b4f4a3 Mon Sep 17 00:00:00 2001 +From: Dan 
Carpenter +Date: Tue, 11 Jul 2023 11:21:13 +0300 +Subject: accel/qaic: Fix a leak in map_user_pages() + +From: Dan Carpenter + +commit 73274c33d961f4aa0f968f763e2c9f4210b4f4a3 upstream. + +If get_user_pages_fast() allocates some pages but not as many as we +wanted, then the current code leaks those pages. Call put_page() on +the pages before returning. + +Fixes: 129776ac2e38 ("accel/qaic: Add control path") +Signed-off-by: Dan Carpenter +Reviewed-by: Pranjal Ramajor Asha Kanojiya +Reviewed-by: Jeffrey Hugo +Reviewed-by: Dafna Hirschfeld +Cc: stable@vger.kernel.org # 6.4.x +Signed-off-by: Jeffrey Hugo +Link: https://patchwork.freedesktop.org/patch/msgid/ZK0Q+ZuONTsBG+1T@moroto +Signed-off-by: Greg Kroah-Hartman +--- + drivers/accel/qaic/qaic_control.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/drivers/accel/qaic/qaic_control.c ++++ b/drivers/accel/qaic/qaic_control.c +@@ -418,9 +418,12 @@ static int find_and_map_user_pages(struc + } + + ret = get_user_pages_fast(xfer_start_addr, nr_pages, 0, page_list); +- if (ret < 0 || ret != nr_pages) { +- ret = -EFAULT; ++ if (ret < 0) + goto free_page_list; ++ if (ret != nr_pages) { ++ nr_pages = ret; ++ ret = -EFAULT; ++ goto put_pages; + } + + sgt = kmalloc(sizeof(*sgt), GFP_KERNEL); diff --git a/queue-6.4/btrfs-fix-double-iput-on-inode-after-an-error-during-orphan-cleanup.patch b/queue-6.4/btrfs-fix-double-iput-on-inode-after-an-error-during-orphan-cleanup.patch new file mode 100644 index 00000000000..4286ab29aad --- /dev/null +++ b/queue-6.4/btrfs-fix-double-iput-on-inode-after-an-error-during-orphan-cleanup.patch @@ -0,0 +1,38 @@ +From b777d279ff31979add57e8a3f810bceb7ef0cfb7 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Mon, 3 Jul 2023 18:15:30 +0100 +Subject: btrfs: fix double iput() on inode after an error during orphan cleanup + +From: Filipe Manana + +commit b777d279ff31979add57e8a3f810bceb7ef0cfb7 upstream. 
+ +At btrfs_orphan_cleanup(), if we were able to find the inode, we do an +iput() on the inode, then if btrfs_drop_verity_items() succeeds and then +either btrfs_start_transaction() or btrfs_del_orphan_item() fail, we do +another iput() in the respective error paths, resulting in an extra iput() +on the inode. + +Fix this by setting inode to NULL after the first iput(), as iput() +ignores a NULL inode pointer argument. + +Fixes: a13bb2c03848 ("btrfs: add missing iputs on orphan cleanup failure") +CC: stable@vger.kernel.org # 6.4 +Reviewed-by: Boris Burkov +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/inode.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -3618,6 +3618,7 @@ int btrfs_orphan_cleanup(struct btrfs_ro + if (inode) { + ret = btrfs_drop_verity_items(BTRFS_I(inode)); + iput(inode); ++ inode = NULL; + if (ret) + goto out; + } diff --git a/queue-6.4/btrfs-fix-iput-on-error-pointer-after-error-during-orphan-cleanup.patch b/queue-6.4/btrfs-fix-iput-on-error-pointer-after-error-during-orphan-cleanup.patch new file mode 100644 index 00000000000..f8422ed5b29 --- /dev/null +++ b/queue-6.4/btrfs-fix-iput-on-error-pointer-after-error-during-orphan-cleanup.patch @@ -0,0 +1,173 @@ +From cbaee87f2ef628c10331b69a2f3def6bc32402d7 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Mon, 3 Jul 2023 18:15:31 +0100 +Subject: btrfs: fix iput() on error pointer after error during orphan cleanup + +From: Filipe Manana + +commit cbaee87f2ef628c10331b69a2f3def6bc32402d7 upstream. + +At btrfs_orphan_cleanup(), if we can't find an inode (btrfs_iget() returns +an -ENOENT error pointer), we proceed with 'ret' set to -ENOENT and the +inode pointer set to ERR_PTR(-ENOENT). Later when we proceed to the body +of the following if statement: + + if (ret == -ENOENT || inode->i_nlink) { + (...) 
+ trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + iput(inode); + goto out; + } + (...) + ret = btrfs_del_orphan_item(trans, root, + found_key.objectid); + btrfs_end_transaction(trans); + if (ret) { + iput(inode); + goto out; + } + continue; + } + +If we get an error from btrfs_start_transaction() or from the call to +btrfs_del_orphan_item() we end calling iput() against an inode pointer +that has a value of ERR_PTR(-ENOENT), resulting in a crash with the +following trace: + + [876.667] BUG: kernel NULL pointer dereference, address: 0000000000000096 + [876.667] #PF: supervisor read access in kernel mode + [876.667] #PF: error_code(0x0000) - not-present page + [876.667] PGD 0 P4D 0 + [876.668] Oops: 0000 [#1] PREEMPT SMP PTI + [876.668] CPU: 0 PID: 2356187 Comm: mount Tainted: G W 6.4.0-rc6-btrfs-next-134+ #1 + [876.668] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.2-0-gea1b7a073390-prebuilt.qemu.org 04/01/2014 + [876.668] RIP: 0010:iput+0xa/0x20 + [876.668] Code: ff ff ff 66 (...) + [876.669] RSP: 0018:ffffafa9c0c9f9d0 EFLAGS: 00010282 + [876.669] RAX: ffffffffffffffe4 RBX: 000000000009453b RCX: 0000000000000000 + [876.669] RDX: 0000000000000001 RSI: ffffafa9c0c9f930 RDI: fffffffffffffffe + [876.669] RBP: ffff95c612f3b800 R08: 0000000000000001 R09: ffffffffffffffe4 + [876.670] R10: 00018f2a71010000 R11: 000000000ead96e3 R12: ffff95cb7d6909a0 + [876.670] R13: fffffffffffffffe R14: ffff95c60f477000 R15: 00000000ffffffe4 + [876.670] FS: 00007f5fbe30a840(0000) GS:ffff95ccdfa00000(0000) knlGS:0000000000000000 + [876.670] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + [876.671] CR2: 0000000000000096 CR3: 000000055e9f6004 CR4: 0000000000370ef0 + [876.671] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + [876.671] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + [876.672] Call Trace: + [876.744] + [876.744] ? __die_body+0x1b/0x60 + [876.744] ? 
page_fault_oops+0x15d/0x450 + [876.745] ? __kmem_cache_alloc_node+0x47/0x410 + [876.745] ? do_user_addr_fault+0x65/0x8a0 + [876.745] ? exc_page_fault+0x74/0x170 + [876.746] ? asm_exc_page_fault+0x22/0x30 + [876.746] ? iput+0xa/0x20 + [876.746] btrfs_orphan_cleanup+0x221/0x330 [btrfs] + [876.746] btrfs_lookup_dentry+0x58f/0x5f0 [btrfs] + [876.747] btrfs_lookup+0xe/0x30 [btrfs] + [876.747] __lookup_slow+0x82/0x130 + [876.785] walk_component+0xe5/0x160 + [876.786] path_lookupat.isra.0+0x6e/0x150 + [876.786] filename_lookup+0xcf/0x1a0 + [876.786] ? mod_objcg_state+0xd2/0x360 + [876.786] ? obj_cgroup_charge+0xf5/0x110 + [876.787] ? should_failslab+0xa/0x20 + [876.787] ? kmem_cache_alloc+0x47/0x450 + [876.787] vfs_path_lookup+0x51/0x90 + [876.788] mount_subtree+0x8d/0x130 + [876.788] btrfs_mount+0x149/0x410 [btrfs] + [876.788] ? __kmem_cache_alloc_node+0x47/0x410 + [876.788] ? vfs_parse_fs_param+0xc0/0x110 + [876.789] legacy_get_tree+0x24/0x50 + [876.834] vfs_get_tree+0x22/0xd0 + [876.852] path_mount+0x2d8/0x9c0 + [876.852] do_mount+0x79/0x90 + [876.852] __x64_sys_mount+0x8e/0xd0 + [876.853] do_syscall_64+0x38/0x90 + [876.899] entry_SYSCALL_64_after_hwframe+0x72/0xdc + [876.958] RIP: 0033:0x7f5fbe50b76a + [876.959] Code: 48 8b 0d a9 (...) + [876.959] RSP: 002b:00007fff01925798 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5 + [876.959] RAX: ffffffffffffffda RBX: 00007f5fbe694264 RCX: 00007f5fbe50b76a + [876.960] RDX: 0000561bde6c8720 RSI: 0000561bde6bdec0 RDI: 0000561bde6c31a0 + [876.960] RBP: 0000561bde6bdc70 R08: 0000000000000000 R09: 0000000000000001 + [876.960] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 + [876.960] R13: 0000561bde6c31a0 R14: 0000561bde6c8720 R15: 0000561bde6bdc70 + [876.960] + +So fix this by setting 'inode' to NULL whenever we get an error from +btrfs_iget(), and to make the code simpler, stop testing for 'ret' being +-ENOENT to check if we have an inode - instead test for 'inode' being NULL +or not. 
Having a NULL 'inode' prevents any iput() call from crashing, as +iput() ignores NULL inode pointers. Also, stop testing for a NULL return +value from btrfs_iget() with PTR_ERR_OR_ZERO(), because btrfs_iget() never +returns NULL - in case an inode is not found, it returns ERR_PTR(-ENOENT), +and in case of memory allocation failure, it returns ERR_PTR(-ENOMEM). +We also don't need the extra iput() calls on the error branches for the +btrfs_start_transaction() and btrfs_del_orphan_item() calls, as we have +already called iput() before, so remove them. + +Fixes: a13bb2c03848 ("btrfs: add missing iputs on orphan cleanup failure") +CC: stable@vger.kernel.org # 6.4 +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/inode.c | 20 ++++++++++---------- + 1 file changed, 10 insertions(+), 10 deletions(-) + +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -3546,11 +3546,14 @@ int btrfs_orphan_cleanup(struct btrfs_ro + found_key.type = BTRFS_INODE_ITEM_KEY; + found_key.offset = 0; + inode = btrfs_iget(fs_info->sb, last_objectid, root); +- ret = PTR_ERR_OR_ZERO(inode); +- if (ret && ret != -ENOENT) +- goto out; ++ if (IS_ERR(inode)) { ++ ret = PTR_ERR(inode); ++ inode = NULL; ++ if (ret != -ENOENT) ++ goto out; ++ } + +- if (ret == -ENOENT && root == fs_info->tree_root) { ++ if (!inode && root == fs_info->tree_root) { + struct btrfs_root *dead_root; + int is_dead_root = 0; + +@@ -3611,8 +3614,8 @@ int btrfs_orphan_cleanup(struct btrfs_ro + * deleted but wasn't. The inode number may have been reused, + * but either way, we can delete the orphan item. 
+ */ +- if (ret == -ENOENT || inode->i_nlink) { +- if (!ret) { ++ if (!inode || inode->i_nlink) { ++ if (inode) { + ret = btrfs_drop_verity_items(BTRFS_I(inode)); + iput(inode); + if (ret) +@@ -3621,7 +3624,6 @@ int btrfs_orphan_cleanup(struct btrfs_ro + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); +- iput(inode); + goto out; + } + btrfs_debug(fs_info, "auto deleting %Lu", +@@ -3629,10 +3631,8 @@ int btrfs_orphan_cleanup(struct btrfs_ro + ret = btrfs_del_orphan_item(trans, root, + found_key.objectid); + btrfs_end_transaction(trans); +- if (ret) { +- iput(inode); ++ if (ret) + goto out; +- } + continue; + } + diff --git a/queue-6.4/btrfs-fix-race-between-balance-and-cancel-pause.patch b/queue-6.4/btrfs-fix-race-between-balance-and-cancel-pause.patch new file mode 100644 index 00000000000..4723e94616d --- /dev/null +++ b/queue-6.4/btrfs-fix-race-between-balance-and-cancel-pause.patch @@ -0,0 +1,96 @@ +From b19c98f237cd76981aaded52c258ce93f7daa8cb Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Fri, 23 Jun 2023 01:05:41 -0400 +Subject: btrfs: fix race between balance and cancel/pause + +From: Josef Bacik + +commit b19c98f237cd76981aaded52c258ce93f7daa8cb upstream. + +Syzbot reported a panic that looks like this: + + assertion failed: fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED, in fs/btrfs/ioctl.c:465 + ------------[ cut here ]------------ + kernel BUG at fs/btrfs/messages.c:259! 
+ RIP: 0010:btrfs_assertfail+0x2c/0x30 fs/btrfs/messages.c:259 + Call Trace: + + btrfs_exclop_balance fs/btrfs/ioctl.c:465 [inline] + btrfs_ioctl_balance fs/btrfs/ioctl.c:3564 [inline] + btrfs_ioctl+0x531e/0x5b30 fs/btrfs/ioctl.c:4632 + vfs_ioctl fs/ioctl.c:51 [inline] + __do_sys_ioctl fs/ioctl.c:870 [inline] + __se_sys_ioctl fs/ioctl.c:856 [inline] + __x64_sys_ioctl+0x197/0x210 fs/ioctl.c:856 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x63/0xcd + +The reproducer is running a balance and a cancel or pause in parallel. +The way balance finishes is a bit wonky, if we were paused we need to +save the balance_ctl in the fs_info, but clear it otherwise and cleanup. +However we rely on the return values being specific errors, or having a +cancel request or no pause request. If balance completes and returns 0, +but we have a pause or cancel request we won't do the appropriate +cleanup, and then the next time we try to start a balance we'll trip +this ASSERT. + +The error handling is just wrong here, we always want to clean up, +unless we got -ECANCELED and we set the appropriate pause flag in the +exclusive op. With this patch the reproducer ran for an hour without +tripping, previously it would trip in less than a few minutes.
+ +Reported-by: syzbot+c0f3acf145cb465426d5@syzkaller.appspotmail.com +CC: stable@vger.kernel.org # 6.1+ +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/volumes.c | 14 ++++---------- + 1 file changed, 4 insertions(+), 10 deletions(-) + +--- a/fs/btrfs/volumes.c ++++ b/fs/btrfs/volumes.c +@@ -4071,14 +4071,6 @@ static int alloc_profile_is_valid(u64 fl + return has_single_bit_set(flags); + } + +-static inline int balance_need_close(struct btrfs_fs_info *fs_info) +-{ +- /* cancel requested || normal exit path */ +- return atomic_read(&fs_info->balance_cancel_req) || +- (atomic_read(&fs_info->balance_pause_req) == 0 && +- atomic_read(&fs_info->balance_cancel_req) == 0); +-} +- + /* + * Validate target profile against allowed profiles and return true if it's OK. + * Otherwise print the error message and return false. +@@ -4268,6 +4260,7 @@ int btrfs_balance(struct btrfs_fs_info * + u64 num_devices; + unsigned seq; + bool reducing_redundancy; ++ bool paused = false; + int i; + + if (btrfs_fs_closing(fs_info) || +@@ -4398,6 +4391,7 @@ int btrfs_balance(struct btrfs_fs_info * + if (ret == -ECANCELED && atomic_read(&fs_info->balance_pause_req)) { + btrfs_info(fs_info, "balance: paused"); + btrfs_exclop_balance(fs_info, BTRFS_EXCLOP_BALANCE_PAUSED); ++ paused = true; + } + /* + * Balance can be canceled by: +@@ -4426,8 +4420,8 @@ int btrfs_balance(struct btrfs_fs_info * + btrfs_update_ioctl_balance_args(fs_info, bargs); + } + +- if ((ret && ret != -ECANCELED && ret != -ENOSPC) || +- balance_need_close(fs_info)) { ++ /* We didn't pause, we can clean everything up. 
*/ ++ if (!paused) { + reset_balance_state(fs_info); + btrfs_exclop_finish(fs_info); + } diff --git a/queue-6.4/btrfs-fix-warning-when-putting-transaction-with-qgroups-enabled-after-abort.patch b/queue-6.4/btrfs-fix-warning-when-putting-transaction-with-qgroups-enabled-after-abort.patch new file mode 100644 index 00000000000..bd3953815bd --- /dev/null +++ b/queue-6.4/btrfs-fix-warning-when-putting-transaction-with-qgroups-enabled-after-abort.patch @@ -0,0 +1,89 @@ +From aa84ce8a78a1a5c10cdf9c7a5fb0c999fbc2c8d6 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Fri, 14 Jul 2023 13:42:06 +0100 +Subject: btrfs: fix warning when putting transaction with qgroups enabled after abort + +From: Filipe Manana + +commit aa84ce8a78a1a5c10cdf9c7a5fb0c999fbc2c8d6 upstream. + +If we have a transaction abort with qgroups enabled we get a warning +triggered when doing the final put on the transaction, like this: + + [552.6789] ------------[ cut here ]------------ + [552.6815] WARNING: CPU: 4 PID: 81745 at fs/btrfs/transaction.c:144 btrfs_put_transaction+0x123/0x130 [btrfs] + [552.6817] Modules linked in: btrfs blake2b_generic xor (...) + [552.6819] CPU: 4 PID: 81745 Comm: btrfs-transacti Tainted: G W 6.4.0-rc6-btrfs-next-134+ #1 + [552.6819] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.2-0-gea1b7a073390-prebuilt.qemu.org 04/01/2014 + [552.6819] RIP: 0010:btrfs_put_transaction+0x123/0x130 [btrfs] + [552.6821] Code: bd a0 01 00 (...) 
+ [552.6821] RSP: 0018:ffffa168c0527e28 EFLAGS: 00010286 + [552.6821] RAX: ffff936042caed00 RBX: ffff93604a3eb448 RCX: 0000000000000000 + [552.6821] RDX: ffff93606421b028 RSI: ffffffff92ff0878 RDI: ffff93606421b010 + [552.6821] RBP: ffff93606421b000 R08: 0000000000000000 R09: ffffa168c0d07c20 + [552.6821] R10: 0000000000000000 R11: ffff93608dc52950 R12: ffffa168c0527e70 + [552.6821] R13: ffff93606421b000 R14: ffff93604a3eb420 R15: ffff93606421b028 + [552.6821] FS: 0000000000000000(0000) GS:ffff93675fb00000(0000) knlGS:0000000000000000 + [552.6821] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + [552.6821] CR2: 0000558ad262b000 CR3: 000000014feda005 CR4: 0000000000370ee0 + [552.6822] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + [552.6822] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + [552.6822] Call Trace: + [552.6822] + [552.6822] ? __warn+0x80/0x130 + [552.6822] ? btrfs_put_transaction+0x123/0x130 [btrfs] + [552.6824] ? report_bug+0x1f4/0x200 + [552.6824] ? handle_bug+0x42/0x70 + [552.6824] ? exc_invalid_op+0x14/0x70 + [552.6824] ? asm_exc_invalid_op+0x16/0x20 + [552.6824] ? btrfs_put_transaction+0x123/0x130 [btrfs] + [552.6826] btrfs_cleanup_transaction+0xe7/0x5e0 [btrfs] + [552.6828] ? _raw_spin_unlock_irqrestore+0x23/0x40 + [552.6828] ? try_to_wake_up+0x94/0x5e0 + [552.6828] ? __pfx_process_timeout+0x10/0x10 + [552.6828] transaction_kthread+0x103/0x1d0 [btrfs] + [552.6830] ? __pfx_transaction_kthread+0x10/0x10 [btrfs] + [552.6832] kthread+0xee/0x120 + [552.6832] ? __pfx_kthread+0x10/0x10 + [552.6832] ret_from_fork+0x29/0x50 + [552.6832] + [552.6832] ---[ end trace 0000000000000000 ]--- + +This corresponds to this line of code: + + void btrfs_put_transaction(struct btrfs_transaction *transaction) + { + (...) + WARN_ON(!RB_EMPTY_ROOT( + &transaction->delayed_refs.dirty_extent_root)); + (...) 
+ } + +The warning happens because in btrfs_qgroup_destroy_extent_records(), called +in the transaction abort path, we free all entries from the rbtree +"dirty_extent_root" with rbtree_postorder_for_each_entry_safe(), but we +don't actually empty the rbtree - it's still pointing to nodes that were +freed. + +So set the rbtree's root node to NULL to avoid this warning (assign +RB_ROOT). + +Fixes: 81f7eb00ff5b ("btrfs: destroy qgroup extent records on transaction abort") +CC: stable@vger.kernel.org # 5.10+ +Reviewed-by: Josef Bacik +Reviewed-by: Qu Wenruo +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/qgroup.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/fs/btrfs/qgroup.c ++++ b/fs/btrfs/qgroup.c +@@ -4433,4 +4433,5 @@ void btrfs_qgroup_destroy_extent_records + ulist_free(entry->old_roots); + kfree(entry); + } ++ *root = RB_ROOT; + } diff --git a/queue-6.4/btrfs-raid56-always-verify-the-p-q-contents-for-scrub.patch b/queue-6.4/btrfs-raid56-always-verify-the-p-q-contents-for-scrub.patch new file mode 100644 index 00000000000..37434664335 --- /dev/null +++ b/queue-6.4/btrfs-raid56-always-verify-the-p-q-contents-for-scrub.patch @@ -0,0 +1,117 @@ +From 486c737f7fdc0c3f6464cf27ede811daec2769a1 Mon Sep 17 00:00:00 2001 +From: Qu Wenruo +Date: Fri, 30 Jun 2023 08:56:40 +0800 +Subject: btrfs: raid56: always verify the P/Q contents for scrub + +From: Qu Wenruo + +commit 486c737f7fdc0c3f6464cf27ede811daec2769a1 upstream. + +[REGRESSION] +Commit 75b470332965 ("btrfs: raid56: migrate recovery and scrub recovery +path to use error_bitmap") changed the behavior of scrub_rbio(). + +Initially if we have no error reading the raid bio, we will assign +@need_check to true, then finish_parity_scrub() would later verify the +content of P/Q stripes before writeback. + +But after that commit we never verify the content of P/Q stripes and +just write them back.
+ +This can lead to unrepaired P/Q stripes during scrub, or already +corrupted P/Q copied to the dev-replace target. + +[FIX] +The situation is more complex than the regression, in fact the initial +behavior is not 100% correct either. + +If we have the following rare case, it can still lead to the same +problem using the old behavior: + + 0 16K 32K 48K 64K + Data 1: |IIIIIII| | + Data 2: | | + Parity: | |CCCCCCC| | + +Where "I" means IO error, "C" means corruption. + +In the above case, we're scrubbing the parity stripe, then read out all +the contents of Data 1, Data 2, Parity stripes. + +But found IO error in Data 1, which leads to rebuild using Data 2 and +Parity and got the correct data. + +In that case, we would not verify if the Parity is correct for range +[16K, 32K). + +So here we have to always verify the content of Parity no matter if we +did recovery or not. + +This patch would remove the @need_check parameter of +finish_parity_scrub() completely, and would always do the P/Q +verification before writeback. 
+ +Fixes: 75b470332965 ("btrfs: raid56: migrate recovery and scrub recovery path to use error_bitmap") +CC: stable@vger.kernel.org # 6.2+ +Signed-off-by: Qu Wenruo +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/raid56.c | 11 +++-------- + 1 file changed, 3 insertions(+), 8 deletions(-) + +--- a/fs/btrfs/raid56.c ++++ b/fs/btrfs/raid56.c +@@ -71,7 +71,7 @@ static void rmw_rbio_work_locked(struct + static void index_rbio_pages(struct btrfs_raid_bio *rbio); + static int alloc_rbio_pages(struct btrfs_raid_bio *rbio); + +-static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check); ++static int finish_parity_scrub(struct btrfs_raid_bio *rbio); + static void scrub_rbio_work_locked(struct work_struct *work); + + static void free_raid_bio_pointers(struct btrfs_raid_bio *rbio) +@@ -2404,7 +2404,7 @@ static int alloc_rbio_essential_pages(st + return 0; + } + +-static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check) ++static int finish_parity_scrub(struct btrfs_raid_bio *rbio) + { + struct btrfs_io_context *bioc = rbio->bioc; + const u32 sectorsize = bioc->fs_info->sectorsize; +@@ -2445,9 +2445,6 @@ static int finish_parity_scrub(struct bt + */ + clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); + +- if (!need_check) +- goto writeback; +- + p_sector.page = alloc_page(GFP_NOFS); + if (!p_sector.page) + return -ENOMEM; +@@ -2516,7 +2513,6 @@ static int finish_parity_scrub(struct bt + q_sector.page = NULL; + } + +-writeback: + /* + * time to start writing. Make bios for everything from the + * higher layers (the bio_list in our rbio) and our p/q. Ignore +@@ -2699,7 +2695,6 @@ static int scrub_assemble_read_bios(stru + + static void scrub_rbio(struct btrfs_raid_bio *rbio) + { +- bool need_check = false; + int sector_nr; + int ret; + +@@ -2722,7 +2717,7 @@ static void scrub_rbio(struct btrfs_raid + * We have every sector properly prepared. Can finish the scrub + * and writeback the good content. 
+ */ +- ret = finish_parity_scrub(rbio, need_check); ++ ret = finish_parity_scrub(rbio); + wait_event(rbio->io_wait, atomic_read(&rbio->stripes_pending) == 0); + for (sector_nr = 0; sector_nr < rbio->stripe_nsectors; sector_nr++) { + int found_errors; diff --git a/queue-6.4/btrfs-set_page_extent_mapped-after-read_folio-in-btrfs_cont_expand.patch b/queue-6.4/btrfs-set_page_extent_mapped-after-read_folio-in-btrfs_cont_expand.patch new file mode 100644 index 00000000000..db129c6473f --- /dev/null +++ b/queue-6.4/btrfs-set_page_extent_mapped-after-read_folio-in-btrfs_cont_expand.patch @@ -0,0 +1,98 @@ +From 17b17fcd6d446b95904a6929c40012ee7f0afc0c Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Wed, 12 Jul 2023 12:44:12 -0400 +Subject: btrfs: set_page_extent_mapped after read_folio in btrfs_cont_expand + +From: Josef Bacik + +commit 17b17fcd6d446b95904a6929c40012ee7f0afc0c upstream. + +While trying to get the subpage blocksize tests running, I hit the +following panic on generic/476 + + assertion failed: PagePrivate(page) && page->private, in fs/btrfs/subpage.c:229 + kernel BUG at fs/btrfs/subpage.c:229! 
+ Internal error: Oops - BUG: 00000000f2000800 [#1] SMP + CPU: 1 PID: 1453 Comm: fsstress Not tainted 6.4.0-rc7+ #12 + Hardware name: QEMU KVM Virtual Machine, BIOS edk2-20230301gitf80f052277c8-26.fc38 03/01/2023 + pstate: 61400005 (nZCv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--) + pc : btrfs_subpage_assert+0xbc/0xf0 + lr : btrfs_subpage_assert+0xbc/0xf0 + Call trace: + btrfs_subpage_assert+0xbc/0xf0 + btrfs_subpage_clear_checked+0x38/0xc0 + btrfs_page_clear_checked+0x48/0x98 + btrfs_truncate_block+0x5d0/0x6a8 + btrfs_cont_expand+0x5c/0x528 + btrfs_write_check.isra.0+0xf8/0x150 + btrfs_buffered_write+0xb4/0x760 + btrfs_do_write_iter+0x2f8/0x4b0 + btrfs_file_write_iter+0x1c/0x30 + do_iter_readv_writev+0xc8/0x158 + do_iter_write+0x9c/0x210 + vfs_iter_write+0x24/0x40 + iter_file_splice_write+0x224/0x390 + direct_splice_actor+0x38/0x68 + splice_direct_to_actor+0x12c/0x260 + do_splice_direct+0x90/0xe8 + generic_copy_file_range+0x50/0x90 + vfs_copy_file_range+0x29c/0x470 + __arm64_sys_copy_file_range+0xcc/0x498 + invoke_syscall.constprop.0+0x80/0xd8 + do_el0_svc+0x6c/0x168 + el0_svc+0x50/0x1b0 + el0t_64_sync_handler+0x114/0x120 + el0t_64_sync+0x194/0x198 + +This happens because during btrfs_cont_expand we'll get a page, set it +as mapped, and if it's not Uptodate we'll read it. However between the +read and re-locking the page we could have called release_folio() on the +page, but left the page in the file mapping. release_folio() can clear +the page private, and thus further down we blow up when we go to modify +the subpage bits. + +Fix this by putting the set_page_extent_mapped() after the read. This +is safe because read_folio() will call set_page_extent_mapped() before +it does the read, and then if we clear page private but leave it on the +mapping we're completely safe re-setting set_page_extent_mapped(). With +this patch I can now run generic/476 without panicking.
+ +CC: stable@vger.kernel.org # 6.1+ +Reviewed-by: Christoph Hellwig +Signed-off-by: Josef Bacik +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/inode.c | 14 +++++++++++--- + 1 file changed, 11 insertions(+), 3 deletions(-) + +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -4734,9 +4734,6 @@ again: + ret = -ENOMEM; + goto out; + } +- ret = set_page_extent_mapped(page); +- if (ret < 0) +- goto out_unlock; + + if (!PageUptodate(page)) { + ret = btrfs_read_folio(NULL, page_folio(page)); +@@ -4751,6 +4748,17 @@ again: + goto out_unlock; + } + } ++ ++ /* ++ * We unlock the page after the io is completed and then re-lock it ++ * above. release_folio() could have come in between that and cleared ++ * PagePrivate(), but left the page in the mapping. Set the page mapped ++ * here to make sure it's properly set for the subpage stuff. ++ */ ++ ret = set_page_extent_mapped(page); ++ if (ret < 0) ++ goto out_unlock; ++ + wait_on_page_writeback(page); + + lock_extent(io_tree, block_start, block_end, &cached_state); diff --git a/queue-6.4/btrfs-zoned-fix-memory-leak-after-finding-block-group-with-super-blocks.patch b/queue-6.4/btrfs-zoned-fix-memory-leak-after-finding-block-group-with-super-blocks.patch new file mode 100644 index 00000000000..a32631ad3e0 --- /dev/null +++ b/queue-6.4/btrfs-zoned-fix-memory-leak-after-finding-block-group-with-super-blocks.patch @@ -0,0 +1,38 @@ +From f1a07c2b4e2c473ec322b8b9ece071b8c88a3512 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Mon, 3 Jul 2023 12:03:21 +0100 +Subject: btrfs: zoned: fix memory leak after finding block group with super blocks + +From: Filipe Manana + +commit f1a07c2b4e2c473ec322b8b9ece071b8c88a3512 upstream. + +At exclude_super_stripes(), if we happen to find a block group that has +super blocks mapped to it and we are on a zoned filesystem, we error out +as this is not supposed to happen, indicating either a bug or maybe some +memory corruption for example. 
However we are exiting the function without +freeing the memory allocated for the logical address of the super blocks. +Fix this by freeing the logical address. + +Fixes: 12659251ca5d ("btrfs: implement log-structured superblock for ZONED mode") +CC: stable@vger.kernel.org # 5.10+ +Reviewed-by: Johannes Thumshirn +Reviewed-by: Anand Jain +Signed-off-by: Filipe Manana +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/block-group.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/fs/btrfs/block-group.c ++++ b/fs/btrfs/block-group.c +@@ -2084,6 +2084,7 @@ static int exclude_super_stripes(struct + + /* Shouldn't have super stripes in sequential zones */ + if (zoned && nr) { ++ kfree(logical); + btrfs_err(fs_info, + "zoned: block group %llu must not contain super block", + cache->start); diff --git a/queue-6.4/fuse-add-feature-flag-for-expire-only.patch b/queue-6.4/fuse-add-feature-flag-for-expire-only.patch new file mode 100644 index 00000000000..ea9c473f5ac --- /dev/null +++ b/queue-6.4/fuse-add-feature-flag-for-expire-only.patch @@ -0,0 +1,62 @@ +From 5cadfbd5a11e5495cac217534c5f788168b1afd7 Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Mon, 27 Mar 2023 16:14:49 +0200 +Subject: fuse: add feature flag for expire-only + +From: Miklos Szeredi + +commit 5cadfbd5a11e5495cac217534c5f788168b1afd7 upstream. + +Add an init flag indicating whether the FUSE_EXPIRE_ONLY flag of +FUSE_NOTIFY_INVAL_ENTRY is effective. + +This is needed for backports of this feature, otherwise the server could +just check the protocol version.
+ +Fixes: 4f8d37020e1f ("fuse: add "expire only" mode to FUSE_NOTIFY_INVAL_ENTRY") +Cc: # v6.2 +Signed-off-by: Miklos Szeredi +Signed-off-by: Greg Kroah-Hartman +--- + fs/fuse/inode.c | 3 ++- + include/uapi/linux/fuse.h | 3 +++ + 2 files changed, 5 insertions(+), 1 deletion(-) + +--- a/fs/fuse/inode.c ++++ b/fs/fuse/inode.c +@@ -1254,7 +1254,8 @@ void fuse_send_init(struct fuse_mount *f + FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS | + FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA | + FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT | +- FUSE_SECURITY_CTX | FUSE_CREATE_SUPP_GROUP; ++ FUSE_SECURITY_CTX | FUSE_CREATE_SUPP_GROUP | ++ FUSE_HAS_EXPIRE_ONLY; + #ifdef CONFIG_FUSE_DAX + if (fm->fc->dax) + flags |= FUSE_MAP_ALIGNMENT; +--- a/include/uapi/linux/fuse.h ++++ b/include/uapi/linux/fuse.h +@@ -206,6 +206,7 @@ + * - add extension header + * - add FUSE_EXT_GROUPS + * - add FUSE_CREATE_SUPP_GROUP ++ * - add FUSE_HAS_EXPIRE_ONLY + */ + + #ifndef _LINUX_FUSE_H +@@ -369,6 +370,7 @@ struct fuse_file_lock { + * FUSE_HAS_INODE_DAX: use per inode DAX + * FUSE_CREATE_SUPP_GROUP: add supplementary group info to create, mkdir, + * symlink and mknod (single group that matches parent) ++ * FUSE_HAS_EXPIRE_ONLY: kernel supports expiry-only entry invalidation + */ + #define FUSE_ASYNC_READ (1 << 0) + #define FUSE_POSIX_LOCKS (1 << 1) +@@ -406,6 +408,7 @@ struct fuse_file_lock { + #define FUSE_SECURITY_CTX (1ULL << 32) + #define FUSE_HAS_INODE_DAX (1ULL << 33) + #define FUSE_CREATE_SUPP_GROUP (1ULL << 34) ++#define FUSE_HAS_EXPIRE_ONLY (1ULL << 35) + + /** + * CUSE INIT request/reply flags diff --git a/queue-6.4/fuse-apply-flags2-only-when-userspace-set-the-fuse_init_ext.patch b/queue-6.4/fuse-apply-flags2-only-when-userspace-set-the-fuse_init_ext.patch new file mode 100644 index 00000000000..7ee5a8380e7 --- /dev/null +++ b/queue-6.4/fuse-apply-flags2-only-when-userspace-set-the-fuse_init_ext.patch @@ -0,0 +1,45 @@ +From 
3066ff93476c35679cb07a97cce37d9bb07632ff Mon Sep 17 00:00:00 2001 +From: Bernd Schubert +Date: Fri, 15 Apr 2022 13:53:56 +0200 +Subject: fuse: Apply flags2 only when userspace set the FUSE_INIT_EXT + +From: Bernd Schubert + +commit 3066ff93476c35679cb07a97cce37d9bb07632ff upstream. + +This is just a safety precaution to avoid checking flags on memory that was +initialized on the user space side. libfuse zeroes struct fuse_init_out +outarg, but this is not guaranteed to be done in all implementations. +Better is to act on flags and to only apply flags2 when FUSE_INIT_EXT is +set. + +There is a risk with this change, though - it might break existing user +space libraries, which are already using flags2 without setting +FUSE_INIT_EXT. + +The corresponding libfuse patch is here +https://github.com/libfuse/libfuse/pull/662 + +Signed-off-by: Bernd Schubert +Fixes: 53db28933e95 ("fuse: extend init flags") +Cc: # v5.17 +Signed-off-by: Miklos Szeredi +Signed-off-by: Greg Kroah-Hartman +--- + fs/fuse/inode.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/fs/fuse/inode.c ++++ b/fs/fuse/inode.c +@@ -1134,7 +1134,10 @@ static void process_init_reply(struct fu + process_init_limits(fc, arg); + + if (arg->minor >= 6) { +- u64 flags = arg->flags | (u64) arg->flags2 << 32; ++ u64 flags = arg->flags; ++ ++ if (flags & FUSE_INIT_EXT) ++ flags |= (u64) arg->flags2 << 32; + + ra_pages = arg->max_readahead / PAGE_SIZE; + if (flags & FUSE_ASYNC_READ) diff --git a/queue-6.4/fuse-ioctl-translate-enosys-in-outarg.patch b/queue-6.4/fuse-ioctl-translate-enosys-in-outarg.patch new file mode 100644 index 00000000000..ffa3f307976 --- /dev/null +++ b/queue-6.4/fuse-ioctl-translate-enosys-in-outarg.patch @@ -0,0 +1,88 @@ +From 6a567e920fd0451bf29abc418df96c3365925770 Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Wed, 7 Jun 2023 17:49:21 +0200 +Subject: fuse: ioctl: translate ENOSYS in outarg + +From: Miklos Szeredi + +commit 6a567e920fd0451bf29abc418df96c3365925770
upstream. + +Fuse shouldn't return ENOSYS from its ioctl implementation. If userspace +responds with ENOSYS it should be translated to ENOTTY. + +There are two ways to return an error from the IOCTL request: + + - fuse_out_header.error + - fuse_ioctl_out.result + +Commit 02c0cab8e734 ("fuse: ioctl: translate ENOSYS") already fixed this +issue for the first case, but missed the second case. This patch fixes the +second case. + +Reported-by: Jonathan Katz +Closes: https://lore.kernel.org/all/CALKgVmcC1VUV_gJVq70n--omMJZUb4HSh_FqvLTHgNBc+HCLFQ@mail.gmail.com/ +Fixes: 02c0cab8e734 ("fuse: ioctl: translate ENOSYS") +Cc: +Signed-off-by: Miklos Szeredi +Signed-off-by: Greg Kroah-Hartman +--- + fs/fuse/ioctl.c | 21 +++++++++++++-------- + 1 file changed, 13 insertions(+), 8 deletions(-) + +--- a/fs/fuse/ioctl.c ++++ b/fs/fuse/ioctl.c +@@ -9,14 +9,23 @@ + #include + #include + +-static ssize_t fuse_send_ioctl(struct fuse_mount *fm, struct fuse_args *args) ++static ssize_t fuse_send_ioctl(struct fuse_mount *fm, struct fuse_args *args, ++ struct fuse_ioctl_out *outarg) + { +- ssize_t ret = fuse_simple_request(fm, args); ++ ssize_t ret; ++ ++ args->out_args[0].size = sizeof(*outarg); ++ args->out_args[0].value = outarg; ++ ++ ret = fuse_simple_request(fm, args); + + /* Translate ENOSYS, which shouldn't be returned from fs */ + if (ret == -ENOSYS) + ret = -ENOTTY; + ++ if (ret >= 0 && outarg->result == -ENOSYS) ++ outarg->result = -ENOTTY; ++ + return ret; + } + +@@ -264,13 +273,11 @@ long fuse_do_ioctl(struct file *file, un + } + + ap.args.out_numargs = 2; +- ap.args.out_args[0].size = sizeof(outarg); +- ap.args.out_args[0].value = &outarg; + ap.args.out_args[1].size = out_size; + ap.args.out_pages = true; + ap.args.out_argvar = true; + +- transferred = fuse_send_ioctl(fm, &ap.args); ++ transferred = fuse_send_ioctl(fm, &ap.args, &outarg); + err = transferred; + if (transferred < 0) + goto out; +@@ -399,12 +406,10 @@ static int fuse_priv_ioctl(struct inode + 
args.in_args[1].size = inarg.in_size; + args.in_args[1].value = ptr; + args.out_numargs = 2; +- args.out_args[0].size = sizeof(outarg); +- args.out_args[0].value = &outarg; + args.out_args[1].size = inarg.out_size; + args.out_args[1].value = ptr; + +- err = fuse_send_ioctl(fm, &args); ++ err = fuse_send_ioctl(fm, &args, &outarg); + if (!err) { + if (outarg.result < 0) + err = outarg.result; diff --git a/queue-6.4/fuse-revalidate-don-t-invalidate-if-interrupted.patch b/queue-6.4/fuse-revalidate-don-t-invalidate-if-interrupted.patch new file mode 100644 index 00000000000..46e5be8f3be --- /dev/null +++ b/queue-6.4/fuse-revalidate-don-t-invalidate-if-interrupted.patch @@ -0,0 +1,34 @@ +From a9d1c4c6df0e568207907c04aed9e7beb1294c42 Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Wed, 7 Jun 2023 17:49:20 +0200 +Subject: fuse: revalidate: don't invalidate if interrupted + +From: Miklos Szeredi + +commit a9d1c4c6df0e568207907c04aed9e7beb1294c42 upstream. + +If the LOOKUP request triggered from fuse_dentry_revalidate() is +interrupted, then the dentry will be invalidated, possibly resulting in +submounts being unmounted. 
+ +Reported-by: Xu Rongbo +Closes: https://lore.kernel.org/all/CAJfpegswN_CJJ6C3RZiaK6rpFmNyWmXfaEpnQUJ42KCwNF5tWw@mail.gmail.com/ +Fixes: 9e6268db496a ("[PATCH] FUSE - read-write operations") +Cc: +Signed-off-by: Miklos Szeredi +Signed-off-by: Greg Kroah-Hartman +--- + fs/fuse/dir.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/fuse/dir.c ++++ b/fs/fuse/dir.c +@@ -258,7 +258,7 @@ static int fuse_dentry_revalidate(struct + spin_unlock(&fi->lock); + } + kfree(forget); +- if (ret == -ENOMEM) ++ if (ret == -ENOMEM || ret == -EINTR) + goto out; + if (ret || fuse_invalid_attr(&outarg.attr) || + fuse_stale_inode(inode, outarg.generation, &outarg.attr)) diff --git a/queue-6.4/keys-fix-linking-a-duplicate-key-to-a-keyring-s-assoc_array.patch b/queue-6.4/keys-fix-linking-a-duplicate-key-to-a-keyring-s-assoc_array.patch new file mode 100644 index 00000000000..75ed3459f73 --- /dev/null +++ b/queue-6.4/keys-fix-linking-a-duplicate-key-to-a-keyring-s-assoc_array.patch @@ -0,0 +1,177 @@ +From d55901522f96082a43b9842d34867363c0cdbac5 Mon Sep 17 00:00:00 2001 +From: Petr Pavlu +Date: Thu, 23 Mar 2023 14:04:12 +0100 +Subject: keys: Fix linking a duplicate key to a keyring's assoc_array + +From: Petr Pavlu + +commit d55901522f96082a43b9842d34867363c0cdbac5 upstream. + +When making a DNS query inside the kernel using dns_query(), the request +code can in rare cases end up creating a duplicate index key in the +assoc_array of the destination keyring. It is eventually found by +a BUG_ON() check in the assoc_array implementation and results in +a crash. + +Example report: +[2158499.700025] kernel BUG at ../lib/assoc_array.c:652! +[2158499.700039] invalid opcode: 0000 [#1] SMP PTI +[2158499.700065] CPU: 3 PID: 31985 Comm: kworker/3:1 Kdump: loaded Not tainted 5.3.18-150300.59.90-default #1 SLE15-SP3 +[2158499.700096] Hardware name: VMware, Inc. 
VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 11/12/2020 +[2158499.700351] Workqueue: cifsiod cifs_resolve_server [cifs] +[2158499.700380] RIP: 0010:assoc_array_insert+0x85f/0xa40 +[2158499.700401] Code: ff 74 2b 48 8b 3b 49 8b 45 18 4c 89 e6 48 83 e7 fe e8 95 ec 74 00 3b 45 88 7d db 85 c0 79 d4 0f 0b 0f 0b 0f 0b e8 41 f2 be ff <0f> 0b 0f 0b 81 7d 88 ff ff ff 7f 4c 89 eb 4c 8b ad 58 ff ff ff 0f +[2158499.700448] RSP: 0018:ffffc0bd6187faf0 EFLAGS: 00010282 +[2158499.700470] RAX: ffff9f1ea7da2fe8 RBX: ffff9f1ea7da2fc1 RCX: 0000000000000005 +[2158499.700492] RDX: 0000000000000000 RSI: 0000000000000005 RDI: 0000000000000000 +[2158499.700515] RBP: ffffc0bd6187fbb0 R08: ffff9f185faf1100 R09: 0000000000000000 +[2158499.700538] R10: ffff9f1ea7da2cc0 R11: 000000005ed8cec8 R12: ffffc0bd6187fc28 +[2158499.700561] R13: ffff9f15feb8d000 R14: ffff9f1ea7da2fc0 R15: ffff9f168dc0d740 +[2158499.700585] FS: 0000000000000000(0000) GS:ffff9f185fac0000(0000) knlGS:0000000000000000 +[2158499.700610] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[2158499.700630] CR2: 00007fdd94fca238 CR3: 0000000809d8c006 CR4: 00000000003706e0 +[2158499.700702] Call Trace: +[2158499.700741] ? key_alloc+0x447/0x4b0 +[2158499.700768] ? __key_link_begin+0x43/0xa0 +[2158499.700790] __key_link_begin+0x43/0xa0 +[2158499.700814] request_key_and_link+0x2c7/0x730 +[2158499.700847] ? dns_resolver_read+0x20/0x20 [dns_resolver] +[2158499.700873] ? key_default_cmp+0x20/0x20 +[2158499.700898] request_key_tag+0x43/0xa0 +[2158499.700926] dns_query+0x114/0x2ca [dns_resolver] +[2158499.701127] dns_resolve_server_name_to_ip+0x194/0x310 [cifs] +[2158499.701164] ? scnprintf+0x49/0x90 +[2158499.701190] ? __switch_to_asm+0x40/0x70 +[2158499.701211] ? 
__switch_to_asm+0x34/0x70 +[2158499.701405] reconn_set_ipaddr_from_hostname+0x81/0x2a0 [cifs] +[2158499.701603] cifs_resolve_server+0x4b/0xd0 [cifs] +[2158499.701632] process_one_work+0x1f8/0x3e0 +[2158499.701658] worker_thread+0x2d/0x3f0 +[2158499.701682] ? process_one_work+0x3e0/0x3e0 +[2158499.701703] kthread+0x10d/0x130 +[2158499.701723] ? kthread_park+0xb0/0xb0 +[2158499.701746] ret_from_fork+0x1f/0x40 + +The situation occurs as follows: +* Some kernel facility invokes dns_query() to resolve a hostname, for + example, "abcdef". The function registers its global DNS resolver + cache as current->cred.thread_keyring and passes the query to + request_key_net() -> request_key_tag() -> request_key_and_link(). +* Function request_key_and_link() creates a keyring_search_context + object. Its match_data.cmp method gets set via a call to + type->match_preparse() (resolves to dns_resolver_match_preparse()) to + dns_resolver_cmp(). +* Function request_key_and_link() continues and invokes + search_process_keyrings_rcu() which returns that a given key was not + found. The control is then passed to request_key_and_link() -> + construct_alloc_key(). +* Concurrently to that, a second task similarly makes a DNS query for + "abcdef." and its result gets inserted into the DNS resolver cache. +* Back on the first task, function construct_alloc_key() first runs + __key_link_begin() to determine an assoc_array_edit operation to + insert a new key. Index keys in the array are compared exactly as-is, + using keyring_compare_object(). The operation finds that "abcdef" is + not yet present in the destination keyring. +* Function construct_alloc_key() continues and checks if a given key is + already present on some keyring by again calling + search_process_keyrings_rcu(). This search is done using + dns_resolver_cmp() and "abcdef" gets matched with now present key + "abcdef.". 
+* The found key is linked on the destination keyring by calling + __key_link() and using the previously calculated assoc_array_edit + operation. This inserts the "abcdef." key in the array but creates + a duplicity because the same index key is already present. + +Fix the problem by postponing __key_link_begin() in +construct_alloc_key() until an actual key which should be linked into +the destination keyring is determined. + +[jarkko@kernel.org: added a fixes tag and cc to stable] +Cc: stable@vger.kernel.org # v5.3+ +Fixes: df593ee23e05 ("keys: Hoist locking out of __key_link_begin()") +Signed-off-by: Petr Pavlu +Reviewed-by: Joey Lee +Reviewed-by: Jarkko Sakkinen +Signed-off-by: Jarkko Sakkinen +Signed-off-by: Greg Kroah-Hartman +--- + security/keys/request_key.c | 35 ++++++++++++++++++++++++----------- + 1 file changed, 24 insertions(+), 11 deletions(-) + +--- a/security/keys/request_key.c ++++ b/security/keys/request_key.c +@@ -401,17 +401,21 @@ static int construct_alloc_key(struct ke + set_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags); + + if (dest_keyring) { +- ret = __key_link_lock(dest_keyring, &ctx->index_key); ++ ret = __key_link_lock(dest_keyring, &key->index_key); + if (ret < 0) + goto link_lock_failed; +- ret = __key_link_begin(dest_keyring, &ctx->index_key, &edit); +- if (ret < 0) +- goto link_prealloc_failed; + } + +- /* attach the key to the destination keyring under lock, but we do need ++ /* ++ * Attach the key to the destination keyring under lock, but we do need + * to do another check just in case someone beat us to it whilst we +- * waited for locks */ ++ * waited for locks. ++ * ++ * The caller might specify a comparison function which looks for keys ++ * that do not exactly match but are still equivalent from the caller's ++ * perspective. The __key_link_begin() operation must be done only after ++ * an actual key is determined. 
++ */ + mutex_lock(&key_construction_mutex); + + rcu_read_lock(); +@@ -420,12 +424,16 @@ static int construct_alloc_key(struct ke + if (!IS_ERR(key_ref)) + goto key_already_present; + +- if (dest_keyring) ++ if (dest_keyring) { ++ ret = __key_link_begin(dest_keyring, &key->index_key, &edit); ++ if (ret < 0) ++ goto link_alloc_failed; + __key_link(dest_keyring, key, &edit); ++ } + + mutex_unlock(&key_construction_mutex); + if (dest_keyring) +- __key_link_end(dest_keyring, &ctx->index_key, edit); ++ __key_link_end(dest_keyring, &key->index_key, edit); + mutex_unlock(&user->cons_lock); + *_key = key; + kleave(" = 0 [%d]", key_serial(key)); +@@ -438,10 +446,13 @@ key_already_present: + mutex_unlock(&key_construction_mutex); + key = key_ref_to_ptr(key_ref); + if (dest_keyring) { ++ ret = __key_link_begin(dest_keyring, &key->index_key, &edit); ++ if (ret < 0) ++ goto link_alloc_failed_unlocked; + ret = __key_link_check_live_key(dest_keyring, key); + if (ret == 0) + __key_link(dest_keyring, key, &edit); +- __key_link_end(dest_keyring, &ctx->index_key, edit); ++ __key_link_end(dest_keyring, &key->index_key, edit); + if (ret < 0) + goto link_check_failed; + } +@@ -456,8 +467,10 @@ link_check_failed: + kleave(" = %d [linkcheck]", ret); + return ret; + +-link_prealloc_failed: +- __key_link_end(dest_keyring, &ctx->index_key, edit); ++link_alloc_failed: ++ mutex_unlock(&key_construction_mutex); ++link_alloc_failed_unlocked: ++ __key_link_end(dest_keyring, &key->index_key, edit); + link_lock_failed: + mutex_unlock(&user->cons_lock); + key_put(key); diff --git a/queue-6.4/maple_tree-fix-node-allocation-testing-on-32-bit.patch b/queue-6.4/maple_tree-fix-node-allocation-testing-on-32-bit.patch new file mode 100644 index 00000000000..50edbd715e6 --- /dev/null +++ b/queue-6.4/maple_tree-fix-node-allocation-testing-on-32-bit.patch @@ -0,0 +1,40 @@ +From ef5c3de5211b5a3a8102b25aa83eb4cde65ac2fd Mon Sep 17 00:00:00 2001 +From: "Liam R. 
Howlett" +Date: Wed, 12 Jul 2023 13:39:16 -0400 +Subject: maple_tree: fix node allocation testing on 32 bit + +From: Liam R. Howlett + +commit ef5c3de5211b5a3a8102b25aa83eb4cde65ac2fd upstream. + +Internal node counting was altered and the 64 bit test was updated, +however the 32bit test was missed. + +Restore the 32bit test to a functional state. + +Link: https://lore.kernel.org/linux-mm/CAMuHMdV4T53fOw7VPoBgPR7fP6RYqf=CBhD_y_vOg53zZX_DnA@mail.gmail.com/ +Link: https://lkml.kernel.org/r/20230712173916.168805-2-Liam.Howlett@oracle.com +Fixes: 541e06b772c1 ("maple_tree: remove GFP_ZERO from kmem_cache_alloc() and kmem_cache_alloc_bulk()") +Signed-off-by: Liam R. Howlett +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/radix-tree/maple.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/tools/testing/radix-tree/maple.c ++++ b/tools/testing/radix-tree/maple.c +@@ -206,9 +206,9 @@ static noinline void check_new_node(stru + e = i - 1; + } else { + if (i >= 4) +- e = i - 4; +- else if (i == 3) +- e = i - 2; ++ e = i - 3; ++ else if (i >= 1) ++ e = i - 1; + else + e = 0; + } diff --git a/queue-6.4/maple_tree-set-the-node-limit-when-creating-a-new-root-node.patch b/queue-6.4/maple_tree-set-the-node-limit-when-creating-a-new-root-node.patch new file mode 100644 index 00000000000..a246c4bc1cb --- /dev/null +++ b/queue-6.4/maple_tree-set-the-node-limit-when-creating-a-new-root-node.patch @@ -0,0 +1,44 @@ +From 3c769fd88b9742954763a968e84de09f7ad78cfe Mon Sep 17 00:00:00 2001 +From: Peng Zhang +Date: Tue, 11 Jul 2023 11:54:37 +0800 +Subject: maple_tree: set the node limit when creating a new root node + +From: Peng Zhang + +commit 3c769fd88b9742954763a968e84de09f7ad78cfe upstream. + +Set the node limit of the root node so that the last pivot of all nodes is +the node limit (if the node is not full). + +This patch also fixes a bug in mas_rev_awalk(). 
Effectively, always +setting a maximum makes mas_logical_pivot() behave as mas_safe_pivot(). +Without this fix, it is possible that very small tasks would fail to find +the correct gap. Although this has not been observed with real tasks, it +has been reported to happen in m68k nommu running the maple tree tests. + +Link: https://lkml.kernel.org/r/20230711035444.526-1-zhangpeng.00@bytedance.com +Link: https://lore.kernel.org/linux-mm/CAMuHMdV4T53fOw7VPoBgPR7fP6RYqf=CBhD_y_vOg53zZX_DnA@mail.gmail.com/ +Link: https://lkml.kernel.org/r/20230711035444.526-2-zhangpeng.00@bytedance.com +Fixes: 54a611b60590 ("Maple Tree: add new data structure") +Signed-off-by: Peng Zhang +Reviewed-by: Liam R. Howlett +Tested-by: Geert Uytterhoeven +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + lib/maple_tree.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/lib/maple_tree.c ++++ b/lib/maple_tree.c +@@ -3693,7 +3693,8 @@ static inline int mas_root_expand(struct + mas->offset = slot; + pivots[slot] = mas->last; + if (mas->last != ULONG_MAX) +- slot++; ++ pivots[++slot] = ULONG_MAX; ++ + mas->depth = 1; + mas_set_height(mas); + ma_set_meta(node, maple_leaf_64, 0, slot); diff --git a/queue-6.4/mm-mlock-fix-vma-iterator-conversion-of-apply_vma_lock_flags.patch b/queue-6.4/mm-mlock-fix-vma-iterator-conversion-of-apply_vma_lock_flags.patch new file mode 100644 index 00000000000..cdab42a7c6d --- /dev/null +++ b/queue-6.4/mm-mlock-fix-vma-iterator-conversion-of-apply_vma_lock_flags.patch @@ -0,0 +1,70 @@ +From 2658f94d679243209889cdfa8de3743cde1abea9 Mon Sep 17 00:00:00 2001 +From: "Liam R. Howlett" +Date: Tue, 11 Jul 2023 13:50:20 -0400 +Subject: mm/mlock: fix vma iterator conversion of apply_vma_lock_flags() + +From: Liam R. Howlett + +commit 2658f94d679243209889cdfa8de3743cde1abea9 upstream. + +apply_vma_lock_flags() calls mlock_fixup(), which could merge the VMA +after where the vma iterator is located. 
Although this is not an issue, +the next iteration of the loop will check the start of the vma to be equal +to the locally saved 'tmp' variable and cause an incorrect failure +scenario. Fix the error by setting tmp to the end of the vma iterator +value before restarting the loop. + +There is also a potential of the error code being overwritten when the +loop terminates early. Fix the return issue by directly returning when an +error is encountered since there is nothing to undo after the loop. + +Link: https://lkml.kernel.org/r/20230711175020.4091336-1-Liam.Howlett@oracle.com +Fixes: 37598f5a9d8b ("mlock: convert mlock to vma iterator") +Signed-off-by: Liam R. Howlett +Reported-by: Ryan Roberts + Link: https://lore.kernel.org/linux-mm/50341ca1-d582-b33a-e3d0-acb08a65166f@arm.com/ +Tested-by: Ryan Roberts +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/mlock.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +--- a/mm/mlock.c ++++ b/mm/mlock.c +@@ -471,7 +471,6 @@ static int apply_vma_lock_flags(unsigned + { + unsigned long nstart, end, tmp; + struct vm_area_struct *vma, *prev; +- int error; + VMA_ITERATOR(vmi, current->mm, start); + + VM_BUG_ON(offset_in_page(start)); +@@ -492,6 +491,7 @@ static int apply_vma_lock_flags(unsigned + nstart = start; + tmp = vma->vm_start; + for_each_vma_range(vmi, vma, end) { ++ int error; + vm_flags_t newflags; + + if (vma->vm_start != tmp) +@@ -505,14 +505,15 @@ static int apply_vma_lock_flags(unsigned + tmp = end; + error = mlock_fixup(&vmi, vma, &prev, nstart, tmp, newflags); + if (error) +- break; ++ return error; ++ tmp = vma_iter_end(&vmi); + nstart = tmp; + } + +- if (vma_iter_end(&vmi) < end) ++ if (tmp < end) + return -ENOMEM; + +- return error; ++ return 0; + } + + /* diff --git a/queue-6.4/perf-probe-add-test-for-regression-introduced-by-switch-to-die_get_decl_file.patch b/queue-6.4/perf-probe-add-test-for-regression-introduced-by-switch-to-die_get_decl_file.patch new file 
mode 100644 index 00000000000..ac282bd2634 --- /dev/null +++ b/queue-6.4/perf-probe-add-test-for-regression-introduced-by-switch-to-die_get_decl_file.patch @@ -0,0 +1,115 @@ +From 56cbeacf143530576905623ac72ae0964f3293a6 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Georg=20M=C3=BCller?= +Date: Wed, 28 Jun 2023 10:45:50 +0200 +Subject: perf probe: Add test for regression introduced by switch to die_get_decl_file() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Georg Müller + +commit 56cbeacf143530576905623ac72ae0964f3293a6 upstream. + +This patch adds a test to validate that 'perf probe' works for binaries +where DWARF info is split into multiple CUs + +Signed-off-by: Georg Müller +Acked-by: Masami Hiramatsu (Google) +Cc: Adrian Hunter +Cc: Alexander Shishkin +Cc: Ian Rogers +Cc: Ingo Molnar +Cc: Jiri Olsa +Cc: Mark Rutland +Cc: Namhyung Kim +Cc: Peter Zijlstra +Cc: regressions@lists.linux.dev +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20230628084551.1860532-5-georgmueller@gmx.net +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Greg Kroah-Hartman +--- + tools/perf/tests/shell/test_uprobe_from_different_cu.sh | 77 ++++++++++++++++ + 1 file changed, 77 insertions(+) + create mode 100755 tools/perf/tests/shell/test_uprobe_from_different_cu.sh + +--- /dev/null ++++ b/tools/perf/tests/shell/test_uprobe_from_different_cu.sh +@@ -0,0 +1,77 @@ ++#!/bin/bash ++# test perf probe of function from different CU ++# SPDX-License-Identifier: GPL-2.0 ++ ++set -e ++ ++temp_dir=$(mktemp -d /tmp/perf-uprobe-different-cu-sh.XXXXXXXXXX) ++ ++cleanup() ++{ ++ trap - EXIT TERM INT ++ if [[ "${temp_dir}" =~ ^/tmp/perf-uprobe-different-cu-sh.*$ ]]; then ++ echo "--- Cleaning up ---" ++ perf probe -x ${temp_dir}/testfile -d foo ++ rm -f "${temp_dir}/"* ++ rmdir "${temp_dir}" ++ fi ++} ++ ++trap_cleanup() ++{ ++ cleanup ++ exit 1 ++} ++ ++trap trap_cleanup EXIT TERM INT ++ ++cat > ${temp_dir}/testfile-foo.h << EOF 
++struct t ++{ ++ int *p; ++ int c; ++}; ++ ++extern int foo (int i, struct t *t); ++EOF ++ ++cat > ${temp_dir}/testfile-foo.c << EOF ++#include "testfile-foo.h" ++ ++int ++foo (int i, struct t *t) ++{ ++ int j, res = 0; ++ for (j = 0; j < i && j < t->c; j++) ++ res += t->p[j]; ++ ++ return res; ++} ++EOF ++ ++cat > ${temp_dir}/testfile-main.c << EOF ++#include "testfile-foo.h" ++ ++static struct t g; ++ ++int ++main (int argc, char **argv) ++{ ++ int i; ++ int j[argc]; ++ g.c = argc; ++ g.p = j; ++ for (i = 0; i < argc; i++) ++ j[i] = (int) argv[i][0]; ++ return foo (3, &g); ++} ++EOF ++ ++gcc -g -Og -flto -c ${temp_dir}/testfile-foo.c -o ${temp_dir}/testfile-foo.o ++gcc -g -Og -c ${temp_dir}/testfile-main.c -o ${temp_dir}/testfile-main.o ++gcc -g -Og -o ${temp_dir}/testfile ${temp_dir}/testfile-foo.o ${temp_dir}/testfile-main.o ++ ++perf probe -x ${temp_dir}/testfile --funcs foo ++perf probe -x ${temp_dir}/testfile foo ++ ++cleanup diff --git a/queue-6.4/perf-probe-read-dwarf-files-from-the-correct-cu.patch b/queue-6.4/perf-probe-read-dwarf-files-from-the-correct-cu.patch new file mode 100644 index 00000000000..8d4924e4eea --- /dev/null +++ b/queue-6.4/perf-probe-read-dwarf-files-from-the-correct-cu.patch @@ -0,0 +1,66 @@ +From c66e1c68c13b872505f25ab641c44b77313ee7fe Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Georg=20M=C3=BCller?= +Date: Wed, 28 Jun 2023 10:45:51 +0200 +Subject: perf probe: Read DWARF files from the correct CU +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Georg Müller + +commit c66e1c68c13b872505f25ab641c44b77313ee7fe upstream. + +After switching from dwarf_decl_file() to die_get_decl_file(), it is not +possible to add probes for certain functions: + + $ perf probe -x /usr/lib/systemd/systemd-logind match_unit_removed + A function DIE doesn't have decl_line. Maybe broken DWARF? + A function DIE doesn't have decl_line. Maybe broken DWARF? + Probe point 'match_unit_removed' not found. 
+ Error: Failed to add events. + +The problem is that die_get_decl_file() uses the wrong CU to search for +the file. elfutils commit e1db5cdc9f has some good explanation for this: + + dwarf_decl_file uses dwarf_attr_integrate to get the DW_AT_decl_file + attribute. This means the attribute might come from a different DIE + in a different CU. If so, we need to use the CU associated with the + attribute, not the original DIE, to resolve the file name. + +This patch uses the same source of information as elfutils: use attribute +DW_AT_decl_file and use this CU to search for the file. + +Fixes: dc9a5d2ccd5c823c ("perf probe: Fix to get declared file name from clang DWARF5") +Signed-off-by: Georg Müller +Acked-by: Masami Hiramatsu (Google) +Cc: Adrian Hunter +Cc: Alexander Shishkin +Cc: Ian Rogers +Cc: Ingo Molnar +Cc: Jiri Olsa +Cc: Mark Rutland +Cc: Namhyung Kim +Cc: Peter Zijlstra +Cc: regressions@lists.linux.dev +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20230628084551.1860532-6-georgmueller@gmx.net +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Greg Kroah-Hartman +--- + tools/perf/util/dwarf-aux.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/tools/perf/util/dwarf-aux.c ++++ b/tools/perf/util/dwarf-aux.c +@@ -478,8 +478,10 @@ static const char *die_get_file_name(Dwa + { + Dwarf_Die cu_die; + Dwarf_Files *files; ++ Dwarf_Attribute attr_mem; + +- if (idx < 0 || !dwarf_diecu(dw_die, &cu_die, NULL, NULL) || ++ if (idx < 0 || !dwarf_attr_integrate(dw_die, DW_AT_decl_file, &attr_mem) || ++ !dwarf_cu_die(attr_mem.cu, &cu_die, NULL, NULL, NULL, NULL, NULL, NULL) || + dwarf_getsrcfiles(&cu_die, &files, NULL) != 0) + return NULL; + diff --git a/queue-6.4/prctl-move-pr_get_auxv-out-of-pr_mce_kill.patch b/queue-6.4/prctl-move-pr_get_auxv-out-of-pr_mce_kill.patch new file mode 100644 index 00000000000..1c29ce63ef1 --- /dev/null +++ b/queue-6.4/prctl-move-pr_get_auxv-out-of-pr_mce_kill.patch @@ -0,0 +1,67 @@ +From 
636e348353a7cc52609fdba5ff3270065da140d5 Mon Sep 17 00:00:00 2001 +From: Miguel Ojeda +Date: Sun, 9 Jul 2023 01:33:44 +0200 +Subject: prctl: move PR_GET_AUXV out of PR_MCE_KILL + +From: Miguel Ojeda + +commit 636e348353a7cc52609fdba5ff3270065da140d5 upstream. + +Somehow PR_GET_AUXV got added into PR_MCE_KILL's switch when the patch was +applied [1]. + +Thus move it out of the switch, to the place the patch added it. + +In the recently released v6.4 kernel some user could, in principle, be +already using this feature by mapping the right page and passing the +PR_GET_AUXV constant as a pointer: + + prctl(PR_MCE_KILL, PR_GET_AUXV, ...) + +So this does change the behavior for users. We could keep the bug since +the other subcases in PR_MCE_KILL (PR_MCE_KILL_CLEAR and PR_MCE_KILL_SET) +do not overlap. + +However, v6.4 may be recent enough (2 weeks old) that moving the lines +(rather than just adding a new case) does not break anybody? Moreover, +the documentation in man-pages was just committed today [2]. 
+ +Link: https://lkml.kernel.org/r/20230708233344.361854-1-ojeda@kernel.org +Fixes: ddc65971bb67 ("prctl: add PR_GET_AUXV to copy auxv to userspace") +Link: https://lore.kernel.org/all/d81864a7f7f43bca6afa2a09fc2e850e4050ab42.1680611394.git.josh@joshtriplett.org/ [1] +Link: https://git.kernel.org/pub/scm/docs/man-pages/man-pages.git/commit/?id=8cf0c06bfd3c2b219b044d4151c96f0da50af9ad [2] +Signed-off-by: Miguel Ojeda +Cc: Josh Triplett +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sys.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +--- a/kernel/sys.c ++++ b/kernel/sys.c +@@ -2529,11 +2529,6 @@ SYSCALL_DEFINE5(prctl, int, option, unsi + else + return -EINVAL; + break; +- case PR_GET_AUXV: +- if (arg4 || arg5) +- return -EINVAL; +- error = prctl_get_auxv((void __user *)arg2, arg3); +- break; + default: + return -EINVAL; + } +@@ -2688,6 +2683,11 @@ SYSCALL_DEFINE5(prctl, int, option, unsi + case PR_SET_VMA: + error = prctl_set_vma(arg2, arg3, arg4, arg5); + break; ++ case PR_GET_AUXV: ++ if (arg4 || arg5) ++ return -EINVAL; ++ error = prctl_get_auxv((void __user *)arg2, arg3); ++ break; + #ifdef CONFIG_KSM + case PR_SET_MEMORY_MERGE: + if (arg3 || arg4 || arg5) diff --git a/queue-6.4/selftests-mm-mkdirty-fix-incorrect-position-of-endif.patch b/queue-6.4/selftests-mm-mkdirty-fix-incorrect-position-of-endif.patch new file mode 100644 index 00000000000..bdaf788d1de --- /dev/null +++ b/queue-6.4/selftests-mm-mkdirty-fix-incorrect-position-of-endif.patch @@ -0,0 +1,37 @@ +From 25b5949c30938c7f26dbadc948b491e0e0811c78 Mon Sep 17 00:00:00 2001 +From: Colin Ian King +Date: Wed, 12 Jul 2023 14:46:48 +0100 +Subject: selftests/mm: mkdirty: fix incorrect position of #endif + +From: Colin Ian King + +commit 25b5949c30938c7f26dbadc948b491e0e0811c78 upstream. + +The #endif is the wrong side of a } causing a build failure when +__NR_userfaultfd is not defined. 
Fix this by moving the #endif to enclose +the } + +Link: https://lkml.kernel.org/r/20230712134648.456349-1-colin.i.king@gmail.com +Fixes: 9eac40fc0cc7 ("selftests/mm: mkdirty: test behavior of (pte|pmd)_mkdirty on VMAs without write permissions") +Signed-off-by: Colin Ian King +Reviewed-by: David Hildenbrand +Cc: Shuah Khan +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/mm/mkdirty.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/tools/testing/selftests/mm/mkdirty.c ++++ b/tools/testing/selftests/mm/mkdirty.c +@@ -321,8 +321,8 @@ close_uffd: + munmap: + munmap(dst, pagesize); + free(src); +-#endif /* __NR_userfaultfd */ + } ++#endif /* __NR_userfaultfd */ + + int main(void) + { diff --git a/queue-6.4/selftests-tc-add-ct-action-kconfig-dep.patch b/queue-6.4/selftests-tc-add-ct-action-kconfig-dep.patch new file mode 100644 index 00000000000..07859eec8d1 --- /dev/null +++ b/queue-6.4/selftests-tc-add-ct-action-kconfig-dep.patch @@ -0,0 +1,43 @@ +From 719b4774a8cb1a501e2d22a5a4a3a0a870e427d5 Mon Sep 17 00:00:00 2001 +From: Matthieu Baerts +Date: Thu, 13 Jul 2023 23:16:45 +0200 +Subject: selftests: tc: add 'ct' action kconfig dep + +From: Matthieu Baerts + +commit 719b4774a8cb1a501e2d22a5a4a3a0a870e427d5 upstream. + +When looking for something else in LKFT reports [1], I noticed most of +the tests were skipped because the "teardown stage" did not complete +successfully. + +Pedro found out this is due to the fact CONFIG_NF_FLOW_TABLE is required +but not listed in the 'config' file. Adding it to the list fixes the +issues on LKFT side. CONFIG_NET_ACT_CT is now set to 'm' in the final +kconfig.
+ +Fixes: c34b961a2492 ("net/sched: act_ct: Create nf flow table per zone") +Cc: stable@vger.kernel.org +Link: https://qa-reports.linaro.org/lkft/linux-next-master/build/next-20230711/testrun/18267241/suite/kselftest-tc-testing/test/tc-testing_tdc_sh/log [1] +Link: https://lore.kernel.org/netdev/0e061d4a-9a23-9f58-3b35-d8919de332d7@tessares.net/T/ [2] +Suggested-by: Pedro Tammela +Signed-off-by: Matthieu Baerts +Tested-by: Zhengchao Shao +Link: https://lore.kernel.org/r/20230713-tc-selftests-lkft-v1-2-1eb4fd3a96e7@tessares.net +Acked-by: Jamal Hadi Salim +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/tc-testing/config | 1 + + 1 file changed, 1 insertion(+) + +--- a/tools/testing/selftests/tc-testing/config ++++ b/tools/testing/selftests/tc-testing/config +@@ -5,6 +5,7 @@ CONFIG_NF_CONNTRACK=m + CONFIG_NF_CONNTRACK_MARK=y + CONFIG_NF_CONNTRACK_ZONES=y + CONFIG_NF_CONNTRACK_LABELS=y ++CONFIG_NF_FLOW_TABLE=m + CONFIG_NF_NAT=m + CONFIG_NETFILTER_XT_TARGET_LOG=m + diff --git a/queue-6.4/selftests-tc-set-timeout-to-15-minutes.patch b/queue-6.4/selftests-tc-set-timeout-to-15-minutes.patch new file mode 100644 index 00000000000..ea00bbfff7d --- /dev/null +++ b/queue-6.4/selftests-tc-set-timeout-to-15-minutes.patch @@ -0,0 +1,43 @@ +From fda05798c22a354efde09a76bdfc276b2d591829 Mon Sep 17 00:00:00 2001 +From: Matthieu Baerts +Date: Thu, 13 Jul 2023 23:16:44 +0200 +Subject: selftests: tc: set timeout to 15 minutes + +From: Matthieu Baerts + +commit fda05798c22a354efde09a76bdfc276b2d591829 upstream. + +When looking for something else in LKFT reports [1], I noticed that the +TC selftest ended with a timeout error: + + not ok 1 selftests: tc-testing: tdc.sh # TIMEOUT 45 seconds + +The timeout had been introduced 3 years ago, see the Fixes commit below. + +This timeout is only in place when executing the selftests via the +kselftests runner scripts. 
I guess this is not what most TC devs are +using and nobody noticed the issue before. + +The new timeout is set to 15 minutes as suggested by Pedro [2]. It looks +like it is plenty more time than what it takes in "normal" conditions. + +Fixes: 852c8cbf34d3 ("selftests/kselftest/runner.sh: Add 45 second timeout per test") +Cc: stable@vger.kernel.org +Link: https://qa-reports.linaro.org/lkft/linux-next-master/build/next-20230711/testrun/18267241/suite/kselftest-tc-testing/test/tc-testing_tdc_sh/log [1] +Link: https://lore.kernel.org/netdev/0e061d4a-9a23-9f58-3b35-d8919de332d7@tessares.net/T/ [2] +Suggested-by: Pedro Tammela +Signed-off-by: Matthieu Baerts +Reviewed-by: Zhengchao Shao +Link: https://lore.kernel.org/r/20230713-tc-selftests-lkft-v1-1-1eb4fd3a96e7@tessares.net +Acked-by: Jamal Hadi Salim +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/tc-testing/settings | 1 + + 1 file changed, 1 insertion(+) + create mode 100644 tools/testing/selftests/tc-testing/settings + +--- /dev/null ++++ b/tools/testing/selftests/tc-testing/settings +@@ -0,0 +1 @@ ++timeout=900 diff --git a/queue-6.4/series b/queue-6.4/series index 6eb754b814c..fac95b45371 100644 --- a/queue-6.4/series +++ b/queue-6.4/series @@ -3,3 +3,25 @@ io_uring-fix-io_uring-mmap-by-using-architecture-provided-get_unmapped_area.patc alsa-hda-realtek-remove-3k-pull-low-procedure.patch alsa-hda-realtek-add-quirk-for-clevo-ns70au.patch alsa-hda-realtek-enable-mute-led-on-hp-laptop-15s-eq2xxx.patch +maple_tree-set-the-node-limit-when-creating-a-new-root-node.patch +mm-mlock-fix-vma-iterator-conversion-of-apply_vma_lock_flags.patch +maple_tree-fix-node-allocation-testing-on-32-bit.patch +selftests-mm-mkdirty-fix-incorrect-position-of-endif.patch +keys-fix-linking-a-duplicate-key-to-a-keyring-s-assoc_array.patch +prctl-move-pr_get_auxv-out-of-pr_mce_kill.patch +perf-probe-add-test-for-regression-introduced-by-switch-to-die_get_decl_file.patch 
+perf-probe-read-dwarf-files-from-the-correct-cu.patch +btrfs-fix-iput-on-error-pointer-after-error-during-orphan-cleanup.patch +btrfs-fix-warning-when-putting-transaction-with-qgroups-enabled-after-abort.patch +fuse-revalidate-don-t-invalidate-if-interrupted.patch +fuse-add-feature-flag-for-expire-only.patch +fuse-apply-flags2-only-when-userspace-set-the-fuse_init_ext.patch +btrfs-raid56-always-verify-the-p-q-contents-for-scrub.patch +btrfs-set_page_extent_mapped-after-read_folio-in-btrfs_cont_expand.patch +btrfs-fix-double-iput-on-inode-after-an-error-during-orphan-cleanup.patch +btrfs-zoned-fix-memory-leak-after-finding-block-group-with-super-blocks.patch +fuse-ioctl-translate-enosys-in-outarg.patch +btrfs-fix-race-between-balance-and-cancel-pause.patch +selftests-tc-set-timeout-to-15-minutes.patch +accel-qaic-fix-a-leak-in-map_user_pages.patch +selftests-tc-add-ct-action-kconfig-dep.patch