From 32faa5edc2374fe65bac132737d57b57712168fc Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 29 Jun 2020 13:26:37 +0200 Subject: [PATCH] 4.19-stable patches added patches: arm64-perf-report-the-pc-value-in-regs_abi_32-mode.patch btrfs-fix-data-block-group-relocation-failure-due-to-concurrent-scrub.patch btrfs-fix-failure-of-rwf_nowait-write-into-prealloc-extent-beyond-eof.patch mm-slab-use-memzero_explicit-in-kzfree.patch ocfs2-avoid-inode-removal-while-nfsd-is-accessing-it.patch ocfs2-fix-panic-on-nfs-server-over-ocfs2.patch ocfs2-fix-value-of-ocfs2_invalid_slot.patch ocfs2-load-global_inode_alloc.patch --- ...ort-the-pc-value-in-regs_abi_32-mode.patch | 67 ++++++ ...tion-failure-due-to-concurrent-scrub.patch | 194 ++++++++++++++++++ ...rite-into-prealloc-extent-beyond-eof.patch | 61 ++++++ ...-slab-use-memzero_explicit-in-kzfree.patch | 54 +++++ ...e-removal-while-nfsd-is-accessing-it.patch | 98 +++++++++ ...2-fix-panic-on-nfs-server-over-ocfs2.patch | 90 ++++++++ ...cfs2-fix-value-of-ocfs2_invalid_slot.patch | 53 +++++ .../ocfs2-load-global_inode_alloc.patch | 43 ++++ queue-4.19/series | 8 + 9 files changed, 668 insertions(+) create mode 100644 queue-4.19/arm64-perf-report-the-pc-value-in-regs_abi_32-mode.patch create mode 100644 queue-4.19/btrfs-fix-data-block-group-relocation-failure-due-to-concurrent-scrub.patch create mode 100644 queue-4.19/btrfs-fix-failure-of-rwf_nowait-write-into-prealloc-extent-beyond-eof.patch create mode 100644 queue-4.19/mm-slab-use-memzero_explicit-in-kzfree.patch create mode 100644 queue-4.19/ocfs2-avoid-inode-removal-while-nfsd-is-accessing-it.patch create mode 100644 queue-4.19/ocfs2-fix-panic-on-nfs-server-over-ocfs2.patch create mode 100644 queue-4.19/ocfs2-fix-value-of-ocfs2_invalid_slot.patch create mode 100644 queue-4.19/ocfs2-load-global_inode_alloc.patch diff --git a/queue-4.19/arm64-perf-report-the-pc-value-in-regs_abi_32-mode.patch b/queue-4.19/arm64-perf-report-the-pc-value-in-regs_abi_32-mode.patch new 
file mode 100644 index 00000000000..4faf00e0312 --- /dev/null +++ b/queue-4.19/arm64-perf-report-the-pc-value-in-regs_abi_32-mode.patch @@ -0,0 +1,67 @@ +From 8dfe804a4031ca6ba3a3efb2048534249b64f3a5 Mon Sep 17 00:00:00 2001 +From: Jiping Ma +Date: Mon, 11 May 2020 10:52:07 +0800 +Subject: arm64: perf: Report the PC value in REGS_ABI_32 mode + +From: Jiping Ma + +commit 8dfe804a4031ca6ba3a3efb2048534249b64f3a5 upstream. + +A 32-bit perf querying the registers of a compat task using REGS_ABI_32 +will receive zeroes from w15, when it expects to find the PC. + +Return the PC value for register dwarf register 15 when returning register +values for a compat task to perf. + +Cc: +Acked-by: Mark Rutland +Signed-off-by: Jiping Ma +Link: https://lore.kernel.org/r/1589165527-188401-1-git-send-email-jiping.ma2@windriver.com +[will: Shuffled code and added a comment] +Signed-off-by: Will Deacon +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm64/kernel/perf_regs.c | 25 ++++++++++++++++++++++--- + 1 file changed, 22 insertions(+), 3 deletions(-) + +--- a/arch/arm64/kernel/perf_regs.c ++++ b/arch/arm64/kernel/perf_regs.c +@@ -15,15 +15,34 @@ u64 perf_reg_value(struct pt_regs *regs, + return 0; + + /* +- * Compat (i.e. 32 bit) mode: +- * - PC has been set in the pt_regs struct in kernel_entry, +- * - Handle SP and LR here. ++ * Our handling of compat tasks (PERF_SAMPLE_REGS_ABI_32) is weird, but ++ * we're stuck with it for ABI compatability reasons. ++ * ++ * For a 32-bit consumer inspecting a 32-bit task, then it will look at ++ * the first 16 registers (see arch/arm/include/uapi/asm/perf_regs.h). ++ * These correspond directly to a prefix of the registers saved in our ++ * 'struct pt_regs', with the exception of the PC, so we copy that down ++ * (x15 corresponds to SP_hyp in the architecture). ++ * ++ * So far, so good. ++ * ++ * The oddity arises when a 64-bit consumer looks at a 32-bit task and ++ * asks for registers beyond PERF_REG_ARM_MAX. 
In this case, we return ++ * SP_usr, LR_usr and PC in the positions where the AArch64 SP, LR and ++ * PC registers would normally live. The initial idea was to allow a ++ * 64-bit unwinder to unwind a 32-bit task and, although it's not clear ++ * how well that works in practice, somebody might be relying on it. ++ * ++ * At the time we make a sample, we don't know whether the consumer is ++ * 32-bit or 64-bit, so we have to cater for both possibilities. + */ + if (compat_user_mode(regs)) { + if ((u32)idx == PERF_REG_ARM64_SP) + return regs->compat_sp; + if ((u32)idx == PERF_REG_ARM64_LR) + return regs->compat_lr; ++ if (idx == 15) ++ return regs->pc; + } + + if ((u32)idx == PERF_REG_ARM64_SP) diff --git a/queue-4.19/btrfs-fix-data-block-group-relocation-failure-due-to-concurrent-scrub.patch b/queue-4.19/btrfs-fix-data-block-group-relocation-failure-due-to-concurrent-scrub.patch new file mode 100644 index 00000000000..2f1231d40d8 --- /dev/null +++ b/queue-4.19/btrfs-fix-data-block-group-relocation-failure-due-to-concurrent-scrub.patch @@ -0,0 +1,194 @@ +From 432cd2a10f1c10cead91fe706ff5dc52f06d642a Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Mon, 8 Jun 2020 13:32:55 +0100 +Subject: btrfs: fix data block group relocation failure due to concurrent scrub + +From: Filipe Manana + +commit 432cd2a10f1c10cead91fe706ff5dc52f06d642a upstream. + +When running relocation of a data block group while scrub is running in +parallel, it is possible that the relocation will fail and abort the +current transaction with an -EINVAL error: + + [134243.988595] BTRFS info (device sdc): found 14 extents, stage: move data extents + [134243.999871] ------------[ cut here ]------------ + [134244.000741] BTRFS: Transaction aborted (error -22) + [134244.001692] WARNING: CPU: 0 PID: 26954 at fs/btrfs/ctree.c:1071 __btrfs_cow_block+0x6a7/0x790 [btrfs] + [134244.003380] Modules linked in: btrfs blake2b_generic xor raid6_pq (...) 
+ [134244.012577] CPU: 0 PID: 26954 Comm: btrfs Tainted: G W 5.6.0-rc7-btrfs-next-58 #5 + [134244.014162] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014 + [134244.016184] RIP: 0010:__btrfs_cow_block+0x6a7/0x790 [btrfs] + [134244.017151] Code: 48 c7 c7 (...) + [134244.020549] RSP: 0018:ffffa41607863888 EFLAGS: 00010286 + [134244.021515] RAX: 0000000000000000 RBX: ffff9614bdfe09c8 RCX: 0000000000000000 + [134244.022822] RDX: 0000000000000001 RSI: ffffffffb3d63980 RDI: 0000000000000001 + [134244.024124] RBP: ffff961589e8c000 R08: 0000000000000000 R09: 0000000000000001 + [134244.025424] R10: ffffffffc0ae5955 R11: 0000000000000000 R12: ffff9614bd530d08 + [134244.026725] R13: ffff9614ced41b88 R14: ffff9614bdfe2a48 R15: 0000000000000000 + [134244.028024] FS: 00007f29b63c08c0(0000) GS:ffff9615ba600000(0000) knlGS:0000000000000000 + [134244.029491] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + [134244.030560] CR2: 00007f4eb339b000 CR3: 0000000130d6e006 CR4: 00000000003606f0 + [134244.031997] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + [134244.033153] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + [134244.034484] Call Trace: + [134244.034984] btrfs_cow_block+0x12b/0x2b0 [btrfs] + [134244.035859] do_relocation+0x30b/0x790 [btrfs] + [134244.036681] ? do_raw_spin_unlock+0x49/0xc0 + [134244.037460] ? _raw_spin_unlock+0x29/0x40 + [134244.038235] relocate_tree_blocks+0x37b/0x730 [btrfs] + [134244.039245] relocate_block_group+0x388/0x770 [btrfs] + [134244.040228] btrfs_relocate_block_group+0x161/0x2e0 [btrfs] + [134244.041323] btrfs_relocate_chunk+0x36/0x110 [btrfs] + [134244.041345] btrfs_balance+0xc06/0x1860 [btrfs] + [134244.043382] ? btrfs_ioctl_balance+0x27c/0x310 [btrfs] + [134244.045586] btrfs_ioctl_balance+0x1ed/0x310 [btrfs] + [134244.045611] btrfs_ioctl+0x1880/0x3760 [btrfs] + [134244.049043] ? do_raw_spin_unlock+0x49/0xc0 + [134244.049838] ? 
_raw_spin_unlock+0x29/0x40 + [134244.050587] ? __handle_mm_fault+0x11b3/0x14b0 + [134244.051417] ? ksys_ioctl+0x92/0xb0 + [134244.052070] ksys_ioctl+0x92/0xb0 + [134244.052701] ? trace_hardirqs_off_thunk+0x1a/0x1c + [134244.053511] __x64_sys_ioctl+0x16/0x20 + [134244.054206] do_syscall_64+0x5c/0x280 + [134244.054891] entry_SYSCALL_64_after_hwframe+0x49/0xbe + [134244.055819] RIP: 0033:0x7f29b51c9dd7 + [134244.056491] Code: 00 00 00 (...) + [134244.059767] RSP: 002b:00007ffcccc1dd08 EFLAGS: 00000202 ORIG_RAX: 0000000000000010 + [134244.061168] RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007f29b51c9dd7 + [134244.062474] RDX: 00007ffcccc1dda0 RSI: 00000000c4009420 RDI: 0000000000000003 + [134244.063771] RBP: 0000000000000003 R08: 00005565cea4b000 R09: 0000000000000000 + [134244.065032] R10: 0000000000000541 R11: 0000000000000202 R12: 00007ffcccc2060a + [134244.066327] R13: 00007ffcccc1dda0 R14: 0000000000000002 R15: 00007ffcccc1dec0 + [134244.067626] irq event stamp: 0 + [134244.068202] hardirqs last enabled at (0): [<0000000000000000>] 0x0 + [134244.069351] hardirqs last disabled at (0): [] copy_process+0x74f/0x2020 + [134244.070909] softirqs last enabled at (0): [] copy_process+0x74f/0x2020 + [134244.072392] softirqs last disabled at (0): [<0000000000000000>] 0x0 + [134244.073432] ---[ end trace bd7c03622e0b0a99 ]--- + +The -EINVAL error comes from the following chain of function calls: + + __btrfs_cow_block() <-- aborts the transaction + btrfs_reloc_cow_block() + replace_file_extents() + get_new_location() <-- returns -EINVAL + +When relocating a data block group, for each allocated extent of the block +group, we preallocate another extent (at prealloc_file_extent_cluster()), +associated with the data relocation inode, and then dirty all its pages. +These preallocated extents have, and must have, the same size that extents +from the data block group being relocated have. 
+ +Later before we start the relocation stage that updates pointers (bytenr +field of file extent items) to point to the new extents, we trigger +writeback for the data relocation inode. The expectation is that writeback +will write the pages to the previously preallocated extents, that it +follows the NOCOW path. That is generally the case, however, if a scrub +is running it may have turned the block group that contains those extents +into RO mode, in which case writeback falls back to the COW path. + +However in the COW path instead of allocating exactly one extent with the +expected size, the allocator may end up allocating several smaller extents +due to free space fragmentation - because we tell it at cow_file_range() +that the minimum allocation size can match the filesystem's sector size. +This later breaks the relocation's expectation that an extent associated +to a file extent item in the data relocation inode has the same size as +the respective extent pointed by a file extent item in another tree - in +this case the extent to which the relocation inode points to is smaller, +causing relocation.c:get_new_location() to return -EINVAL. + +For example, if we are relocating a data block group X that has a logical +address of X and the block group has an extent allocated at the logical +address X + 128KiB with a size of 64KiB: + +1) At prealloc_file_extent_cluster() we allocate an extent for the data + relocation inode with a size of 64KiB and associate it to the file + offset 128KiB (X + 128KiB - X) of the data relocation inode. This + preallocated extent was allocated at block group Z; + +2) A scrub running in parallel turns block group Z into RO mode and + starts scrubbing its extents; + +3) Relocation triggers writeback for the data relocation inode; + +4) When running delalloc (btrfs_run_delalloc_range()), we try first the + NOCOW path because the data relocation inode has BTRFS_INODE_PREALLOC + set in its flags. 
However, because block group Z is in RO mode, the + NOCOW path (run_delalloc_nocow()) falls back into the COW path, by + calling cow_file_range(); + +5) At cow_file_range(), in the first iteration of the while loop we call + btrfs_reserve_extent() to allocate a 64KiB extent and pass it a minimum + allocation size of 4KiB (fs_info->sectorsize). Due to free space + fragmentation, btrfs_reserve_extent() ends up allocating two extents + of 32KiB each, each one on a different iteration of that while loop; + +6) Writeback of the data relocation inode completes; + +7) Relocation proceeds and ends up at relocation.c:replace_file_extents(), + with a leaf which has a file extent item that points to the data extent + from block group X, that has a logical address (bytenr) of X + 128KiB + and a size of 64KiB. Then it calls get_new_location(), which does a + lookup in the data relocation tree for a file extent item starting at + offset 128KiB (X + 128KiB - X) and belonging to the data relocation + inode. It finds a corresponding file extent item, however that item + points to an extent that has a size of 32KiB, which doesn't match the + expected size of 64KiB, resulting in -EINVAL being returned from this + function and propagated up to __btrfs_cow_block(), which aborts the + current transaction. + +To fix this make sure that at cow_file_range() when we call the allocator +we pass it a minimum allocation size corresponding to the desired extent size +if the inode belongs to the data relocation tree, otherwise pass it the +filesystem's sector size as the minimum allocation size. 
+ +CC: stable@vger.kernel.org # 4.4+ +Reviewed-by: Josef Bacik +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/inode.c | 19 ++++++++++++++++++- + 1 file changed, 18 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -977,6 +977,7 @@ static noinline int cow_file_range(struc + u64 num_bytes; + unsigned long ram_size; + u64 cur_alloc_size = 0; ++ u64 min_alloc_size; + u64 blocksize = fs_info->sectorsize; + struct btrfs_key ins; + struct extent_map *em; +@@ -1028,10 +1029,26 @@ static noinline int cow_file_range(struc + btrfs_drop_extent_cache(BTRFS_I(inode), start, + start + num_bytes - 1, 0); + ++ /* ++ * Relocation relies on the relocated extents to have exactly the same ++ * size as the original extents. Normally writeback for relocation data ++ * extents follows a NOCOW path because relocation preallocates the ++ * extents. However, due to an operation such as scrub turning a block ++ * group to RO mode, it may fallback to COW mode, so we must make sure ++ * an extent allocated during COW has exactly the requested size and can ++ * not be split into smaller extents, otherwise relocation breaks and ++ * fails during the stage where it updates the bytenr of file extent ++ * items. 
++ */ ++ if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) ++ min_alloc_size = num_bytes; ++ else ++ min_alloc_size = fs_info->sectorsize; ++ + while (num_bytes > 0) { + cur_alloc_size = num_bytes; + ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size, +- fs_info->sectorsize, 0, alloc_hint, ++ min_alloc_size, 0, alloc_hint, + &ins, 1, 1); + if (ret < 0) + goto out_unlock; diff --git a/queue-4.19/btrfs-fix-failure-of-rwf_nowait-write-into-prealloc-extent-beyond-eof.patch b/queue-4.19/btrfs-fix-failure-of-rwf_nowait-write-into-prealloc-extent-beyond-eof.patch new file mode 100644 index 00000000000..a4b57872d69 --- /dev/null +++ b/queue-4.19/btrfs-fix-failure-of-rwf_nowait-write-into-prealloc-extent-beyond-eof.patch @@ -0,0 +1,61 @@ +From 4b1946284dd6641afdb9457101056d9e6ee6204c Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Mon, 15 Jun 2020 18:48:58 +0100 +Subject: btrfs: fix failure of RWF_NOWAIT write into prealloc extent beyond eof + +From: Filipe Manana + +commit 4b1946284dd6641afdb9457101056d9e6ee6204c upstream. + +If we attempt to write to prealloc extent located after eof using a +RWF_NOWAIT write, we always fail with -EAGAIN. + +We do actually check if we have an allocated extent for the write at +the start of btrfs_file_write_iter() through a call to check_can_nocow(), +but later when we go into the actual direct IO write path we simply +return -EAGAIN if the write starts at or beyond EOF. + +Trivial to reproduce: + + $ mkfs.btrfs -f /dev/sdb + $ mount /dev/sdb /mnt + + $ touch /mnt/foo + $ chattr +C /mnt/foo + + $ xfs_io -d -c "pwrite -S 0xab 0 64K" /mnt/foo + wrote 65536/65536 bytes at offset 0 + 64 KiB, 16 ops; 0.0004 sec (135.575 MiB/sec and 34707.1584 ops/sec) + + $ xfs_io -c "falloc -k 64K 1M" /mnt/foo + + $ xfs_io -d -c "pwrite -N -V 1 -S 0xfe -b 64K 64K 64K" /mnt/foo + pwrite: Resource temporarily unavailable + +On xfs and ext4 the write succeeds, as expected. 
+ +Fix this by removing the wrong check at btrfs_direct_IO(). + +Fixes: edf064e7c6fec3 ("btrfs: nowait aio support") +CC: stable@vger.kernel.org # 4.14+ +Signed-off-by: Filipe Manana +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/inode.c | 3 --- + 1 file changed, 3 deletions(-) + +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -8656,9 +8656,6 @@ static ssize_t btrfs_direct_IO(struct ki + dio_data.overwrite = 1; + inode_unlock(inode); + relock = true; +- } else if (iocb->ki_flags & IOCB_NOWAIT) { +- ret = -EAGAIN; +- goto out; + } + ret = btrfs_delalloc_reserve_space(inode, &data_reserved, + offset, count); diff --git a/queue-4.19/mm-slab-use-memzero_explicit-in-kzfree.patch b/queue-4.19/mm-slab-use-memzero_explicit-in-kzfree.patch new file mode 100644 index 00000000000..73b78b9ed2c --- /dev/null +++ b/queue-4.19/mm-slab-use-memzero_explicit-in-kzfree.patch @@ -0,0 +1,54 @@ +From 8982ae527fbef170ef298650c15d55a9ccd33973 Mon Sep 17 00:00:00 2001 +From: Waiman Long +Date: Thu, 25 Jun 2020 20:29:52 -0700 +Subject: mm/slab: use memzero_explicit() in kzfree() + +From: Waiman Long + +commit 8982ae527fbef170ef298650c15d55a9ccd33973 upstream. + +The kzfree() function is normally used to clear some sensitive +information, like encryption keys, in the buffer before freeing it back to +the pool. Memset() is currently used for buffer clearing. However +unlikely, there is still a non-zero probability that the compiler may +choose to optimize away the memory clearing especially if LTO is being +used in the future. + +To make sure that this optimization will never happen, +memzero_explicit(), which is introduced in v3.18, is now used in +kzfree() to future-proof it. + +Link: http://lkml.kernel.org/r/20200616154311.12314-2-longman@redhat.com +Fixes: 3ef0e5ba4673 ("slab: introduce kzfree()") +Signed-off-by: Waiman Long +Acked-by: Michal Hocko +Cc: David Howells +Cc: Jarkko Sakkinen +Cc: James Morris +Cc: "Serge E. 
Hallyn" +Cc: Joe Perches +Cc: Matthew Wilcox +Cc: David Rientjes +Cc: Johannes Weiner +Cc: Dan Carpenter +Cc: "Jason A . Donenfeld" +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/slab_common.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/mm/slab_common.c ++++ b/mm/slab_common.c +@@ -1540,7 +1540,7 @@ void kzfree(const void *p) + if (unlikely(ZERO_OR_NULL_PTR(mem))) + return; + ks = ksize(mem); +- memset(mem, 0, ks); ++ memzero_explicit(mem, ks); + kfree(mem); + } + EXPORT_SYMBOL(kzfree); diff --git a/queue-4.19/ocfs2-avoid-inode-removal-while-nfsd-is-accessing-it.patch b/queue-4.19/ocfs2-avoid-inode-removal-while-nfsd-is-accessing-it.patch new file mode 100644 index 00000000000..1fee1749f01 --- /dev/null +++ b/queue-4.19/ocfs2-avoid-inode-removal-while-nfsd-is-accessing-it.patch @@ -0,0 +1,98 @@ +From 4cd9973f9ff69e37dd0ba2bd6e6423f8179c329a Mon Sep 17 00:00:00 2001 +From: Junxiao Bi +Date: Thu, 25 Jun 2020 20:29:30 -0700 +Subject: ocfs2: avoid inode removal while nfsd is accessing it + +From: Junxiao Bi + +commit 4cd9973f9ff69e37dd0ba2bd6e6423f8179c329a upstream. + +Patch series "ocfs2: fix nfsd over ocfs2 issues", v2. + +This is a series of patches to fix issues on nfsd over ocfs2. patch 1 +is to avoid inode removed while nfsd access it patch 2 & 3 is to fix a +panic issue. + +This patch (of 4): + +When nfsd is getting file dentry using handle or parent dentry of some +dentry, one cluster lock is used to avoid inode removed from other node, +but it still could be removed from local node, so use a rw lock to avoid +this. 
+ +Link: http://lkml.kernel.org/r/20200616183829.87211-1-junxiao.bi@oracle.com +Link: http://lkml.kernel.org/r/20200616183829.87211-2-junxiao.bi@oracle.com +Signed-off-by: Junxiao Bi +Reviewed-by: Joseph Qi +Cc: Changwei Ge +Cc: Gang He +Cc: Joel Becker +Cc: Jun Piao +Cc: Mark Fasheh +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ocfs2/dlmglue.c | 17 ++++++++++++++++- + fs/ocfs2/ocfs2.h | 1 + + 2 files changed, 17 insertions(+), 1 deletion(-) + +--- a/fs/ocfs2/dlmglue.c ++++ b/fs/ocfs2/dlmglue.c +@@ -682,6 +682,12 @@ static void ocfs2_nfs_sync_lock_res_init + &ocfs2_nfs_sync_lops, osb); + } + ++static void ocfs2_nfs_sync_lock_init(struct ocfs2_super *osb) ++{ ++ ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb); ++ init_rwsem(&osb->nfs_sync_rwlock); ++} ++ + void ocfs2_trim_fs_lock_res_init(struct ocfs2_super *osb) + { + struct ocfs2_lock_res *lockres = &osb->osb_trim_fs_lockres; +@@ -2851,6 +2857,11 @@ int ocfs2_nfs_sync_lock(struct ocfs2_sup + if (ocfs2_is_hard_readonly(osb)) + return -EROFS; + ++ if (ex) ++ down_write(&osb->nfs_sync_rwlock); ++ else ++ down_read(&osb->nfs_sync_rwlock); ++ + if (ocfs2_mount_local(osb)) + return 0; + +@@ -2869,6 +2880,10 @@ void ocfs2_nfs_sync_unlock(struct ocfs2_ + if (!ocfs2_mount_local(osb)) + ocfs2_cluster_unlock(osb, lockres, + ex ? 
LKM_EXMODE : LKM_PRMODE); ++ if (ex) ++ up_write(&osb->nfs_sync_rwlock); ++ else ++ up_read(&osb->nfs_sync_rwlock); + } + + int ocfs2_trim_fs_lock(struct ocfs2_super *osb, +@@ -3314,7 +3329,7 @@ int ocfs2_dlm_init(struct ocfs2_super *o + local: + ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb); + ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb); +- ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb); ++ ocfs2_nfs_sync_lock_init(osb); + ocfs2_orphan_scan_lock_res_init(&osb->osb_orphan_scan.os_lockres, osb); + + osb->cconn = conn; +--- a/fs/ocfs2/ocfs2.h ++++ b/fs/ocfs2/ocfs2.h +@@ -406,6 +406,7 @@ struct ocfs2_super + struct ocfs2_lock_res osb_super_lockres; + struct ocfs2_lock_res osb_rename_lockres; + struct ocfs2_lock_res osb_nfs_sync_lockres; ++ struct rw_semaphore nfs_sync_rwlock; + struct ocfs2_lock_res osb_trim_fs_lockres; + struct ocfs2_dlm_debug *osb_dlm_debug; + diff --git a/queue-4.19/ocfs2-fix-panic-on-nfs-server-over-ocfs2.patch b/queue-4.19/ocfs2-fix-panic-on-nfs-server-over-ocfs2.patch new file mode 100644 index 00000000000..aae356e1d4c --- /dev/null +++ b/queue-4.19/ocfs2-fix-panic-on-nfs-server-over-ocfs2.patch @@ -0,0 +1,90 @@ +From e5a15e17a78d58f933d17cafedfcf7486a29f5b4 Mon Sep 17 00:00:00 2001 +From: Junxiao Bi +Date: Thu, 25 Jun 2020 20:29:37 -0700 +Subject: ocfs2: fix panic on nfs server over ocfs2 + +From: Junxiao Bi + +commit e5a15e17a78d58f933d17cafedfcf7486a29f5b4 upstream. + +The following kernel panic was captured when running nfs server over +ocfs2, at that time ocfs2_test_inode_bit() was checking whether one +inode locating at "blkno" 5 was valid, that is ocfs2 root inode, its +"suballoc_slot" was OCFS2_INVALID_SLOT(65535) and it was allocated from +//global_inode_alloc, but here it wrongly assumed that it was got from per +slot inode allocator which would cause array overflow and trigger kernel +panic. 
+ + BUG: unable to handle kernel paging request at 0000000000001088 + IP: [] _raw_spin_lock+0x18/0xf0 + PGD 1e06ba067 PUD 1e9e7d067 PMD 0 + Oops: 0002 [#1] SMP + CPU: 6 PID: 24873 Comm: nfsd Not tainted 4.1.12-124.36.1.el6uek.x86_64 #2 + Hardware name: Huawei CH121 V3/IT11SGCA1, BIOS 3.87 02/02/2018 + RIP: _raw_spin_lock+0x18/0xf0 + RSP: e02b:ffff88005ae97908 EFLAGS: 00010206 + RAX: ffff88005ae98000 RBX: 0000000000001088 RCX: 0000000000000000 + RDX: 0000000000020000 RSI: 0000000000000009 RDI: 0000000000001088 + RBP: ffff88005ae97928 R08: 0000000000000000 R09: ffff880212878e00 + R10: 0000000000007ff0 R11: 0000000000000000 R12: 0000000000001088 + R13: ffff8800063c0aa8 R14: ffff8800650c27d0 R15: 000000000000ffff + FS: 0000000000000000(0000) GS:ffff880218180000(0000) knlGS:ffff880218180000 + CS: e033 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 0000000000001088 CR3: 00000002033d0000 CR4: 0000000000042660 + Call Trace: + igrab+0x1e/0x60 + ocfs2_get_system_file_inode+0x63/0x3a0 [ocfs2] + ocfs2_test_inode_bit+0x328/0xa00 [ocfs2] + ocfs2_get_parent+0xba/0x3e0 [ocfs2] + reconnect_path+0xb5/0x300 + exportfs_decode_fh+0xf6/0x2b0 + fh_verify+0x350/0x660 [nfsd] + nfsd4_putfh+0x4d/0x60 [nfsd] + nfsd4_proc_compound+0x3d3/0x6f0 [nfsd] + nfsd_dispatch+0xe0/0x290 [nfsd] + svc_process_common+0x412/0x6a0 [sunrpc] + svc_process+0x123/0x210 [sunrpc] + nfsd+0xff/0x170 [nfsd] + kthread+0xcb/0xf0 + ret_from_fork+0x61/0x90 + Code: 83 c2 02 0f b7 f2 e8 18 dc 91 ff 66 90 eb bf 0f 1f 40 00 55 48 89 e5 41 56 41 55 41 54 53 0f 1f 44 00 00 48 89 fb ba 00 00 02 00 0f c1 17 89 d0 45 31 e4 45 31 ed c1 e8 10 66 39 d0 41 89 c6 + RIP _raw_spin_lock+0x18/0xf0 + CR2: 0000000000001088 + ---[ end trace 7264463cd1aac8f9 ]--- + Kernel panic - not syncing: Fatal exception + +Link: http://lkml.kernel.org/r/20200616183829.87211-4-junxiao.bi@oracle.com +Signed-off-by: Junxiao Bi +Reviewed-by: Joseph Qi +Cc: Changwei Ge +Cc: Gang He +Cc: Joel Becker +Cc: Jun Piao +Cc: Mark Fasheh +Cc: +Signed-off-by: Andrew 
Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ocfs2/suballoc.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +--- a/fs/ocfs2/suballoc.c ++++ b/fs/ocfs2/suballoc.c +@@ -2841,9 +2841,12 @@ int ocfs2_test_inode_bit(struct ocfs2_su + goto bail; + } + +- inode_alloc_inode = +- ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE, +- suballoc_slot); ++ if (suballoc_slot == (u16)OCFS2_INVALID_SLOT) ++ inode_alloc_inode = ocfs2_get_system_file_inode(osb, ++ GLOBAL_INODE_ALLOC_SYSTEM_INODE, suballoc_slot); ++ else ++ inode_alloc_inode = ocfs2_get_system_file_inode(osb, ++ INODE_ALLOC_SYSTEM_INODE, suballoc_slot); + if (!inode_alloc_inode) { + /* the error code could be inaccurate, but we are not able to + * get the correct one. */ diff --git a/queue-4.19/ocfs2-fix-value-of-ocfs2_invalid_slot.patch b/queue-4.19/ocfs2-fix-value-of-ocfs2_invalid_slot.patch new file mode 100644 index 00000000000..0e8131fcc52 --- /dev/null +++ b/queue-4.19/ocfs2-fix-value-of-ocfs2_invalid_slot.patch @@ -0,0 +1,53 @@ +From 9277f8334ffc719fe922d776444d6e4e884dbf30 Mon Sep 17 00:00:00 2001 +From: Junxiao Bi +Date: Thu, 25 Jun 2020 20:29:40 -0700 +Subject: ocfs2: fix value of OCFS2_INVALID_SLOT + +From: Junxiao Bi + +commit 9277f8334ffc719fe922d776444d6e4e884dbf30 upstream. + +In the ocfs2 disk layout, slot number is 16 bits, but in ocfs2 +implementation, slot number is 32 bits. Usually this will not cause any +issue, because slot number is converted from u16 to u32, but +OCFS2_INVALID_SLOT was defined as -1, when an invalid slot number from +disk was obtained, its value was (u16)-1, and it was converted to u32. +Then the following checking in get_local_system_inode will be always +skipped: + + static struct inode **get_local_system_inode(struct ocfs2_super *osb, + int type, + u32 slot) + { + BUG_ON(slot == OCFS2_INVALID_SLOT); + ... 
+ } + +Link: http://lkml.kernel.org/r/20200616183829.87211-5-junxiao.bi@oracle.com +Signed-off-by: Junxiao Bi +Reviewed-by: Joseph Qi +Cc: Mark Fasheh +Cc: Joel Becker +Cc: Changwei Ge +Cc: Gang He +Cc: Jun Piao +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ocfs2/ocfs2_fs.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/ocfs2/ocfs2_fs.h ++++ b/fs/ocfs2/ocfs2_fs.h +@@ -303,7 +303,7 @@ + #define OCFS2_MAX_SLOTS 255 + + /* Slot map indicator for an empty slot */ +-#define OCFS2_INVALID_SLOT -1 ++#define OCFS2_INVALID_SLOT ((u16)-1) + + #define OCFS2_VOL_UUID_LEN 16 + #define OCFS2_MAX_VOL_LABEL_LEN 64 diff --git a/queue-4.19/ocfs2-load-global_inode_alloc.patch b/queue-4.19/ocfs2-load-global_inode_alloc.patch new file mode 100644 index 00000000000..08c51a7dd8b --- /dev/null +++ b/queue-4.19/ocfs2-load-global_inode_alloc.patch @@ -0,0 +1,43 @@ +From 7569d3c754e452769a5747eeeba488179e38a5da Mon Sep 17 00:00:00 2001 +From: Junxiao Bi +Date: Thu, 25 Jun 2020 20:29:33 -0700 +Subject: ocfs2: load global_inode_alloc + +From: Junxiao Bi + +commit 7569d3c754e452769a5747eeeba488179e38a5da upstream. + +Set global_inode_alloc as OCFS2_FIRST_ONLINE_SYSTEM_INODE, that will +make it load during mount. It can be used to test whether some +global/system inodes are valid. One use case is that nfsd will test +whether root inode is valid. 
+ +Link: http://lkml.kernel.org/r/20200616183829.87211-3-junxiao.bi@oracle.com +Signed-off-by: Junxiao Bi +Reviewed-by: Joseph Qi +Cc: Changwei Ge +Cc: Gang He +Cc: Joel Becker +Cc: Jun Piao +Cc: Mark Fasheh +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ocfs2/ocfs2_fs.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/ocfs2/ocfs2_fs.h ++++ b/fs/ocfs2/ocfs2_fs.h +@@ -339,8 +339,8 @@ struct ocfs2_system_inode_info { + enum { + BAD_BLOCK_SYSTEM_INODE = 0, + GLOBAL_INODE_ALLOC_SYSTEM_INODE, ++#define OCFS2_FIRST_ONLINE_SYSTEM_INODE GLOBAL_INODE_ALLOC_SYSTEM_INODE + SLOT_MAP_SYSTEM_INODE, +-#define OCFS2_FIRST_ONLINE_SYSTEM_INODE SLOT_MAP_SYSTEM_INODE + HEARTBEAT_SYSTEM_INODE, + GLOBAL_BITMAP_SYSTEM_INODE, + USER_QUOTA_SYSTEM_INODE, diff --git a/queue-4.19/series b/queue-4.19/series index cd0e00674f6..7d72a3d51f1 100644 --- a/queue-4.19/series +++ b/queue-4.19/series @@ -108,3 +108,11 @@ erofs-fix-partially-uninitialized-misuse-in-z_erofs_onlinepage_fixup.patch kvm-x86-fix-msr-range-of-apic-registers-in-x2apic-mode.patch kvm-nvmx-plumb-l2-gpa-through-to-pml-emulation.patch x86-asm-64-align-start-of-__clear_user-loop-to-16-bytes.patch +btrfs-fix-data-block-group-relocation-failure-due-to-concurrent-scrub.patch +btrfs-fix-failure-of-rwf_nowait-write-into-prealloc-extent-beyond-eof.patch +mm-slab-use-memzero_explicit-in-kzfree.patch +ocfs2-avoid-inode-removal-while-nfsd-is-accessing-it.patch +ocfs2-load-global_inode_alloc.patch +ocfs2-fix-value-of-ocfs2_invalid_slot.patch +ocfs2-fix-panic-on-nfs-server-over-ocfs2.patch +arm64-perf-report-the-pc-value-in-regs_abi_32-mode.patch -- 2.47.3