]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
xfs patches for 4.19
authorSasha Levin <sashal@kernel.org>
Sun, 10 Feb 2019 00:08:53 +0000 (19:08 -0500)
committerSasha Levin <sashal@kernel.org>
Sun, 10 Feb 2019 00:08:53 +0000 (19:08 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
queue-4.19/fs-xfs-fix-f_ffree-value-for-statfs-when-project-quo.patch [new file with mode: 0644]
queue-4.19/series
queue-4.19/xfs-cancel-cow-blocks-before-swapext.patch [new file with mode: 0644]
queue-4.19/xfs-delalloc-unwritten-cow-fork-allocation-can-go-wr.patch [new file with mode: 0644]
queue-4.19/xfs-fix-error-code-in-xfs_ioc_getbmap.patch [new file with mode: 0644]
queue-4.19/xfs-fix-inverted-return-from-xfs_btree_sblock_verify.patch [new file with mode: 0644]
queue-4.19/xfs-fix-overflow-in-xfs_attr3_leaf_verify.patch [new file with mode: 0644]
queue-4.19/xfs-fix-page_mask-usage-in-xfs_free_file_space.patch [new file with mode: 0644]
queue-4.19/xfs-fix-shared-extent-data-corruption-due-to-missing.patch [new file with mode: 0644]
queue-4.19/xfs-fix-transient-reference-count-error-in-xfs_buf_r.patch [new file with mode: 0644]
queue-4.19/xfs-fix-xqmstats-offsets-in-proc-fs-xfs-xqmstat.patch [new file with mode: 0644]

diff --git a/queue-4.19/fs-xfs-fix-f_ffree-value-for-statfs-when-project-quo.patch b/queue-4.19/fs-xfs-fix-f_ffree-value-for-statfs-when-project-quo.patch
new file mode 100644 (file)
index 0000000..c1255d7
--- /dev/null
@@ -0,0 +1,34 @@
+From c9d7615c9453ed32e73aeb42fbbd98cd8f170ae9 Mon Sep 17 00:00:00 2001
+From: Ye Yin <dbyin@tencent.com>
+Date: Mon, 4 Feb 2019 08:54:25 -0800
+Subject: fs/xfs: fix f_ffree value for statfs when project quota is set
+
+commit de7243057e7cefa923fa5f467c0f1ec24eef41d2 upstream.
+
+When project quota is set, we should use the inode limit minus the used count.
+
+Signed-off-by: Ye Yin <dbyin@tencent.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/xfs_qm_bhv.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c
+index 73a1d77ec187..3091e4bc04ef 100644
+--- a/fs/xfs/xfs_qm_bhv.c
++++ b/fs/xfs/xfs_qm_bhv.c
+@@ -40,7 +40,7 @@ xfs_fill_statvfs_from_dquot(
+               statp->f_files = limit;
+               statp->f_ffree =
+                       (statp->f_files > dqp->q_res_icount) ?
+-                       (statp->f_ffree - dqp->q_res_icount) : 0;
++                       (statp->f_files - dqp->q_res_icount) : 0;
+       }
+ }
+-- 
+2.19.1
+
index e897e55600e7e3fe133526b9d561a53032e06c6e..4e54b360b268829a2defb8ef7fcb845e93dd70ca 100644 (file)
@@ -245,3 +245,13 @@ fs-epoll-drop-ovflist-branch-prediction.patch
 exec-load_script-don-t-blindly-truncate-shebang-stri.patch
 kernel-kcov.c-mark-write_comp_data-as-notrace.patch
 scripts-gdb-fix-lx-version-string-output.patch
+xfs-fix-xqmstats-offsets-in-proc-fs-xfs-xqmstat.patch
+xfs-cancel-cow-blocks-before-swapext.patch
+xfs-fix-error-code-in-xfs_ioc_getbmap.patch
+xfs-fix-overflow-in-xfs_attr3_leaf_verify.patch
+xfs-fix-shared-extent-data-corruption-due-to-missing.patch
+xfs-fix-transient-reference-count-error-in-xfs_buf_r.patch
+xfs-delalloc-unwritten-cow-fork-allocation-can-go-wr.patch
+fs-xfs-fix-f_ffree-value-for-statfs-when-project-quo.patch
+xfs-fix-page_mask-usage-in-xfs_free_file_space.patch
+xfs-fix-inverted-return-from-xfs_btree_sblock_verify.patch
diff --git a/queue-4.19/xfs-cancel-cow-blocks-before-swapext.patch b/queue-4.19/xfs-cancel-cow-blocks-before-swapext.patch
new file mode 100644 (file)
index 0000000..3fed9db
--- /dev/null
@@ -0,0 +1,97 @@
+From 1420093f69b61f8db0f191ac52057ef1bd1006f8 Mon Sep 17 00:00:00 2001
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon, 4 Feb 2019 08:54:19 -0800
+Subject: xfs: cancel COW blocks before swapext
+
+commit 96987eea537d6ccd98704a71958f9ba02da80843 upstream.
+
+We need to make sure we have no outstanding COW blocks before we swap
+extents, as there is nothing preventing us from having preallocated COW
+delalloc on either inode that swapext is called on.  That case can
+easily be reproduced by running generic/324 in always_cow mode:
+
+[  620.760572] XFS: Assertion failed: tip->i_delayed_blks == 0, file: fs/xfs/xfs_bmap_util.c, line: 1669
+[  620.761608] ------------[ cut here ]------------
+[  620.762171] kernel BUG at fs/xfs/xfs_message.c:102!
+[  620.762732] invalid opcode: 0000 [#1] SMP PTI
+[  620.763272] CPU: 0 PID: 24153 Comm: xfs_fsr Tainted: G        W         4.19.0-rc1+ #4182
+[  620.764203] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.1-1 04/01/2014
+[  620.765202] RIP: 0010:assfail+0x20/0x28
+[  620.765646] Code: 31 ff e8 83 fc ff ff 0f 0b c3 48 89 f1 41 89 d0 48 c7 c6 48 ca 8d 82 48 89 fa 38
+[  620.767758] RSP: 0018:ffffc9000898bc10 EFLAGS: 00010202
+[  620.768359] RAX: 0000000000000000 RBX: ffff88012f14ba40 RCX: 0000000000000000
+[  620.769174] RDX: 00000000ffffffc0 RSI: 000000000000000a RDI: ffffffff828560d9
+[  620.769982] RBP: ffff88012f14b300 R08: 0000000000000000 R09: 0000000000000000
+[  620.770788] R10: 000000000000000a R11: f000000000000000 R12: ffffc9000898bc98
+[  620.771638] R13: ffffc9000898bc9c R14: ffff880130b5e2b8 R15: ffff88012a1fa2a8
+[  620.772504] FS:  00007fdc36e0fbc0(0000) GS:ffff88013ba00000(0000) knlGS:0000000000000000
+[  620.773475] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[  620.774168] CR2: 00007fdc3604d000 CR3: 0000000132afc000 CR4: 00000000000006f0
+[  620.774978] Call Trace:
+[  620.775274]  xfs_swap_extent_forks+0x2a0/0x2e0
+[  620.775792]  xfs_swap_extents+0x38b/0xab0
+[  620.776256]  xfs_ioc_swapext+0x121/0x140
+[  620.776709]  xfs_file_ioctl+0x328/0xc90
+[  620.777154]  ? rcu_read_lock_sched_held+0x50/0x60
+[  620.777694]  ? xfs_iunlock+0x233/0x260
+[  620.778127]  ? xfs_setattr_nonsize+0x3be/0x6a0
+[  620.778647]  do_vfs_ioctl+0x9d/0x680
+[  620.779071]  ? ksys_fchown+0x47/0x80
+[  620.779552]  ksys_ioctl+0x35/0x70
+[  620.780040]  __x64_sys_ioctl+0x11/0x20
+[  620.780530]  do_syscall_64+0x4b/0x190
+[  620.780927]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
+[  620.781467] RIP: 0033:0x7fdc364d0f07
+[  620.781900] Code: b3 66 90 48 8b 05 81 5f 2c 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 28
+[  620.784044] RSP: 002b:00007ffe2a766038 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
+[  620.784896] RAX: ffffffffffffffda RBX: 0000000000000025 RCX: 00007fdc364d0f07
+[  620.785667] RDX: 0000560296ca2fc0 RSI: 00000000c0c0586d RDI: 0000000000000005
+[  620.786398] RBP: 0000000000000025 R08: 0000000000001200 R09: 0000000000000000
+[  620.787283] R10: 0000000000000432 R11: 0000000000000246 R12: 0000000000000005
+[  620.788051] R13: 0000000000000000 R14: 0000000000001000 R15: 0000000000000006
+[  620.788927] Modules linked in:
+[  620.789340] ---[ end trace 9503b7417ffdbdb0 ]---
+[  620.790065] RIP: 0010:assfail+0x20/0x28
+[  620.790642] Code: 31 ff e8 83 fc ff ff 0f 0b c3 48 89 f1 41 89 d0 48 c7 c6 48 ca 8d 82 48 89 fa 38
+[  620.793038] RSP: 0018:ffffc9000898bc10 EFLAGS: 00010202
+[  620.793609] RAX: 0000000000000000 RBX: ffff88012f14ba40 RCX: 0000000000000000
+[  620.794317] RDX: 00000000ffffffc0 RSI: 000000000000000a RDI: ffffffff828560d9
+[  620.795025] RBP: ffff88012f14b300 R08: 0000000000000000 R09: 0000000000000000
+[  620.795778] R10: 000000000000000a R11: f000000000000000 R12: ffffc9000898bc98
+[  620.796675] R13: ffffc9000898bc9c R14: ffff880130b5e2b8 R15: ffff88012a1fa2a8
+[  620.797782] FS:  00007fdc36e0fbc0(0000) GS:ffff88013ba00000(0000) knlGS:0000000000000000
+[  620.798908] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[  620.799594] CR2: 00007fdc3604d000 CR3: 0000000132afc000 CR4: 00000000000006f0
+[  620.800424] Kernel panic - not syncing: Fatal exception
+[  620.801191] Kernel Offset: disabled
+[  620.801597] ---[ end Kernel panic - not syncing: Fatal exception ]---
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/xfs_bmap_util.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
+index 6de8d90041ff..9d1e5c3a661e 100644
+--- a/fs/xfs/xfs_bmap_util.c
++++ b/fs/xfs/xfs_bmap_util.c
+@@ -1824,6 +1824,12 @@ xfs_swap_extents(
+       if (error)
+               goto out_unlock;
++      if (xfs_inode_has_cow_data(tip)) {
++              error = xfs_reflink_cancel_cow_range(tip, 0, NULLFILEOFF, true);
++              if (error)
++                      return error;
++      }
++
+       /*
+        * Extent "swapping" with rmap requires a permanent reservation and
+        * a block reservation because it's really just a remap operation
+-- 
+2.19.1
+
diff --git a/queue-4.19/xfs-delalloc-unwritten-cow-fork-allocation-can-go-wr.patch b/queue-4.19/xfs-delalloc-unwritten-cow-fork-allocation-can-go-wr.patch
new file mode 100644 (file)
index 0000000..411dfb9
--- /dev/null
@@ -0,0 +1,105 @@
+From 67e647a64fdd24626b888e3a0acfd0226b62db5e Mon Sep 17 00:00:00 2001
+From: Dave Chinner <dchinner@redhat.com>
+Date: Mon, 4 Feb 2019 08:54:24 -0800
+Subject: xfs: delalloc -> unwritten COW fork allocation can go wrong
+
+commit 9230a0b65b47fe6856c4468ec0175c4987e5bede upstream.
+
+Long saga. There have been days spent following this through dead end
+after dead end in multi-GB event traces. This morning, after writing
+a trace-cmd wrapper that enabled me to be more selective about XFS
+trace points, I discovered that I could get just enough essential
+tracepoints enabled that there was a 50:50 chance the fsx config
+would fail at ~115k ops. If it didn't fail at op 115547, I stopped
+fsx at op 115548 anyway.
+
+That gave me two traces - one where the problem manifested, and one
+where it didn't. After refining the traces to have the necessary
+information, I found that in the failing case there was a real
+extent in the COW fork compared to an unwritten extent in the
+working case.
+
+Walking back through the two traces to the point where the COW fork
+extents actually diverged, I found that the bad case had an extra
+unwritten extent in it. This is likely because the bug it led me to
+had triggered multiple times in those 115k ops, leaving stray
+COW extents around. What I saw was a COW delalloc conversion to an
+unwritten extent (as they should always be through
+xfs_iomap_write_allocate()) resulted in a /written extent/:
+
+xfs_writepage:        dev 259:0 ino 0x83 pgoff 0x17000 size 0x79a00 offset 0 length 0
+xfs_iext_remove:      dev 259:0 ino 0x83 state RC|LF|RF|COW cur 0xffff888247b899c0/2 offset 32 block 152 count 20 flag 1 caller xfs_bmap_add_extent_delay_real
+xfs_bmap_pre_update:  dev 259:0 ino 0x83 state RC|LF|RF|COW cur 0xffff888247b899c0/1 offset 1 block 4503599627239429 count 31 flag 0 caller xfs_bmap_add_extent_delay_real
+xfs_bmap_post_update: dev 259:0 ino 0x83 state RC|LF|RF|COW cur 0xffff888247b899c0/1 offset 1 block 121 count 51 flag 0 caller xfs_bmap_add_ex
+
+Basically, Cow fork before:
+
+       0 1            32          52
+       +H+DDDDDDDDDDDD+UUUUUUUUUUU+
+          PREV         RIGHT
+
+COW delalloc conversion allocates:
+
+         1            32
+         +uuuuuuuuuuuu+
+         NEW
+
+And the result according to the xfs_bmap_post_update trace was:
+
+       0 1            32          52
+       +H+wwwwwwwwwwwwwwwwwwwwwwww+
+          PREV
+
+Which is clearly wrong - it should be a merged unwritten extent,
+not a written extent.
+
+That led me to look at the LEFT_FILLING|RIGHT_FILLING|RIGHT_CONTIG
+case in xfs_bmap_add_extent_delay_real(), and sure enough, there's
+the bug.
+
+It takes the old delalloc extent (PREV) and adds the length of the
+RIGHT extent to it, takes the start block from NEW, removes the
+RIGHT extent and then updates PREV with the new extent.
+
+What it fails to do is update PREV.br_state. For delalloc, this is
+always XFS_EXT_NORM, while in this case we are converting the
+delayed allocation to unwritten, so it needs to be updated to
+XFS_EXT_UNWRITTEN. This LF|RF|RC case does not do this, and so
+the resultant extent is always written.
+
+And that's the bug I've been chasing for a week - a bmap btree bug,
+not a reflink/dedupe/copy_file_range bug, but a BMBT bug introduced
+with the recent in core extent tree scalability enhancements.
+
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/libxfs/xfs_bmap.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
+index a47670332326..3a496ffe6551 100644
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -1683,10 +1683,13 @@ xfs_bmap_add_extent_delay_real(
+       case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
+               /*
+                * Filling in all of a previously delayed allocation extent.
+-               * The right neighbor is contiguous, the left is not.
++               * The right neighbor is contiguous, the left is not. Take care
++               * with delay -> unwritten extent allocation here because the
++               * delalloc record we are overwriting is always written.
+                */
+               PREV.br_startblock = new->br_startblock;
+               PREV.br_blockcount += RIGHT.br_blockcount;
++              PREV.br_state = new->br_state;
+               xfs_iext_next(ifp, &bma->icur);
+               xfs_iext_remove(bma->ip, &bma->icur, state);
+-- 
+2.19.1
+
diff --git a/queue-4.19/xfs-fix-error-code-in-xfs_ioc_getbmap.patch b/queue-4.19/xfs-fix-error-code-in-xfs_ioc_getbmap.patch
new file mode 100644 (file)
index 0000000..de0754d
--- /dev/null
@@ -0,0 +1,43 @@
+From 1f4e06ad825498d4f593f74d7708a3781db4a3c7 Mon Sep 17 00:00:00 2001
+From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+Date: Mon, 4 Feb 2019 08:54:20 -0800
+Subject: xfs: Fix error code in 'xfs_ioc_getbmap()'
+
+commit 132bf6723749f7219c399831eeb286dbbb985429 upstream.
+
+In this function, once 'buf' has been allocated, we unconditionally
+return 0.
+However, 'error' is set to some error codes in several error handling
+paths.
+Before commit 232b51948b99 ("xfs: simplify the xfs_getbmap interface")
+this was not an issue because all error paths were returning directly,
+but now that some cleanup at the end may be needed, we must propagate the
+error code.
+
+Fixes: 232b51948b99 ("xfs: simplify the xfs_getbmap interface")
+Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/xfs_ioctl.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
+index 0ef5ece5634c..bad90479ade2 100644
+--- a/fs/xfs/xfs_ioctl.c
++++ b/fs/xfs/xfs_ioctl.c
+@@ -1616,7 +1616,7 @@ xfs_ioc_getbmap(
+       error = 0;
+ out_free_buf:
+       kmem_free(buf);
+-      return 0;
++      return error;
+ }
+ struct getfsmap_info {
+-- 
+2.19.1
+
diff --git a/queue-4.19/xfs-fix-inverted-return-from-xfs_btree_sblock_verify.patch b/queue-4.19/xfs-fix-inverted-return-from-xfs_btree_sblock_verify.patch
new file mode 100644 (file)
index 0000000..21522a7
--- /dev/null
@@ -0,0 +1,40 @@
+From 94c38adb9d6c48d3354c5d312a8b877f1fb1b89f Mon Sep 17 00:00:00 2001
+From: Eric Sandeen <sandeen@redhat.com>
+Date: Mon, 4 Feb 2019 08:54:27 -0800
+Subject: xfs: fix inverted return from xfs_btree_sblock_verify_crc
+
+commit 7d048df4e9b05ba89b74d062df59498aa81f3785 upstream.
+
+xfs_btree_sblock_verify_crc is a bool so should not be returning
+a failaddr_t; worse, if xfs_log_check_lsn fails it returns
+__this_address which looks like a boolean true (i.e. success)
+to the caller.
+
+(interestingly xfs_btree_lblock_verify_crc doesn't have the issue)
+
+Signed-off-by: Eric Sandeen <sandeen@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/libxfs/xfs_btree.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
+index 34c6d7bd4d18..bbdae2b4559f 100644
+--- a/fs/xfs/libxfs/xfs_btree.c
++++ b/fs/xfs/libxfs/xfs_btree.c
+@@ -330,7 +330,7 @@ xfs_btree_sblock_verify_crc(
+       if (xfs_sb_version_hascrc(&mp->m_sb)) {
+               if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn)))
+-                      return __this_address;
++                      return false;
+               return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
+       }
+-- 
+2.19.1
+
diff --git a/queue-4.19/xfs-fix-overflow-in-xfs_attr3_leaf_verify.patch b/queue-4.19/xfs-fix-overflow-in-xfs_attr3_leaf_verify.patch
new file mode 100644 (file)
index 0000000..445cd94
--- /dev/null
@@ -0,0 +1,99 @@
+From 529a0bd5e10cda0762fcf66cc6acf9d0624fade2 Mon Sep 17 00:00:00 2001
+From: Dave Chinner <dchinner@redhat.com>
+Date: Mon, 4 Feb 2019 08:54:21 -0800
+Subject: xfs: fix overflow in xfs_attr3_leaf_verify
+
+commit 837514f7a4ca4aca06aec5caa5ff56d33ef06976 upstream.
+
+generic/070 on 64k block size filesystems is failing with a verifier
+corruption on writeback of an attribute leaf block:
+
+[   94.973083] XFS (pmem0): Metadata corruption detected at xfs_attr3_leaf_verify+0x246/0x260, xfs_attr3_leaf block 0x811480
+[   94.975623] XFS (pmem0): Unmount and run xfs_repair
+[   94.976720] XFS (pmem0): First 128 bytes of corrupted metadata buffer:
+[   94.978270] 000000004b2e7b45: 00 00 00 00 00 00 00 00 3b ee 00 00 00 00 00 00  ........;.......
+[   94.980268] 000000006b1db90b: 00 00 00 00 00 81 14 80 00 00 00 00 00 00 00 00  ................
+[   94.982251] 00000000433f2407: 22 7b 5c 82 2d 5c 47 4c bb 31 1c 37 fa a9 ce d6  "{\.-\GL.1.7....
+[   94.984157] 0000000010dc7dfb: 00 00 00 00 00 81 04 8a 00 0a 18 e8 dd 94 01 00  ................
+[   94.986215] 00000000d5a19229: 00 a0 dc f4 fe 98 01 68 f0 d8 07 e0 00 00 00 00  .......h........
+[   94.988171] 00000000521df36c: 0c 2d 32 e2 fe 20 01 00 0c 2d 58 65 fe 0c 01 00  .-2.. ...-Xe....
+[   94.990162] 000000008477ae06: 0c 2d 5b 66 fe 8c 01 00 0c 2d 71 35 fe 7c 01 00  .-[f.....-q5.|..
+[   94.992139] 00000000a4a6bca6: 0c 2d 72 37 fc d4 01 00 0c 2d d8 b8 f0 90 01 00  .-r7.....-......
+[   94.994789] XFS (pmem0): xfs_do_force_shutdown(0x8) called from line 1453 of file fs/xfs/xfs_buf.c. Return address = ffffffff815365f3
+
+This is failing this check:
+
+                end = ichdr.freemap[i].base + ichdr.freemap[i].size;
+                if (end < ichdr.freemap[i].base)
+>>>>>                   return __this_address;
+                if (end > mp->m_attr_geo->blksize)
+                        return __this_address;
+
+And from the buffer output above, the freemap array is:
+
+       freemap[0].base = 0x00a0
+       freemap[0].size = 0xdcf4        end = 0xdd94
+       freemap[1].base = 0xfe98
+       freemap[1].size = 0x0168        end = 0x10000
+       freemap[2].base = 0xf0d8
+       freemap[2].size = 0x07e0        end = 0xf8b8
+
+These all look valid - the block size is 0x10000 and so from the
+last check in the above verifier fragment we know that the end
+of freemap[1] is valid. The problem is that end is declared as:
+
+       uint16_t        end;
+
+And (uint16_t)0x10000 = 0. So we have a verifier bug here, not a
+corruption. Fix the verifier to use uint32_t types for the check and
+hence avoid the overflow.
+
+Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=201577
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/libxfs/xfs_attr_leaf.c | 11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
+index 6fc5425b1474..2652d00842d6 100644
+--- a/fs/xfs/libxfs/xfs_attr_leaf.c
++++ b/fs/xfs/libxfs/xfs_attr_leaf.c
+@@ -243,7 +243,7 @@ xfs_attr3_leaf_verify(
+       struct xfs_mount                *mp = bp->b_target->bt_mount;
+       struct xfs_attr_leafblock       *leaf = bp->b_addr;
+       struct xfs_attr_leaf_entry      *entries;
+-      uint16_t                        end;
++      uint32_t                        end;    /* must be 32bit - see below */
+       int                             i;
+       xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf);
+@@ -293,6 +293,11 @@ xfs_attr3_leaf_verify(
+       /*
+        * Quickly check the freemap information.  Attribute data has to be
+        * aligned to 4-byte boundaries, and likewise for the free space.
++       *
++       * Note that for 64k block size filesystems, the freemap entries cannot
++       * overflow as they are only be16 fields. However, when checking end
++       * pointer of the freemap, we have to be careful to detect overflows and
++       * so use uint32_t for those checks.
+        */
+       for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
+               if (ichdr.freemap[i].base > mp->m_attr_geo->blksize)
+@@ -303,7 +308,9 @@ xfs_attr3_leaf_verify(
+                       return __this_address;
+               if (ichdr.freemap[i].size & 0x3)
+                       return __this_address;
+-              end = ichdr.freemap[i].base + ichdr.freemap[i].size;
++
++              /* be care of 16 bit overflows here */
++              end = (uint32_t)ichdr.freemap[i].base + ichdr.freemap[i].size;
+               if (end < ichdr.freemap[i].base)
+                       return __this_address;
+               if (end > mp->m_attr_geo->blksize)
+-- 
+2.19.1
+
diff --git a/queue-4.19/xfs-fix-page_mask-usage-in-xfs_free_file_space.patch b/queue-4.19/xfs-fix-page_mask-usage-in-xfs_free_file_space.patch
new file mode 100644 (file)
index 0000000..2d43ec7
--- /dev/null
@@ -0,0 +1,48 @@
+From 58d9d8be24ec85c1d10c97af71a436b8211f0a8b Mon Sep 17 00:00:00 2001
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+Date: Mon, 4 Feb 2019 08:54:26 -0800
+Subject: xfs: fix PAGE_MASK usage in xfs_free_file_space
+
+commit a579121f94aba4e8bad1a121a0fad050d6925296 upstream.
+
+In commit e53c4b598, I *tried* to teach xfs to force writeback when we
+fzero/fpunch right up to EOF so that if EOF is in the middle of a page,
+the post-EOF part of the page gets zeroed before we return to userspace.
+Unfortunately, I missed the part where PAGE_MASK is ~(PAGE_SIZE - 1),
+which means that we totally fail to zero if we're fpunching and EOF is
+within the first page.  Worse yet, the same PAGE_MASK thinko plagues the
+filemap_write_and_wait_range call, so we'd initiate writeback of the
+entire file, which (mostly) masked the thinko.
+
+Drop the tricky PAGE_MASK and replace it with correct usage of PAGE_SIZE
+and the proper rounding macros.
+
+Fixes: e53c4b598 ("xfs: ensure post-EOF zeroing happens after zeroing part of a file")
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/xfs_bmap_util.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
+index 9d1e5c3a661e..211b06e4702e 100644
+--- a/fs/xfs/xfs_bmap_util.c
++++ b/fs/xfs/xfs_bmap_util.c
+@@ -1175,9 +1175,9 @@ xfs_free_file_space(
+        * page could be mmap'd and iomap_zero_range doesn't do that for us.
+        * Writeback of the eof page will do this, albeit clumsily.
+        */
+-      if (offset + len >= XFS_ISIZE(ip) && ((offset + len) & PAGE_MASK)) {
++      if (offset + len >= XFS_ISIZE(ip) && offset_in_page(offset + len) > 0) {
+               error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
+-                              (offset + len) & ~PAGE_MASK, LLONG_MAX);
++                              round_down(offset + len, PAGE_SIZE), LLONG_MAX);
+       }
+       return error;
+-- 
+2.19.1
+
diff --git a/queue-4.19/xfs-fix-shared-extent-data-corruption-due-to-missing.patch b/queue-4.19/xfs-fix-shared-extent-data-corruption-due-to-missing.patch
new file mode 100644 (file)
index 0000000..026e38f
--- /dev/null
@@ -0,0 +1,72 @@
+From 0eea3903a949b78f08fc2c90e25c8e8958ac8d71 Mon Sep 17 00:00:00 2001
+From: Brian Foster <bfoster@redhat.com>
+Date: Mon, 4 Feb 2019 08:54:22 -0800
+Subject: xfs: fix shared extent data corruption due to missing cow reservation
+
+commit 59e4293149106fb92530f8e56fa3992d8548c5e6 upstream.
+
+Page writeback indirectly handles shared extents via the existence
+of overlapping COW fork blocks. If COW fork blocks exist, writeback
+always performs the associated copy-on-write regardless if the
+underlying blocks are actually shared. If the blocks are shared,
+then overlapping COW fork blocks must always exist.
+
+fstests shared/010 reproduces a case where a buffered write occurs
+over a shared block without performing the requisite COW fork
+reservation.  This ultimately causes writeback to the shared extent
+and data corruption that is detected across md5 checks of the
+filesystem across a mount cycle.
+
+The problem occurs when a buffered write lands over a shared extent
+that crosses an extent size hint boundary and that also happens to
+have a partial COW reservation that doesn't cover the start and end
+blocks of the data fork extent.
+
+For example, a buffered write occurs across the file offset (in FSB
+units) range of [29, 57]. A shared extent exists at blocks [29, 35]
+and COW reservation already exists at blocks [32, 34]. After
+accommodating a COW extent size hint of 32 blocks and the existing
+reservation at offset 32, xfs_reflink_reserve_cow() allocates 32
+blocks of reservation at offset 0 and returns with COW reservation
+across the range of [0, 34]. The associated data fork extent is
+still [29, 35], however, which isn't fully covered by the COW
+reservation.
+
+This leads to a buffered write at file offset 35 over a shared
+extent without associated COW reservation. Writeback eventually
+kicks in, performs an overwrite of the underlying shared block and
+causes the associated data corruption.
+
+Update xfs_reflink_reserve_cow() to accommodate the fact that a
+delalloc allocation request may not fully cover the extent in the
+data fork. Trim the data fork extent appropriately, just as is done
+for shared extent boundaries and/or existing COW reservations that
+happen to overlap the start of the data fork extent. This prevents
+shared/010 failures due to data corruption on reflink enabled
+filesystems.
+
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/xfs_reflink.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
+index 42ea7bab9144..7088f44c0c59 100644
+--- a/fs/xfs/xfs_reflink.c
++++ b/fs/xfs/xfs_reflink.c
+@@ -302,6 +302,7 @@ xfs_reflink_reserve_cow(
+       if (error)
+               return error;
++      xfs_trim_extent(imap, got.br_startoff, got.br_blockcount);
+       trace_xfs_reflink_cow_alloc(ip, &got);
+       return 0;
+ }
+-- 
+2.19.1
+
diff --git a/queue-4.19/xfs-fix-transient-reference-count-error-in-xfs_buf_r.patch b/queue-4.19/xfs-fix-transient-reference-count-error-in-xfs_buf_r.patch
new file mode 100644 (file)
index 0000000..73d3cd0
--- /dev/null
@@ -0,0 +1,89 @@
+From 62b34e7358db94827e8f305cfc9e83e4a83b0ec8 Mon Sep 17 00:00:00 2001
+From: Dave Chinner <dchinner@redhat.com>
+Date: Mon, 4 Feb 2019 08:54:23 -0800
+Subject: xfs: fix transient reference count error in
+ xfs_buf_resubmit_failed_buffers
+
+commit d43aaf1685aa471f0593685c9f54d53e3af3cf3f upstream.
+
+When retrying a failed inode or dquot buffer,
+xfs_buf_resubmit_failed_buffers() clears all the failed flags from
+the inode/dquot log items. In doing so, it also drops all the
+reference counts on the buffer that the failed log items hold. This
+means it can drop all the active references on the buffer and hence
+free the buffer before it queues it for write again.
+
+Putting the buffer on the delwri queue takes a reference to the
+buffer (so that it hangs around until it has been written and
+completed), but this goes bang if the buffer has already been freed.
+
+Hence we need to add the buffer to the delwri queue before we remove
+the failed flags from the log items attached to the buffer to ensure
+it always remains referenced during the resubmit process.
+
+Reported-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/xfs_buf_item.c | 28 +++++++++++++++++++++-------
+ 1 file changed, 21 insertions(+), 7 deletions(-)
+
+diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
+index 12d8455bfbb2..010db5f8fb00 100644
+--- a/fs/xfs/xfs_buf_item.c
++++ b/fs/xfs/xfs_buf_item.c
+@@ -1233,9 +1233,23 @@ xfs_buf_iodone(
+ }
+ /*
+- * Requeue a failed buffer for writeback
++ * Requeue a failed buffer for writeback.
+  *
+- * Return true if the buffer has been re-queued properly, false otherwise
++ * We clear the log item failed state here as well, but we have to be careful
++ * about reference counts because the only active reference counts on the buffer
++ * may be the failed log items. Hence if we clear the log item failed state
++ * before queuing the buffer for IO we can release all active references to
++ * the buffer and free it, leading to use after free problems in
++ * xfs_buf_delwri_queue. It makes no difference to the buffer or log items which
++ * order we process them in - the buffer is locked, and we own the buffer list
++ * so nothing on them is going to change while we are performing this action.
++ *
++ * Hence we can safely queue the buffer for IO before we clear the failed log
++ * item state, therefore  always having an active reference to the buffer and
++ * avoiding the transient zero-reference state that leads to use-after-free.
++ *
++ * Return true if the buffer was added to the buffer list, false if it was
++ * already on the buffer list.
+  */
+ bool
+ xfs_buf_resubmit_failed_buffers(
+@@ -1243,16 +1257,16 @@ xfs_buf_resubmit_failed_buffers(
+       struct list_head        *buffer_list)
+ {
+       struct xfs_log_item     *lip;
++      bool                    ret;
++
++      ret = xfs_buf_delwri_queue(bp, buffer_list);
+       /*
+-       * Clear XFS_LI_FAILED flag from all items before resubmit
+-       *
+-       * XFS_LI_FAILED set/clear is protected by ail_lock, caller  this
++       * XFS_LI_FAILED set/clear is protected by ail_lock, caller of this
+        * function already have it acquired
+        */
+       list_for_each_entry(lip, &bp->b_li_list, li_bio_list)
+               xfs_clear_li_failed(lip);
+-      /* Add this buffer back to the delayed write list */
+-      return xfs_buf_delwri_queue(bp, buffer_list);
++      return ret;
+ }
+-- 
+2.19.1
+
diff --git a/queue-4.19/xfs-fix-xqmstats-offsets-in-proc-fs-xfs-xqmstat.patch b/queue-4.19/xfs-fix-xqmstats-offsets-in-proc-fs-xfs-xqmstat.patch
new file mode 100644 (file)
index 0000000..acb96d9
--- /dev/null
@@ -0,0 +1,43 @@
+From 939c6a578dc8aaecc5d35e4269198f8fb7c42c16 Mon Sep 17 00:00:00 2001
+From: Carlos Maiolino <cmaiolino@redhat.com>
+Date: Mon, 4 Feb 2019 08:54:18 -0800
+Subject: xfs: Fix xqmstats offsets in /proc/fs/xfs/xqmstat
+
+commit 41657e5507b13e963be906d5d874f4f02374fd5c upstream.
+
+The addition of FIBT, RMAP and REFCOUNT changed the offsets into
+__xfsstats structure.
+
+This caused xqmstat_proc_show() to display garbage data via
+/proc/fs/xfs/xqmstat, since it relies on the offsets marked via macros.
+
+Fix it.
+
+Fixes: 00f4e4f9 xfs: add rmap btree stats infrastructure
+Fixes: aafc3c24 xfs: support the XFS_BTNUM_FINOBT free inode btree type
+Fixes: 46eeb521 xfs: introduce refcount btree definitions
+Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
+Reviewed-by: Eric Sandeen <sandeen@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/xfs_stats.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
+index 4e4423153071..740ac9674848 100644
+--- a/fs/xfs/xfs_stats.c
++++ b/fs/xfs/xfs_stats.c
+@@ -119,7 +119,7 @@ static int xqmstat_proc_show(struct seq_file *m, void *v)
+       int j;
+       seq_printf(m, "qm");
+-      for (j = XFSSTAT_END_IBT_V2; j < XFSSTAT_END_XQMSTAT; j++)
++      for (j = XFSSTAT_END_REFCOUNT; j < XFSSTAT_END_XQMSTAT; j++)
+               seq_printf(m, " %u", counter_val(xfsstats.xs_stats, j));
+       seq_putc(m, '\n');
+       return 0;
+-- 
+2.19.1
+