git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.9-stable patches
author: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 10 Jan 2017 10:32:52 +0000 (11:32 +0100)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 10 Jan 2017 10:32:52 +0000 (11:32 +0100)
added patches:
xfs-always-succeed-when-deduping-zero-bytes.patch
xfs-check-for-bogus-values-in-btree-block-headers.patch
xfs-check-minimum-block-size-for-crc-filesystems.patch
xfs-check-return-value-of-_trans_reserve_quota_nblks.patch
xfs-clean-up-cow-fork-reservation-and-tag-inodes-correctly.patch
xfs-complain-if-we-don-t-get-nextents-bmap-records.patch
xfs-don-t-allow-di_size-with-high-bit-set.patch
xfs-don-t-bug-on-mixed-direct-and-mapped-i-o.patch
xfs-don-t-call-xfs_sb_quota_from_disk-twice.patch
xfs-don-t-cap-maximum-dedupe-request-length.patch
xfs-don-t-crash-if-reading-a-directory-results-in-an-unexpected-hole.patch
xfs-don-t-skip-cow-forks-w-delalloc-blocks-in-cowblocks-scan.patch
xfs-error-out-if-trying-to-add-attrs-and-anextents-0.patch
xfs-factor-rmap-btree-size-into-the-indlen-calculations.patch
xfs-fix-crash-and-data-corruption-due-to-removal-of-busy-cow-extents.patch
xfs-fix-double-cleanup-when-cui-recovery-fails.patch
xfs-fix-max_retries-_show-and-_store-functions.patch
xfs-fix-unbalanced-inode-reclaim-flush-locking.patch
xfs-forbid-ag-btrees-with-level-0.patch
xfs-handle-cow-fork-in-xfs_bmap_trace_exlist.patch
xfs-ignore-leaf-attr-ichdr.count-in-verifier-during-log-replay.patch
xfs-move-agi-buffer-type-setting-to-xfs_read_agi.patch
xfs-new-inode-extent-list-lookup-helpers.patch
xfs-pass-post-eof-speculative-prealloc-blocks-to-bmapi.patch
xfs-pass-state-not-whichfork-to-trace_xfs_extlist.patch
xfs-provide-helper-for-counting-extents-from-if_bytes.patch
xfs-remove-prev-argument-to-xfs_bmapi_reserve_delalloc.patch
xfs-track-preallocation-separately-in-xfs_bmapi_reserve_delalloc.patch
xfs-use-gpf_nofs-when-allocating-btree-cursors.patch
xfs-use-new-extent-lookup-helpers-in-__xfs_reflink_reserve_cow.patch
xfs-use-new-extent-lookup-helpers-xfs_file_iomap_begin_delay.patch
xfs-use-the-actual-ag-length-when-reserving-blocks.patch

33 files changed:
queue-4.9/series
queue-4.9/xfs-always-succeed-when-deduping-zero-bytes.patch [new file with mode: 0644]
queue-4.9/xfs-check-for-bogus-values-in-btree-block-headers.patch [new file with mode: 0644]
queue-4.9/xfs-check-minimum-block-size-for-crc-filesystems.patch [new file with mode: 0644]
queue-4.9/xfs-check-return-value-of-_trans_reserve_quota_nblks.patch [new file with mode: 0644]
queue-4.9/xfs-clean-up-cow-fork-reservation-and-tag-inodes-correctly.patch [new file with mode: 0644]
queue-4.9/xfs-complain-if-we-don-t-get-nextents-bmap-records.patch [new file with mode: 0644]
queue-4.9/xfs-don-t-allow-di_size-with-high-bit-set.patch [new file with mode: 0644]
queue-4.9/xfs-don-t-bug-on-mixed-direct-and-mapped-i-o.patch [new file with mode: 0644]
queue-4.9/xfs-don-t-call-xfs_sb_quota_from_disk-twice.patch [new file with mode: 0644]
queue-4.9/xfs-don-t-cap-maximum-dedupe-request-length.patch [new file with mode: 0644]
queue-4.9/xfs-don-t-crash-if-reading-a-directory-results-in-an-unexpected-hole.patch [new file with mode: 0644]
queue-4.9/xfs-don-t-skip-cow-forks-w-delalloc-blocks-in-cowblocks-scan.patch [new file with mode: 0644]
queue-4.9/xfs-error-out-if-trying-to-add-attrs-and-anextents-0.patch [new file with mode: 0644]
queue-4.9/xfs-factor-rmap-btree-size-into-the-indlen-calculations.patch [new file with mode: 0644]
queue-4.9/xfs-fix-crash-and-data-corruption-due-to-removal-of-busy-cow-extents.patch [new file with mode: 0644]
queue-4.9/xfs-fix-double-cleanup-when-cui-recovery-fails.patch [new file with mode: 0644]
queue-4.9/xfs-fix-max_retries-_show-and-_store-functions.patch [new file with mode: 0644]
queue-4.9/xfs-fix-unbalanced-inode-reclaim-flush-locking.patch [new file with mode: 0644]
queue-4.9/xfs-forbid-ag-btrees-with-level-0.patch [new file with mode: 0644]
queue-4.9/xfs-handle-cow-fork-in-xfs_bmap_trace_exlist.patch [new file with mode: 0644]
queue-4.9/xfs-ignore-leaf-attr-ichdr.count-in-verifier-during-log-replay.patch [new file with mode: 0644]
queue-4.9/xfs-move-agi-buffer-type-setting-to-xfs_read_agi.patch [new file with mode: 0644]
queue-4.9/xfs-new-inode-extent-list-lookup-helpers.patch [new file with mode: 0644]
queue-4.9/xfs-pass-post-eof-speculative-prealloc-blocks-to-bmapi.patch [new file with mode: 0644]
queue-4.9/xfs-pass-state-not-whichfork-to-trace_xfs_extlist.patch [new file with mode: 0644]
queue-4.9/xfs-provide-helper-for-counting-extents-from-if_bytes.patch [new file with mode: 0644]
queue-4.9/xfs-remove-prev-argument-to-xfs_bmapi_reserve_delalloc.patch [new file with mode: 0644]
queue-4.9/xfs-track-preallocation-separately-in-xfs_bmapi_reserve_delalloc.patch [new file with mode: 0644]
queue-4.9/xfs-use-gpf_nofs-when-allocating-btree-cursors.patch [new file with mode: 0644]
queue-4.9/xfs-use-new-extent-lookup-helpers-in-__xfs_reflink_reserve_cow.patch [new file with mode: 0644]
queue-4.9/xfs-use-new-extent-lookup-helpers-xfs_file_iomap_begin_delay.patch [new file with mode: 0644]
queue-4.9/xfs-use-the-actual-ag-length-when-reserving-blocks.patch [new file with mode: 0644]

index e01dd8ffcb8d1e56521f5baada6dfb134a269468..11f0ded7f9ea236fe54455e6cb34dd53b94b7668 100644 (file)
@@ -170,3 +170,35 @@ drm-i915-disable-psr-by-default-on-hsw-bdw.patch
 drm-i915-gen9-unconditionally-apply-the-memory-bandwidth-wa.patch
 drm-i915-gen9-fix-the-wm-memory-bandwidth-wa-for-y-tiling-cases.patch
 tpm_tis-check-return-values-from-get_burstcount.patch
+xfs-don-t-call-xfs_sb_quota_from_disk-twice.patch
+xfs-check-return-value-of-_trans_reserve_quota_nblks.patch
+xfs-don-t-skip-cow-forks-w-delalloc-blocks-in-cowblocks-scan.patch
+xfs-don-t-bug-on-mixed-direct-and-mapped-i-o.patch
+xfs-provide-helper-for-counting-extents-from-if_bytes.patch
+xfs-check-minimum-block-size-for-crc-filesystems.patch
+xfs-fix-unbalanced-inode-reclaim-flush-locking.patch
+xfs-new-inode-extent-list-lookup-helpers.patch
+xfs-factor-rmap-btree-size-into-the-indlen-calculations.patch
+xfs-always-succeed-when-deduping-zero-bytes.patch
+xfs-remove-prev-argument-to-xfs_bmapi_reserve_delalloc.patch
+xfs-track-preallocation-separately-in-xfs_bmapi_reserve_delalloc.patch
+xfs-use-new-extent-lookup-helpers-in-__xfs_reflink_reserve_cow.patch
+xfs-clean-up-cow-fork-reservation-and-tag-inodes-correctly.patch
+xfs-use-new-extent-lookup-helpers-xfs_file_iomap_begin_delay.patch
+xfs-pass-post-eof-speculative-prealloc-blocks-to-bmapi.patch
+xfs-move-agi-buffer-type-setting-to-xfs_read_agi.patch
+xfs-pass-state-not-whichfork-to-trace_xfs_extlist.patch
+xfs-handle-cow-fork-in-xfs_bmap_trace_exlist.patch
+xfs-forbid-ag-btrees-with-level-0.patch
+xfs-check-for-bogus-values-in-btree-block-headers.patch
+xfs-complain-if-we-don-t-get-nextents-bmap-records.patch
+xfs-don-t-crash-if-reading-a-directory-results-in-an-unexpected-hole.patch
+xfs-error-out-if-trying-to-add-attrs-and-anextents-0.patch
+xfs-don-t-allow-di_size-with-high-bit-set.patch
+xfs-don-t-cap-maximum-dedupe-request-length.patch
+xfs-ignore-leaf-attr-ichdr.count-in-verifier-during-log-replay.patch
+xfs-use-gpf_nofs-when-allocating-btree-cursors.patch
+xfs-fix-double-cleanup-when-cui-recovery-fails.patch
+xfs-use-the-actual-ag-length-when-reserving-blocks.patch
+xfs-fix-crash-and-data-corruption-due-to-removal-of-busy-cow-extents.patch
+xfs-fix-max_retries-_show-and-_store-functions.patch
diff --git a/queue-4.9/xfs-always-succeed-when-deduping-zero-bytes.patch b/queue-4.9/xfs-always-succeed-when-deduping-zero-bytes.patch
new file mode 100644 (file)
index 0000000..4ad3fc3
--- /dev/null
@@ -0,0 +1,44 @@
+From hch@lst.de  Tue Jan 10 11:24:37 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon,  9 Jan 2017 16:38:41 +0100
+Subject: xfs: always succeed when deduping zero bytes
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, "Darrick J. Wong" <darrick.wong@oracle.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-11-git-send-email-hch@lst.de>
+
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit fba3e594ef0ad911fa8f559732d588172f212d71 upstream.
+
+It turns out that btrfs and xfs had differing interpretations of what
+to do when the dedupe length is zero.  Change xfs to follow btrfs'
+semantics so that the userland interface is consistent.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_reflink.c |    8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/fs/xfs/xfs_reflink.c
++++ b/fs/xfs/xfs_reflink.c
+@@ -1345,8 +1345,14 @@ xfs_reflink_remap_range(
+               goto out_unlock;
+       }
+-      if (len == 0)
++      /* Zero length dedupe exits immediately; reflink goes to EOF. */
++      if (len == 0) {
++              if (is_dedupe) {
++                      ret = 0;
++                      goto out_unlock;
++              }
+               len = isize - pos_in;
++      }
+       /* Ensure offsets don't wrap and the input is inside i_size */
+       if (pos_in + len < pos_in || pos_out + len < pos_out ||
diff --git a/queue-4.9/xfs-check-for-bogus-values-in-btree-block-headers.patch b/queue-4.9/xfs-check-for-bogus-values-in-btree-block-headers.patch
new file mode 100644 (file)
index 0000000..2faccbc
--- /dev/null
@@ -0,0 +1,55 @@
+From hch@lst.de  Tue Jan 10 11:29:26 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon,  9 Jan 2017 16:38:52 +0100
+Subject: xfs: check for bogus values in btree block headers
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, "Darrick J. Wong" <darrick.wong@oracle.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-22-git-send-email-hch@lst.de>
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit bb3be7e7c1c18e1b141d4cadeb98cc89ecf78099 upstream.
+
+When we're reading a btree block, make sure that what we retrieved
+matches the owner and level; and has a plausible number of records.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_btree.c |   20 ++++++++++++++++++++
+ 1 file changed, 20 insertions(+)
+
+--- a/fs/xfs/libxfs/xfs_btree.c
++++ b/fs/xfs/libxfs/xfs_btree.c
+@@ -1769,8 +1769,28 @@ xfs_btree_lookup_get_block(
+       if (error)
+               return error;
++      /* Check the inode owner since the verifiers don't. */
++      if (xfs_sb_version_hascrc(&cur->bc_mp->m_sb) &&
++          (cur->bc_flags & XFS_BTREE_LONG_PTRS) &&
++          be64_to_cpu((*blkp)->bb_u.l.bb_owner) !=
++                      cur->bc_private.b.ip->i_ino)
++              goto out_bad;
++
++      /* Did we get the level we were looking for? */
++      if (be16_to_cpu((*blkp)->bb_level) != level)
++              goto out_bad;
++
++      /* Check that internal nodes have at least one record. */
++      if (level != 0 && be16_to_cpu((*blkp)->bb_numrecs) == 0)
++              goto out_bad;
++
+       xfs_btree_setbuf(cur, level, bp);
+       return 0;
++
++out_bad:
++      *blkp = NULL;
++      xfs_trans_brelse(cur->bc_tp, bp);
++      return -EFSCORRUPTED;
+ }
+ /*
diff --git a/queue-4.9/xfs-check-minimum-block-size-for-crc-filesystems.patch b/queue-4.9/xfs-check-minimum-block-size-for-crc-filesystems.patch
new file mode 100644 (file)
index 0000000..0e5a9b7
--- /dev/null
@@ -0,0 +1,59 @@
+From hch@lst.de  Tue Jan 10 11:23:44 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon,  9 Jan 2017 16:38:37 +0100
+Subject: xfs: check minimum block size for CRC filesystems
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, "Darrick J. Wong" <darrick.wong@oracle.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-7-git-send-email-hch@lst.de>
+
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit bec9d48d7a303a5bb95c05961ff07ec7eeb59058 upstream.
+
+Check the minimum block size on v5 filesystems.
+
+[dchinner: cleaned up XFS_MIN_CRC_BLOCKSIZE check]
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_sb.c    |    6 ++++++
+ fs/xfs/libxfs/xfs_types.h |    3 +++
+ 2 files changed, 9 insertions(+)
+
+--- a/fs/xfs/libxfs/xfs_sb.c
++++ b/fs/xfs/libxfs/xfs_sb.c
+@@ -262,6 +262,12 @@ xfs_mount_validate_sb(
+               return -EFSCORRUPTED;
+       }
++      if (xfs_sb_version_hascrc(&mp->m_sb) &&
++          sbp->sb_blocksize < XFS_MIN_CRC_BLOCKSIZE) {
++              xfs_notice(mp, "v5 SB sanity check failed");
++              return -EFSCORRUPTED;
++      }
++
+       /*
+        * Until this is fixed only page-sized or smaller data blocks work.
+        */
+--- a/fs/xfs/libxfs/xfs_types.h
++++ b/fs/xfs/libxfs/xfs_types.h
+@@ -75,11 +75,14 @@ typedef __int64_t  xfs_sfiloff_t;  /* sign
+  * Minimum and maximum blocksize and sectorsize.
+  * The blocksize upper limit is pretty much arbitrary.
+  * The sectorsize upper limit is due to sizeof(sb_sectsize).
++ * CRC enabled filesystems use 512 byte inodes, meaning 512 byte block sizes
++ * cannot be used.
+  */
+ #define XFS_MIN_BLOCKSIZE_LOG 9       /* i.e. 512 bytes */
+ #define XFS_MAX_BLOCKSIZE_LOG 16      /* i.e. 65536 bytes */
+ #define XFS_MIN_BLOCKSIZE     (1 << XFS_MIN_BLOCKSIZE_LOG)
+ #define XFS_MAX_BLOCKSIZE     (1 << XFS_MAX_BLOCKSIZE_LOG)
++#define XFS_MIN_CRC_BLOCKSIZE (1 << (XFS_MIN_BLOCKSIZE_LOG + 1))
+ #define XFS_MIN_SECTORSIZE_LOG        9       /* i.e. 512 bytes */
+ #define XFS_MAX_SECTORSIZE_LOG        15      /* i.e. 32768 bytes */
+ #define XFS_MIN_SECTORSIZE    (1 << XFS_MIN_SECTORSIZE_LOG)
diff --git a/queue-4.9/xfs-check-return-value-of-_trans_reserve_quota_nblks.patch b/queue-4.9/xfs-check-return-value-of-_trans_reserve_quota_nblks.patch
new file mode 100644 (file)
index 0000000..29902b9
--- /dev/null
@@ -0,0 +1,39 @@
+From hch@lst.de  Tue Jan 10 11:22:36 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon,  9 Jan 2017 16:38:33 +0100
+Subject: xfs: check return value of _trans_reserve_quota_nblks
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, "Darrick J. Wong" <darrick.wong@oracle.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-3-git-send-email-hch@lst.de>
+
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit 4fd29ec47212c8cbf98916af519019ccc5e58e49 upstream.
+
+Check the return value of xfs_trans_reserve_quota_nblks for errors.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_bmap.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -4902,8 +4902,11 @@ xfs_bmap_del_extent_delay(
+        * sb counters as we might have to borrow some blocks for the
+        * indirect block accounting.
+        */
+-      xfs_trans_reserve_quota_nblks(NULL, ip, -((long)del->br_blockcount), 0,
++      error = xfs_trans_reserve_quota_nblks(NULL, ip,
++                      -((long)del->br_blockcount), 0,
+                       isrt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
++      if (error)
++              return error;
+       ip->i_delayed_blks -= del->br_blockcount;
+       if (whichfork == XFS_COW_FORK)
diff --git a/queue-4.9/xfs-clean-up-cow-fork-reservation-and-tag-inodes-correctly.patch b/queue-4.9/xfs-clean-up-cow-fork-reservation-and-tag-inodes-correctly.patch
new file mode 100644 (file)
index 0000000..6146031
--- /dev/null
@@ -0,0 +1,98 @@
+From hch@lst.de  Tue Jan 10 11:25:29 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon,  9 Jan 2017 16:38:45 +0100
+Subject: xfs: clean up cow fork reservation and tag inodes correctly
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, Brian Foster <bfoster@redhat.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-15-git-send-email-hch@lst.de>
+
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit 0260d8ff5f76617e3a55a1c471383ecb4404c3ad upstream.
+
+COW fork reservation is implemented via delayed allocation. The code is
+modeled after the traditional delalloc allocation code, but is slightly
+different in terms of how preallocation occurs. Rather than post-eof
+speculative preallocation, COW fork preallocation is implemented via a
+COW extent size hint that is designed to minimize fragmentation as a
+reflinked file is split over time.
+
+xfs_reflink_reserve_cow() still uses logic that is oriented towards
+dealing with post-eof speculative preallocation, however, and is stale
+or not necessarily correct. First, the EOF alignment to the COW extent
+size hint is implemented in xfs_bmapi_reserve_delalloc() (which does so
+correctly by aligning the start and end offsets) and so is not necessary
+in xfs_reflink_reserve_cow(). The backoff and retry logic on ENOSPC is
+also ineffective for the same reason, as xfs_bmapi_reserve_delalloc()
+will simply perform the same allocation request on the retry. Finally,
+since the COW extent size hint aligns the start and end offset of the
+range to allocate, the end_fsb != orig_end_fsb logic is not sufficient.
+Indeed, if a write request happens to end on an aligned offset, it is
+possible that we do not tag the inode for COW preallocation even though
+xfs_bmapi_reserve_delalloc() may have preallocated at the start offset.
+
+Kill the unnecessary, duplicate code in xfs_reflink_reserve_cow().
+Remove the inode tag logic as well since xfs_bmapi_reserve_delalloc()
+has been updated to tag the inode correctly.
+
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_reflink.c |   29 +++--------------------------
+ 1 file changed, 3 insertions(+), 26 deletions(-)
+
+--- a/fs/xfs/xfs_reflink.c
++++ b/fs/xfs/xfs_reflink.c
+@@ -245,11 +245,9 @@ xfs_reflink_reserve_cow(
+ {
+       struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+       struct xfs_bmbt_irec    got;
+-      xfs_fileoff_t           end_fsb, orig_end_fsb;
+       int                     error = 0;
+       bool                    eof = false, trimmed;
+       xfs_extnum_t            idx;
+-      xfs_extlen_t            align;
+       /*
+        * Search the COW fork extent list first.  This serves two purposes:
+@@ -287,33 +285,12 @@ xfs_reflink_reserve_cow(
+       if (error)
+               return error;
+-      end_fsb = orig_end_fsb = imap->br_startoff + imap->br_blockcount;
+-
+-      align = xfs_eof_alignment(ip, xfs_get_cowextsz_hint(ip));
+-      if (align)
+-              end_fsb = roundup_64(end_fsb, align);
+-
+-retry:
+       error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, imap->br_startoff,
+-                      end_fsb - imap->br_startoff, 0, &got, &idx, eof);
+-      switch (error) {
+-      case 0:
+-              break;
+-      case -ENOSPC:
+-      case -EDQUOT:
+-              /* retry without any preallocation */
++                      imap->br_blockcount, 0, &got, &idx, eof);
++      if (error == -ENOSPC || error == -EDQUOT)
+               trace_xfs_reflink_cow_enospc(ip, imap);
+-              if (end_fsb != orig_end_fsb) {
+-                      end_fsb = orig_end_fsb;
+-                      goto retry;
+-              }
+-              /*FALLTHRU*/
+-      default:
++      if (error)
+               return error;
+-      }
+-
+-      if (end_fsb != orig_end_fsb)
+-              xfs_inode_set_cowblocks_tag(ip);
+       trace_xfs_reflink_cow_alloc(ip, &got);
+       return 0;
diff --git a/queue-4.9/xfs-complain-if-we-don-t-get-nextents-bmap-records.patch b/queue-4.9/xfs-complain-if-we-don-t-get-nextents-bmap-records.patch
new file mode 100644 (file)
index 0000000..6c68662
--- /dev/null
@@ -0,0 +1,42 @@
+From hch@lst.de  Tue Jan 10 11:29:49 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon,  9 Jan 2017 16:38:53 +0100
+Subject: xfs: complain if we don't get nextents bmap records
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, "Darrick J. Wong" <darrick.wong@oracle.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-23-git-send-email-hch@lst.de>
+
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit 356a3225222e5bc4df88aef3419fb6424f18ab69 upstream.
+
+When reading into memory all extents of a btree-format inode fork,
+complain if the number of extents we find is not the same as the number
+of extents reported in the inode core.  This is needed to stop an IO
+action from accessing the garbage areas of the in-core fork.
+
+[dchinner: removed redundant assert]
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_bmap.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -1377,8 +1377,9 @@ xfs_bmap_read_extents(
+                       return error;
+               block = XFS_BUF_TO_BLOCK(bp);
+       }
++      if (i != XFS_IFORK_NEXTENTS(ip, whichfork))
++              return -EFSCORRUPTED;
+       ASSERT(i == xfs_iext_count(ifp));
+-      ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork));
+       XFS_BMAP_TRACE_EXLIST(ip, i, whichfork);
+       return 0;
+ error0:
diff --git a/queue-4.9/xfs-don-t-allow-di_size-with-high-bit-set.patch b/queue-4.9/xfs-don-t-allow-di_size-with-high-bit-set.patch
new file mode 100644 (file)
index 0000000..b6ccbb2
--- /dev/null
@@ -0,0 +1,45 @@
+From hch@lst.de  Tue Jan 10 11:30:18 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon,  9 Jan 2017 16:38:56 +0100
+Subject: xfs: don't allow di_size with high bit set
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, "Darrick J. Wong" <darrick.wong@oracle.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-26-git-send-email-hch@lst.de>
+
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit ef388e2054feedaeb05399ed654bdb06f385d294 upstream.
+
+The on-disk field di_size is used to set i_size, which is a signed
+integer of loff_t.  If the high bit of di_size is set, we'll end up with
+a negative i_size, which will cause all sorts of problems.  Since the
+VFS won't let us create a file with such length, we should catch them
+here in the verifier too.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_inode_buf.c |    8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/fs/xfs/libxfs/xfs_inode_buf.c
++++ b/fs/xfs/libxfs/xfs_inode_buf.c
+@@ -392,6 +392,14 @@ xfs_dinode_verify(
+       if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
+               return false;
++      /* don't allow invalid i_size */
++      if (be64_to_cpu(dip->di_size) & (1ULL << 63))
++              return false;
++
++      /* No zero-length symlinks. */
++      if (S_ISLNK(be16_to_cpu(dip->di_mode)) && dip->di_size == 0)
++              return false;
++
+       /* only version 3 or greater inodes are extensively verified here */
+       if (dip->di_version < 3)
+               return true;
diff --git a/queue-4.9/xfs-don-t-bug-on-mixed-direct-and-mapped-i-o.patch b/queue-4.9/xfs-don-t-bug-on-mixed-direct-and-mapped-i-o.patch
new file mode 100644 (file)
index 0000000..94c4aad
--- /dev/null
@@ -0,0 +1,89 @@
+From hch@lst.de  Tue Jan 10 11:23:14 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon,  9 Jan 2017 16:38:35 +0100
+Subject: xfs: don't BUG() on mixed direct and mapped I/O
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, Brian Foster <bfoster@redhat.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-5-git-send-email-hch@lst.de>
+
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit 04197b341f23b908193308b8d63d17ff23232598 upstream.
+
+We've had reports of generic/095 causing XFS to BUG() in
+__xfs_get_blocks() due to the existence of delalloc blocks on a
+direct I/O read. generic/095 issues a mix of various types of I/O,
+including direct and memory mapped I/O to a single file. This is
+clearly not supported behavior and is known to lead to such
+problems. E.g., the lack of exclusion between the direct I/O and
+write fault paths means that a write fault can allocate delalloc
+blocks in a region of a file that was previously a hole after the
+direct read has attempted to flush/inval the file range, but before
+it actually reads the block mapping. In turn, the direct read
+discovers a delalloc extent and cannot proceed.
+
+While the appropriate solution here is to not mix direct and memory
+mapped I/O to the same regions of the same file, the current
+BUG_ON() behavior is probably overkill as it can crash the entire
+system.  Instead, localize the failure to the I/O in question by
+returning an error for a direct I/O that cannot be handled safely
+due to delalloc blocks. Be careful to allow the case of a direct
+write to post-eof delalloc blocks. This can occur due to speculative
+preallocation and is safe as post-eof blocks are not accompanied by
+dirty pages in pagecache (conversely, preallocation within eof must
+have been zeroed, and thus dirtied, before the inode size could have
+been increased beyond said blocks).
+
+Finally, provide an additional warning if a direct I/O write occurs
+while the file is memory mapped. This may not catch all problematic
+scenarios, but provides a hint that some known-to-be-problematic I/O
+methods are in use.
+
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_aops.c |   22 ++++++++++++++++++++--
+ 1 file changed, 20 insertions(+), 2 deletions(-)
+
+--- a/fs/xfs/xfs_aops.c
++++ b/fs/xfs/xfs_aops.c
+@@ -1361,6 +1361,26 @@ __xfs_get_blocks(
+       if (error)
+               goto out_unlock;
++      /*
++       * The only time we can ever safely find delalloc blocks on direct I/O
++       * is a dio write to post-eof speculative preallocation. All other
++       * scenarios are indicative of a problem or misuse (such as mixing
++       * direct and mapped I/O).
++       *
++       * The file may be unmapped by the time we get here so we cannot
++       * reliably fail the I/O based on mapping. Instead, fail the I/O if this
++       * is a read or a write within eof. Otherwise, carry on but warn as a
++       * precaution if the file happens to be mapped.
++       */
++      if (direct && imap.br_startblock == DELAYSTARTBLOCK) {
++              if (!create || offset < i_size_read(VFS_I(ip))) {
++                      WARN_ON_ONCE(1);
++                      error = -EIO;
++                      goto out_unlock;
++              }
++              WARN_ON_ONCE(mapping_mapped(VFS_I(ip)->i_mapping));
++      }
++
+       /* for DAX, we convert unwritten extents directly */
+       if (create &&
+           (!nimaps ||
+@@ -1450,8 +1470,6 @@ __xfs_get_blocks(
+            (new || ISUNWRITTEN(&imap))))
+               set_buffer_new(bh_result);
+-      BUG_ON(direct && imap.br_startblock == DELAYSTARTBLOCK);
+-
+       return 0;
+ out_unlock:
diff --git a/queue-4.9/xfs-don-t-call-xfs_sb_quota_from_disk-twice.patch b/queue-4.9/xfs-don-t-call-xfs_sb_quota_from_disk-twice.patch
new file mode 100644 (file)
index 0000000..2b3a5be
--- /dev/null
@@ -0,0 +1,69 @@
+From hch@lst.de  Tue Jan 10 11:09:49 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon,  9 Jan 2017 16:38:32 +0100
+Subject: xfs: don't call xfs_sb_quota_from_disk twice
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, Eric Sandeen <sandeen@redhat.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-2-git-send-email-hch@lst.de>
+
+
+From: Eric Sandeen <sandeen@redhat.com>
+
+commit e6fc6fcf4447c9266038c55c25e4c7c14bee110c upstream.
+
+Source xfsprogs commit: ee3754254e8c186c99b6cdd4d59f741759d04acb
+
+Kernel commit 5ef828c4 ("xfs: avoid false quotacheck after unclean
+shutdown") made xfs_sb_from_disk() also call xfs_sb_quota_from_disk
+by default.
+
+However, when this was merged to libxfs, existing separate
+calls to libxfs_sb_quota_from_disk remained, and calling it
+twice in a row on a V4 superblock leads to issues, because:
+
+        if (sbp->sb_qflags & XFS_PQUOTA_ACCT)  {
+...
+                sbp->sb_pquotino = sbp->sb_gquotino;
+                sbp->sb_gquotino = NULLFSINO;
+
+and after the second call, we have set both pquotino and gquotino
+to NULLFSINO.
+
+Fix this by making it safe to call twice, and also remove the extra
+calls to libxfs_sb_quota_from_disk.
+
+This is only spotted when running xfstests with "-m crc=0" because
+the sb_from_disk change came about after V5 became default, and
+the above behavior only exists on a V4 superblock.
+
+Reported-by: Eryu Guan <eguan@redhat.com>
+Signed-off-by: Eric Sandeen <sandeen@redhat.com>
+Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_sb.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_sb.c
++++ b/fs/xfs/libxfs/xfs_sb.c
+@@ -338,13 +338,16 @@ xfs_sb_quota_from_disk(struct xfs_sb *sb
+                                       XFS_PQUOTA_CHKD : XFS_GQUOTA_CHKD;
+       sbp->sb_qflags &= ~(XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD);
+-      if (sbp->sb_qflags & XFS_PQUOTA_ACCT)  {
++      if (sbp->sb_qflags & XFS_PQUOTA_ACCT &&
++          sbp->sb_gquotino != NULLFSINO)  {
+               /*
+                * In older version of superblock, on-disk superblock only
+                * has sb_gquotino, and in-core superblock has both sb_gquotino
+                * and sb_pquotino. But, only one of them is supported at any
+                * point of time. So, if PQUOTA is set in disk superblock,
+-               * copy over sb_gquotino to sb_pquotino.
++               * copy over sb_gquotino to sb_pquotino.  The NULLFSINO test
++               * above is to make sure we don't do this twice and wipe them
++               * both out!
+                */
+               sbp->sb_pquotino = sbp->sb_gquotino;
+               sbp->sb_gquotino = NULLFSINO;
diff --git a/queue-4.9/xfs-don-t-cap-maximum-dedupe-request-length.patch b/queue-4.9/xfs-don-t-cap-maximum-dedupe-request-length.patch
new file mode 100644 (file)
index 0000000..d19e588
--- /dev/null
@@ -0,0 +1,52 @@
+From hch@lst.de  Tue Jan 10 11:30:27 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon,  9 Jan 2017 16:38:57 +0100
+Subject: xfs: don't cap maximum dedupe request length
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, "Darrick J. Wong" <darrick.wong@oracle.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-27-git-send-email-hch@lst.de>
+
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit 1bb33a98702d8360947f18a44349df75ba555d5d upstream.
+
+After various discussions on linux-fsdevel, it has been decided that it
+is not necessary to cap the length of a dedupe request, and that
+correctly-written userspace client programs will be able to absorb the
+change.  Therefore, remove the length clamping behavior.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_file.c |    9 ---------
+ 1 file changed, 9 deletions(-)
+
+--- a/fs/xfs/xfs_file.c
++++ b/fs/xfs/xfs_file.c
+@@ -939,7 +939,6 @@ xfs_file_clone_range(
+                                    len, false);
+ }
+-#define XFS_MAX_DEDUPE_LEN    (16 * 1024 * 1024)
+ STATIC ssize_t
+ xfs_file_dedupe_range(
+       struct file     *src_file,
+@@ -950,14 +949,6 @@ xfs_file_dedupe_range(
+ {
+       int             error;
+-      /*
+-       * Limit the total length we will dedupe for each operation.
+-       * This is intended to bound the total time spent in this
+-       * ioctl to something sane.
+-       */
+-      if (len > XFS_MAX_DEDUPE_LEN)
+-              len = XFS_MAX_DEDUPE_LEN;
+-
+       error = xfs_reflink_remap_range(src_file, loff, dst_file, dst_loff,
+                                    len, true);
+       if (error)
diff --git a/queue-4.9/xfs-don-t-crash-if-reading-a-directory-results-in-an-unexpected-hole.patch b/queue-4.9/xfs-don-t-crash-if-reading-a-directory-results-in-an-unexpected-hole.patch
new file mode 100644 (file)
index 0000000..9e9c1b3
--- /dev/null
@@ -0,0 +1,39 @@
+From hch@lst.de  Tue Jan 10 11:29:59 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon,  9 Jan 2017 16:38:54 +0100
+Subject: xfs: don't crash if reading a directory results in an unexpected hole
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, "Darrick J. Wong" <darrick.wong@oracle.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-24-git-send-email-hch@lst.de>
+
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit 96a3aefb8ffde23180130460b0b2407b328eb727 upstream.
+
+In xfs_dir3_data_read, we can encounter the situation where err == 0 and
+*bpp == NULL if the given bno offset happens to be a hole; this leads to
+a crash if we try to set the buffer type after the _da_read_buf call.
+Holes can happen due to corrupt or malicious entries in the bmbt data,
+so be a little more careful when we're handling buffers.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_dir2_data.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/xfs/libxfs/xfs_dir2_data.c
++++ b/fs/xfs/libxfs/xfs_dir2_data.c
+@@ -329,7 +329,7 @@ xfs_dir3_data_read(
+       err = xfs_da_read_buf(tp, dp, bno, mapped_bno, bpp,
+                               XFS_DATA_FORK, &xfs_dir3_data_buf_ops);
+-      if (!err && tp)
++      if (!err && tp && *bpp)
+               xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_DATA_BUF);
+       return err;
+ }
diff --git a/queue-4.9/xfs-don-t-skip-cow-forks-w-delalloc-blocks-in-cowblocks-scan.patch b/queue-4.9/xfs-don-t-skip-cow-forks-w-delalloc-blocks-in-cowblocks-scan.patch
new file mode 100644 (file)
index 0000000..8430d9f
--- /dev/null
@@ -0,0 +1,121 @@
+From hch@lst.de  Tue Jan 10 11:23:00 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon,  9 Jan 2017 16:38:34 +0100
+Subject: xfs: don't skip cow forks w/ delalloc blocks in cowblocks scan
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, Brian Foster <bfoster@redhat.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-4-git-send-email-hch@lst.de>
+
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit 399372349a7f9b2d7e56e4fa4467c69822d07024 upstream.
+
+The cowblocks background scanner currently clears the cowblocks tag
+for inodes without any real allocations in the cow fork. This
+excludes inodes with only delalloc blocks in the cow fork. While we
+might never expect to clear delalloc blocks from the cow fork in the
+background scanner, it is not necessarily correct to clear the
+cowblocks tag from such inodes.
+
+For example, if the background scanner happens to process an inode
+between a buffered write and writeback, the scanner catches the
+inode in a state after delalloc blocks have been allocated to the
+cow fork but before the delalloc blocks have been converted to real
+blocks by writeback. The background scanner then incorrectly clears
+the cowblocks tag, even if part of the aforementioned delalloc
+reservation will not be remapped to the data fork (i.e., extra
+blocks due to the cowextsize hint). This means that any such
+additional blocks in the cow fork might never be reclaimed by the
+background scanner and could persist until the inode itself is
+reclaimed.
+
+To address this problem, only skip and clear inodes without any cow
+fork allocations whatsoever from the background scanner. While we
+generally do not want to cancel delalloc reservations from the
+background scanner, the pagecache dirty check following the
+cowblocks check should prevent that situation. If we do end up with
+delalloc cow fork blocks without a dirty address space mapping, this
+is probably an indication that something has gone wrong and the
+blocks should be reclaimed, as they may never be converted to a real
+allocation.
+
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_icache.c  |    7 ++++++-
+ fs/xfs/xfs_reflink.c |   34 ----------------------------------
+ fs/xfs/xfs_reflink.h |    2 --
+ 3 files changed, 6 insertions(+), 37 deletions(-)
+
+--- a/fs/xfs/xfs_icache.c
++++ b/fs/xfs/xfs_icache.c
+@@ -1580,10 +1580,15 @@ xfs_inode_free_cowblocks(
+       struct xfs_eofblocks *eofb = args;
+       bool need_iolock = true;
+       int match;
++      struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+       ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0));
+-      if (!xfs_reflink_has_real_cow_blocks(ip)) {
++      /*
++       * Just clear the tag if we have an empty cow fork or none at all. It's
++       * possible the inode was fully unshared since it was originally tagged.
++       */
++      if (!xfs_is_reflink_inode(ip) || !ifp->if_bytes) {
+               trace_xfs_inode_free_cowblocks_invalid(ip);
+               xfs_inode_clear_cowblocks_tag(ip);
+               return 0;
+--- a/fs/xfs/xfs_reflink.c
++++ b/fs/xfs/xfs_reflink.c
+@@ -1697,37 +1697,3 @@ out:
+       trace_xfs_reflink_unshare_error(ip, error, _RET_IP_);
+       return error;
+ }
+-
+-/*
+- * Does this inode have any real CoW reservations?
+- */
+-bool
+-xfs_reflink_has_real_cow_blocks(
+-      struct xfs_inode                *ip)
+-{
+-      struct xfs_bmbt_irec            irec;
+-      struct xfs_ifork                *ifp;
+-      struct xfs_bmbt_rec_host        *gotp;
+-      xfs_extnum_t                    idx;
+-
+-      if (!xfs_is_reflink_inode(ip))
+-              return false;
+-
+-      /* Go find the old extent in the CoW fork. */
+-      ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+-      gotp = xfs_iext_bno_to_ext(ifp, 0, &idx);
+-      while (gotp) {
+-              xfs_bmbt_get_all(gotp, &irec);
+-
+-              if (!isnullstartblock(irec.br_startblock))
+-                      return true;
+-
+-              /* Roll on... */
+-              idx++;
+-              if (idx >= ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
+-                      break;
+-              gotp = xfs_iext_get_ext(ifp, idx);
+-      }
+-
+-      return false;
+-}
+--- a/fs/xfs/xfs_reflink.h
++++ b/fs/xfs/xfs_reflink.h
+@@ -50,6 +50,4 @@ extern int xfs_reflink_clear_inode_flag(
+ extern int xfs_reflink_unshare(struct xfs_inode *ip, xfs_off_t offset,
+               xfs_off_t len);
+-extern bool xfs_reflink_has_real_cow_blocks(struct xfs_inode *ip);
+-
+ #endif /* __XFS_REFLINK_H */
diff --git a/queue-4.9/xfs-error-out-if-trying-to-add-attrs-and-anextents-0.patch b/queue-4.9/xfs-error-out-if-trying-to-add-attrs-and-anextents-0.patch
new file mode 100644 (file)
index 0000000..638aa9b
--- /dev/null
@@ -0,0 +1,48 @@
+From hch@lst.de  Tue Jan 10 11:30:09 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon,  9 Jan 2017 16:38:55 +0100
+Subject: xfs: error out if trying to add attrs and anextents > 0
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, "Darrick J. Wong" <darrick.wong@oracle.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-25-git-send-email-hch@lst.de>
+
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit 0f352f8ee8412bd9d34fb2a6411241da61175c0e upstream.
+
+We shouldn't assert if somehow we end up trying to add an attr fork to
+an inode that apparently already has attr extents because this is an
+indication of on-disk corruption.  Instead, return an error code to
+userspace.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_bmap.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -1153,6 +1153,10 @@ xfs_bmap_add_attrfork(
+               goto trans_cancel;
+       if (XFS_IFORK_Q(ip))
+               goto trans_cancel;
++      if (ip->i_d.di_anextents != 0) {
++              error = -EFSCORRUPTED;
++              goto trans_cancel;
++      }
+       if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) {
+               /*
+                * For inodes coming from pre-6.2 filesystems.
+@@ -1160,7 +1164,6 @@ xfs_bmap_add_attrfork(
+               ASSERT(ip->i_d.di_aformat == 0);
+               ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
+       }
+-      ASSERT(ip->i_d.di_anextents == 0);
+       xfs_trans_ijoin(tp, ip, 0);
+       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
diff --git a/queue-4.9/xfs-factor-rmap-btree-size-into-the-indlen-calculations.patch b/queue-4.9/xfs-factor-rmap-btree-size-into-the-indlen-calculations.patch
new file mode 100644 (file)
index 0000000..85f3510
--- /dev/null
@@ -0,0 +1,75 @@
+From hch@lst.de  Tue Jan 10 11:24:25 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon,  9 Jan 2017 16:38:40 +0100
+Subject: xfs: factor rmap btree size into the indlen calculations
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, "Darrick J. Wong" <darrick.wong@oracle.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-10-git-send-email-hch@lst.de>
+
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit fd26a88093bab6529ea2de819114ca92dbd1d71d upstream.
+
+When we're estimating the amount of space it's going to take to satisfy
+a delalloc reservation, we need to include the space that we might need
+to grow the rmapbt.  This helps us to avoid running out of space later
+when _iomap_write_allocate needs more space than we reserved.  Eryu Guan
+observed this happening on generic/224 when sunit/swidth were set.
+
+Reported-by: Eryu Guan <eguan@redhat.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_bmap.c |   17 +++++++++++++++--
+ 1 file changed, 15 insertions(+), 2 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -49,6 +49,7 @@
+ #include "xfs_rmap.h"
+ #include "xfs_ag_resv.h"
+ #include "xfs_refcount.h"
++#include "xfs_rmap_btree.h"
+ kmem_zone_t           *xfs_bmap_free_item_zone;
+@@ -190,8 +191,12 @@ xfs_bmap_worst_indlen(
+       int             maxrecs;        /* maximum record count at this level */
+       xfs_mount_t     *mp;            /* mount structure */
+       xfs_filblks_t   rval;           /* return value */
++      xfs_filblks_t   orig_len;
+       mp = ip->i_mount;
++
++      /* Calculate the worst-case size of the bmbt. */
++      orig_len = len;
+       maxrecs = mp->m_bmap_dmxr[0];
+       for (level = 0, rval = 0;
+            level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
+@@ -199,12 +204,20 @@ xfs_bmap_worst_indlen(
+               len += maxrecs - 1;
+               do_div(len, maxrecs);
+               rval += len;
+-              if (len == 1)
+-                      return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
++              if (len == 1) {
++                      rval += XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
+                               level - 1;
++                      break;
++              }
+               if (level == 0)
+                       maxrecs = mp->m_bmap_dmxr[1];
+       }
++
++      /* Calculate the worst-case size of the rmapbt. */
++      if (xfs_sb_version_hasrmapbt(&mp->m_sb))
++              rval += 1 + xfs_rmapbt_calc_size(mp, orig_len) +
++                              mp->m_rmap_maxlevels;
++
+       return rval;
+ }
diff --git a/queue-4.9/xfs-fix-crash-and-data-corruption-due-to-removal-of-busy-cow-extents.patch b/queue-4.9/xfs-fix-crash-and-data-corruption-due-to-removal-of-busy-cow-extents.patch
new file mode 100644 (file)
index 0000000..e50925a
--- /dev/null
@@ -0,0 +1,48 @@
+From hch@lst.de  Tue Jan 10 11:31:19 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon,  9 Jan 2017 16:39:02 +0100
+Subject: xfs: fix crash and data corruption due to removal of busy COW extents
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, "Darrick J. Wong" <darrick.wong@oracle.com>
+Message-ID: <1483976343-661-32-git-send-email-hch@lst.de>
+
+
+commit a1b7a4dea6166cf46be895bce4aac67ea5160fe8 upstream.
+
+There is a race window between write_cache_pages calling
+clear_page_dirty_for_io and XFS calling set_page_writeback, in which
+the mapping for an inode is tagged neither as dirty, nor as writeback.
+
+If the COW shrinker hits in exactly that window we'll remove the delayed
+COW extents and writepages trying to write it back, which in release
+kernels will manifest as corruption of the bmap btree, and in debug
+kernels will trip the ASSERT about not calling xfs_bmapi_write with the
+COWFORK flag for holes.  A complex customer load manages to hit this
+window fairly reliably, probably by always having COW writeback in flight
+while the cow shrinker runs.
+
+This patch adds another check for having the I_DIRTY_PAGES flag set,
+which is still set during this race window.  While this fixes the problem
+I'm still not overly happy about the way the COW shrinker works as it
+still seems a bit fragile.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_icache.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/xfs/xfs_icache.c
++++ b/fs/xfs/xfs_icache.c
+@@ -1599,7 +1599,8 @@ xfs_inode_free_cowblocks(
+        * If the mapping is dirty or under writeback we cannot touch the
+        * CoW fork.  Leave it alone if we're in the midst of a directio.
+        */
+-      if (mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) ||
++      if ((VFS_I(ip)->i_state & I_DIRTY_PAGES) ||
++          mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) ||
+           mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_WRITEBACK) ||
+           atomic_read(&VFS_I(ip)->i_dio_count))
+               return 0;
diff --git a/queue-4.9/xfs-fix-double-cleanup-when-cui-recovery-fails.patch b/queue-4.9/xfs-fix-double-cleanup-when-cui-recovery-fails.patch
new file mode 100644 (file)
index 0000000..e4debd3
--- /dev/null
@@ -0,0 +1,43 @@
+From hch@lst.de  Tue Jan 10 11:31:00 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon,  9 Jan 2017 16:39:00 +0100
+Subject: xfs: fix double-cleanup when CUI recovery fails
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, "Darrick J. Wong" <darrick.wong@oracle.com>
+Message-ID: <1483976343-661-30-git-send-email-hch@lst.de>
+
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit 7a21272b088894070391a94fdd1c67014020fa1d upstream.
+
+Dan Carpenter reported a double-free of rcur if _defer_finish fails
+while we're recovering CUI items.  Fix the error recovery to prevent
+this.
+
+Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_refcount_item.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/xfs/xfs_refcount_item.c
++++ b/fs/xfs/xfs_refcount_item.c
+@@ -526,13 +526,14 @@ xfs_cui_recover(
+       xfs_refcount_finish_one_cleanup(tp, rcur, error);
+       error = xfs_defer_finish(&tp, &dfops, NULL);
+       if (error)
+-              goto abort_error;
++              goto abort_defer;
+       set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);
+       error = xfs_trans_commit(tp);
+       return error;
+ abort_error:
+       xfs_refcount_finish_one_cleanup(tp, rcur, error);
++abort_defer:
+       xfs_defer_cancel(&dfops);
+       xfs_trans_cancel(tp);
+       return error;
diff --git a/queue-4.9/xfs-fix-max_retries-_show-and-_store-functions.patch b/queue-4.9/xfs-fix-max_retries-_show-and-_store-functions.patch
new file mode 100644 (file)
index 0000000..f501743
--- /dev/null
@@ -0,0 +1,45 @@
+From hch@lst.de  Tue Jan 10 11:31:32 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon,  9 Jan 2017 16:39:03 +0100
+Subject: xfs: fix max_retries _show and _store functions
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, Carlos Maiolino <cmaiolino@redhat.com>, "Darrick J. Wong" <darrick.wong@oracle.com>
+Message-ID: <1483976343-661-33-git-send-email-hch@lst.de>
+
+
+From: Carlos Maiolino <cmaiolino@redhat.com>
+
+commit ff97f2399edac1e0fb3fa7851d5fbcbdf04717cf upstream.
+
+max_retries _show and _store functions should test against cfg->max_retries,
+not cfg->retry_timeout.
+
+Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
+Reviewed-by: Eric Sandeen <sandeen@redhat.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_sysfs.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/xfs/xfs_sysfs.c
++++ b/fs/xfs/xfs_sysfs.c
+@@ -396,7 +396,7 @@ max_retries_show(
+       int             retries;
+       struct xfs_error_cfg *cfg = to_error_cfg(kobject);
+-      if (cfg->retry_timeout == XFS_ERR_RETRY_FOREVER)
++      if (cfg->max_retries == XFS_ERR_RETRY_FOREVER)
+               retries = -1;
+       else
+               retries = cfg->max_retries;
+@@ -422,7 +422,7 @@ max_retries_store(
+               return -EINVAL;
+       if (val == -1)
+-              cfg->retry_timeout = XFS_ERR_RETRY_FOREVER;
++              cfg->max_retries = XFS_ERR_RETRY_FOREVER;
+       else
+               cfg->max_retries = val;
+       return count;
diff --git a/queue-4.9/xfs-fix-unbalanced-inode-reclaim-flush-locking.patch b/queue-4.9/xfs-fix-unbalanced-inode-reclaim-flush-locking.patch
new file mode 100644 (file)
index 0000000..c387702
--- /dev/null
@@ -0,0 +1,153 @@
+From hch@lst.de  Tue Jan 10 11:23:57 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon,  9 Jan 2017 16:38:38 +0100
+Subject: xfs: fix unbalanced inode reclaim flush locking
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, Brian Foster <bfoster@redhat.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-8-git-send-email-hch@lst.de>
+
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit 98efe8af1c9ffac47e842b7a75ded903e2f028da upstream.
+
+Filesystem shutdown testing on an older distro kernel has uncovered an
+imbalanced locking pattern for the inode flush lock in
+xfs_reclaim_inode(). Specifically, there is a double unlock sequence
+between the call to xfs_iflush_abort() and xfs_reclaim_inode() at the
+"reclaim:" label.
+
+This actually does not cause obvious problems on current kernels due to
+the current flush lock implementation. Older kernels use a counting
+based flush lock mechanism, however, which effectively breaks the lock
+indefinitely when an already unlocked flush lock is repeatedly unlocked.
+Though this only currently occurs on filesystem shutdown, it has
+reproduced the effect of elevating an fs shutdown to a system-wide crash
+or hang.
+
+As it turns out, the flush lock is not actually required for the reclaim
+logic in xfs_reclaim_inode() because by that time we have already cycled
+the flush lock once while holding ILOCK_EXCL. Therefore, remove the
+additional flush lock/unlock cycle around the 'reclaim:' label and
+update branches into this label to release the flush lock where
+appropriate. Add an assert to xfs_ifunlock() to help prevent future
+occurrences of the same problem.
+
+Reported-by: Zorro Lang <zlang@redhat.com>
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_icache.c |   27 ++++++++++++++-------------
+ fs/xfs/xfs_inode.h  |   11 ++++++-----
+ 2 files changed, 20 insertions(+), 18 deletions(-)
+
+--- a/fs/xfs/xfs_icache.c
++++ b/fs/xfs/xfs_icache.c
+@@ -123,7 +123,6 @@ __xfs_inode_free(
+ {
+       /* asserts to verify all state is correct here */
+       ASSERT(atomic_read(&ip->i_pincount) == 0);
+-      ASSERT(!xfs_isiflocked(ip));
+       XFS_STATS_DEC(ip->i_mount, vn_active);
+       call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
+@@ -133,6 +132,8 @@ void
+ xfs_inode_free(
+       struct xfs_inode        *ip)
+ {
++      ASSERT(!xfs_isiflocked(ip));
++
+       /*
+        * Because we use RCU freeing we need to ensure the inode always
+        * appears to be reclaimed with an invalid inode number when in the
+@@ -981,6 +982,7 @@ restart:
+       if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+               xfs_iunpin_wait(ip);
++              /* xfs_iflush_abort() drops the flush lock */
+               xfs_iflush_abort(ip, false);
+               goto reclaim;
+       }
+@@ -989,10 +991,10 @@ restart:
+                       goto out_ifunlock;
+               xfs_iunpin_wait(ip);
+       }
+-      if (xfs_iflags_test(ip, XFS_ISTALE))
+-              goto reclaim;
+-      if (xfs_inode_clean(ip))
++      if (xfs_iflags_test(ip, XFS_ISTALE) || xfs_inode_clean(ip)) {
++              xfs_ifunlock(ip);
+               goto reclaim;
++      }
+       /*
+        * Never flush out dirty data during non-blocking reclaim, as it would
+@@ -1030,25 +1032,24 @@ restart:
+               xfs_buf_relse(bp);
+       }
+-      xfs_iflock(ip);
+ reclaim:
++      ASSERT(!xfs_isiflocked(ip));
++
+       /*
+        * Because we use RCU freeing we need to ensure the inode always appears
+        * to be reclaimed with an invalid inode number when in the free state.
+-       * We do this as early as possible under the ILOCK and flush lock so
+-       * that xfs_iflush_cluster() can be guaranteed to detect races with us
+-       * here. By doing this, we guarantee that once xfs_iflush_cluster has
+-       * locked both the XFS_ILOCK and the flush lock that it will see either
+-       * a valid, flushable inode that will serialise correctly against the
+-       * locks below, or it will see a clean (and invalid) inode that it can
+-       * skip.
++       * We do this as early as possible under the ILOCK so that
++       * xfs_iflush_cluster() can be guaranteed to detect races with us here.
++       * By doing this, we guarantee that once xfs_iflush_cluster has locked
++       * XFS_ILOCK that it will see either a valid, flushable inode that will
++       * serialise correctly, or it will see a clean (and invalid) inode that
++       * it can skip.
+        */
+       spin_lock(&ip->i_flags_lock);
+       ip->i_flags = XFS_IRECLAIM;
+       ip->i_ino = 0;
+       spin_unlock(&ip->i_flags_lock);
+-      xfs_ifunlock(ip);
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       XFS_STATS_INC(ip->i_mount, xs_ig_reclaims);
+--- a/fs/xfs/xfs_inode.h
++++ b/fs/xfs/xfs_inode.h
+@@ -246,6 +246,11 @@ static inline bool xfs_is_reflink_inode(
+  * Synchronize processes attempting to flush the in-core inode back to disk.
+  */
++static inline int xfs_isiflocked(struct xfs_inode *ip)
++{
++      return xfs_iflags_test(ip, XFS_IFLOCK);
++}
++
+ extern void __xfs_iflock(struct xfs_inode *ip);
+ static inline int xfs_iflock_nowait(struct xfs_inode *ip)
+@@ -261,16 +266,12 @@ static inline void xfs_iflock(struct xfs
+ static inline void xfs_ifunlock(struct xfs_inode *ip)
+ {
++      ASSERT(xfs_isiflocked(ip));
+       xfs_iflags_clear(ip, XFS_IFLOCK);
+       smp_mb();
+       wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT);
+ }
+-static inline int xfs_isiflocked(struct xfs_inode *ip)
+-{
+-      return xfs_iflags_test(ip, XFS_IFLOCK);
+-}
+-
+ /*
+  * Flags for inode locking.
+  * Bit ranges:        1<<1  - 1<<16-1 -- iolock/ilock modes (bitfield)
diff --git a/queue-4.9/xfs-forbid-ag-btrees-with-level-0.patch b/queue-4.9/xfs-forbid-ag-btrees-with-level-0.patch
new file mode 100644 (file)
index 0000000..a835de7
--- /dev/null
@@ -0,0 +1,78 @@
+From hch@lst.de  Tue Jan 10 11:29:15 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon,  9 Jan 2017 16:38:51 +0100
+Subject: xfs: forbid AG btrees with level == 0
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, "Darrick J. Wong" <darrick.wong@oracle.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-21-git-send-email-hch@lst.de>
+
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit d2a047f31e86941fa896e0e3271536d50aba415e upstream.
+
+There is no such thing as a zero-level AG btree since even a single-node
+zero-records btree has one level.  Btree cursor constructors read
+cur_nlevels straight from disk and then access things like
+cur_bufs[cur_nlevels - 1] which is /really/ bad if cur_nlevels is zero!
+Therefore, strengthen the verifiers to prevent this possibility.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_alloc.c  |   10 +++++++---
+ fs/xfs/libxfs/xfs_ialloc.c |    9 ++++++++-
+ 2 files changed, 15 insertions(+), 4 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_alloc.c
++++ b/fs/xfs/libxfs/xfs_alloc.c
+@@ -2455,12 +2455,15 @@ xfs_agf_verify(
+             be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)))
+               return false;
+-      if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS ||
++      if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
++          be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 ||
++          be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS ||
+           be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS)
+               return false;
+       if (xfs_sb_version_hasrmapbt(&mp->m_sb) &&
+-          be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS)
++          (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
++           be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS))
+               return false;
+       /*
+@@ -2477,7 +2480,8 @@ xfs_agf_verify(
+               return false;
+       if (xfs_sb_version_hasreflink(&mp->m_sb) &&
+-          be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS)
++          (be32_to_cpu(agf->agf_refcount_level) < 1 ||
++           be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS))
+               return false;
+       return true;;
+--- a/fs/xfs/libxfs/xfs_ialloc.c
++++ b/fs/xfs/libxfs/xfs_ialloc.c
+@@ -2510,8 +2510,15 @@ xfs_agi_verify(
+       if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)))
+               return false;
+-      if (be32_to_cpu(agi->agi_level) > XFS_BTREE_MAXLEVELS)
++      if (be32_to_cpu(agi->agi_level) < 1 ||
++          be32_to_cpu(agi->agi_level) > XFS_BTREE_MAXLEVELS)
+               return false;
++
++      if (xfs_sb_version_hasfinobt(&mp->m_sb) &&
++          (be32_to_cpu(agi->agi_free_level) < 1 ||
++           be32_to_cpu(agi->agi_free_level) > XFS_BTREE_MAXLEVELS))
++              return false;
++
+       /*
+        * during growfs operations, the perag is not fully initialised,
+        * so we can't use it for any useful checking. growfs ensures we can't
diff --git a/queue-4.9/xfs-handle-cow-fork-in-xfs_bmap_trace_exlist.patch b/queue-4.9/xfs-handle-cow-fork-in-xfs_bmap_trace_exlist.patch
new file mode 100644 (file)
index 0000000..9ced25b
--- /dev/null
@@ -0,0 +1,54 @@
+From hch@lst.de  Tue Jan 10 11:26:23 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon,  9 Jan 2017 16:38:50 +0100
+Subject: xfs: handle cow fork in xfs_bmap_trace_exlist
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, Eric Sandeen <sandeen@sandeen.net>, Eric Sandeen <sandeen@redhat.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-20-git-send-email-hch@lst.de>
+
+
+From: Eric Sandeen <sandeen@sandeen.net>
+
+commit c44a1f22626c153976289e1cd67bdcdfefc16e1f upstream.
+
+By inspection, xfs_bmap_trace_exlist isn't handling cow forks,
+and will trace the data fork instead.
+
+Fix this by setting state appropriately if whichfork
+== XFS_COW_FORK.
+
+()___()
+< @ @ >
+ |   |
+ {o_o}
+  (|)
+
+Signed-off-by: Eric Sandeen <sandeen@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_bmap.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -518,7 +518,7 @@ void
+ xfs_bmap_trace_exlist(
+       xfs_inode_t     *ip,            /* incore inode pointer */
+       xfs_extnum_t    cnt,            /* count of entries in the list */
+-      int             whichfork,      /* data or attr fork */
++      int             whichfork,      /* data or attr or cow fork */
+       unsigned long   caller_ip)
+ {
+       xfs_extnum_t    idx;            /* extent record index */
+@@ -527,6 +527,8 @@ xfs_bmap_trace_exlist(
+       if (whichfork == XFS_ATTR_FORK)
+               state |= BMAP_ATTRFORK;
++      else if (whichfork == XFS_COW_FORK)
++              state |= BMAP_COWFORK;
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       ASSERT(cnt == xfs_iext_count(ifp));
diff --git a/queue-4.9/xfs-ignore-leaf-attr-ichdr.count-in-verifier-during-log-replay.patch b/queue-4.9/xfs-ignore-leaf-attr-ichdr.count-in-verifier-during-log-replay.patch
new file mode 100644 (file)
index 0000000..9761524
--- /dev/null
@@ -0,0 +1,58 @@
+From hch@lst.de  Tue Jan 10 11:30:37 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon,  9 Jan 2017 16:38:58 +0100
+Subject: xfs: ignore leaf attr ichdr.count in verifier during log replay
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, Eric Sandeen <sandeen@redhat.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-28-git-send-email-hch@lst.de>
+
+
+From: Eric Sandeen <sandeen@redhat.com>
+
+commit 2e1d23370e75d7d89350d41b4ab58c7f6a0e26b2 upstream.
+
+When we create a new attribute, we first create a shortform
+attribute, and try to fit the new attribute into it.
+If that fails, we copy the (empty) attribute into a leaf attribute,
+and do the copy again.  Thus there can be a transient state where
+we have an empty leaf attribute.
+
+If we encounter this during log replay, the verifier will fail.
+So add a test to ignore this part of the leaf attr verification
+during log replay.
+
+Thanks as usual to dchinner for spotting the problem.
+
+Signed-off-by: Eric Sandeen <sandeen@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_attr_leaf.c |    8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/fs/xfs/libxfs/xfs_attr_leaf.c
++++ b/fs/xfs/libxfs/xfs_attr_leaf.c
+@@ -253,6 +253,7 @@ xfs_attr3_leaf_verify(
+ {
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_attr_leafblock *leaf = bp->b_addr;
++      struct xfs_perag *pag = bp->b_pag;
+       struct xfs_attr3_icleaf_hdr ichdr;
+       xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf);
+@@ -273,7 +274,12 @@ xfs_attr3_leaf_verify(
+               if (ichdr.magic != XFS_ATTR_LEAF_MAGIC)
+                       return false;
+       }
+-      if (ichdr.count == 0)
++      /*
++       * In recovery there is a transient state where count == 0 is valid
++       * because we may have transitioned an empty shortform attr to a leaf
++       * if the attr didn't fit in shortform.
++       */
++      if (pag && pag->pagf_init && ichdr.count == 0)
+               return false;
+       /* XXX: need to range check rest of attr header values */
diff --git a/queue-4.9/xfs-move-agi-buffer-type-setting-to-xfs_read_agi.patch b/queue-4.9/xfs-move-agi-buffer-type-setting-to-xfs_read_agi.patch
new file mode 100644 (file)
index 0000000..8cf1afc
--- /dev/null
@@ -0,0 +1,81 @@
+From hch@lst.de  Tue Jan 10 11:26:01 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon,  9 Jan 2017 16:38:48 +0100
+Subject: xfs: Move AGI buffer type setting to xfs_read_agi
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, Eric Sandeen <sandeen@sandeen.net>, Eric Sandeen <sandeen@redhat.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-18-git-send-email-hch@lst.de>
+
+
+From: Eric Sandeen <sandeen@sandeen.net>
+
+commit 200237d6746faaeaf7f4ff4abbf13f3917cee60a upstream.
+
+We've missed properly setting the buffer type for
+an AGI transaction in 3 spots now, so just move it
+into xfs_read_agi() and set it if we are in a transaction
+to avoid the problem in the future.
+
+This is similar to how it is done in e.g. the dir3
+and attr3 read functions.
+
+Signed-off-by: Eric Sandeen <sandeen@redhat.com>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_ialloc.c |    4 ++--
+ fs/xfs/xfs_inode.c         |    2 --
+ fs/xfs/xfs_log_recover.c   |    1 -
+ 3 files changed, 2 insertions(+), 5 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_ialloc.c
++++ b/fs/xfs/libxfs/xfs_ialloc.c
+@@ -2450,8 +2450,6 @@ xfs_ialloc_log_agi(
+       ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
+ #endif
+-      xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGI_BUF);
+-
+       /*
+        * Compute byte offsets for the first and last fields in the first
+        * region and log the agi buffer. This only logs up through
+@@ -2592,6 +2590,8 @@ xfs_read_agi(
+                       XFS_FSS_TO_BB(mp, 1), 0, bpp, &xfs_agi_buf_ops);
+       if (error)
+               return error;
++      if (tp)
++              xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_AGI_BUF);
+       xfs_buf_set_ref(*bpp, XFS_AGI_REF);
+       return 0;
+--- a/fs/xfs/xfs_inode.c
++++ b/fs/xfs/xfs_inode.c
+@@ -2041,7 +2041,6 @@ xfs_iunlink(
+       agi->agi_unlinked[bucket_index] = cpu_to_be32(agino);
+       offset = offsetof(xfs_agi_t, agi_unlinked) +
+               (sizeof(xfs_agino_t) * bucket_index);
+-      xfs_trans_buf_set_type(tp, agibp, XFS_BLFT_AGI_BUF);
+       xfs_trans_log_buf(tp, agibp, offset,
+                         (offset + sizeof(xfs_agino_t) - 1));
+       return 0;
+@@ -2133,7 +2132,6 @@ xfs_iunlink_remove(
+               agi->agi_unlinked[bucket_index] = cpu_to_be32(next_agino);
+               offset = offsetof(xfs_agi_t, agi_unlinked) +
+                       (sizeof(xfs_agino_t) * bucket_index);
+-              xfs_trans_buf_set_type(tp, agibp, XFS_BLFT_AGI_BUF);
+               xfs_trans_log_buf(tp, agibp, offset,
+                                 (offset + sizeof(xfs_agino_t) - 1));
+       } else {
+--- a/fs/xfs/xfs_log_recover.c
++++ b/fs/xfs/xfs_log_recover.c
+@@ -4929,7 +4929,6 @@ xlog_recover_clear_agi_bucket(
+       agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
+       offset = offsetof(xfs_agi_t, agi_unlinked) +
+                (sizeof(xfs_agino_t) * bucket);
+-      xfs_trans_buf_set_type(tp, agibp, XFS_BLFT_AGI_BUF);
+       xfs_trans_log_buf(tp, agibp, offset,
+                         (offset + sizeof(xfs_agino_t) - 1));
diff --git a/queue-4.9/xfs-new-inode-extent-list-lookup-helpers.patch b/queue-4.9/xfs-new-inode-extent-list-lookup-helpers.patch
new file mode 100644 (file)
index 0000000..41c306e
--- /dev/null
@@ -0,0 +1,101 @@
+From hch@lst.de  Tue Jan 10 11:24:08 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon,  9 Jan 2017 16:38:39 +0100
+Subject: xfs: new inode extent list lookup helpers
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-9-git-send-email-hch@lst.de>
+
+
+commit 93533c7855c3c78c8a900cac65c8d669bb14935d upstream.
+
+xfs_iext_lookup_extent looks up a single extent at the passed in offset,
+and returns the extent covering the area, or the one behind it in case
+of a hole, as well as the index of the returned extent in arguments,
+as well as a simple bool as return value that is set to false if no
+extent could be found because the offset is behind EOF.  It is a simpler
+replacement for xfs_bmap_search_extent that leaves looking up the rarely
+needed previous extent to the caller and has a nicer calling convention.
+
+xfs_iext_get_extent is a helper for iterating over the extent list,
+it takes an extent index as input, and returns the extent at that index
+in its expanded form in an argument if it exists.  The actual return
+value is a bool whether the index is valid or not.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_inode_fork.c |   46 +++++++++++++++++++++++++++++++++++++++++
+ fs/xfs/libxfs/xfs_inode_fork.h |    6 +++++
+ 2 files changed, 52 insertions(+)
+
+--- a/fs/xfs/libxfs/xfs_inode_fork.c
++++ b/fs/xfs/libxfs/xfs_inode_fork.c
+@@ -2003,3 +2003,49 @@ xfs_ifork_init_cow(
+       ip->i_cformat = XFS_DINODE_FMT_EXTENTS;
+       ip->i_cnextents = 0;
+ }
++
++/*
++ * Lookup the extent covering bno.
++ *
++ * If there is an extent covering bno return true, and store the expanded
++ * extent structure in *gotp, and the extent index in *idxp.
++ * If there is no extent covering bno, but there is an extent after it (e.g.
++ * it lies in a hole) return that extent in *gotp and its index in *idxp
++ * instead.
++ * If bno is beyond the last extent return false, and return the index after
++ * the last valid index in *idxp.
++ */
++bool
++xfs_iext_lookup_extent(
++      struct xfs_inode        *ip,
++      struct xfs_ifork        *ifp,
++      xfs_fileoff_t           bno,
++      xfs_extnum_t            *idxp,
++      struct xfs_bmbt_irec    *gotp)
++{
++      struct xfs_bmbt_rec_host *ep;
++
++      XFS_STATS_INC(ip->i_mount, xs_look_exlist);
++
++      ep = xfs_iext_bno_to_ext(ifp, bno, idxp);
++      if (!ep)
++              return false;
++      xfs_bmbt_get_all(ep, gotp);
++      return true;
++}
++
++/*
++ * Return true if there is an extent at index idx, and return the expanded
++ * extent structure at idx in that case.  Else return false.
++ */
++bool
++xfs_iext_get_extent(
++      struct xfs_ifork        *ifp,
++      xfs_extnum_t            idx,
++      struct xfs_bmbt_irec    *gotp)
++{
++      if (idx < 0 || idx >= xfs_iext_count(ifp))
++              return false;
++      xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), gotp);
++      return true;
++}
+--- a/fs/xfs/libxfs/xfs_inode_fork.h
++++ b/fs/xfs/libxfs/xfs_inode_fork.h
+@@ -182,6 +182,12 @@ void              xfs_iext_irec_compact_pages(struct
+ void          xfs_iext_irec_compact_full(struct xfs_ifork *);
+ void          xfs_iext_irec_update_extoffs(struct xfs_ifork *, int, int);
++bool          xfs_iext_lookup_extent(struct xfs_inode *ip,
++                      struct xfs_ifork *ifp, xfs_fileoff_t bno,
++                      xfs_extnum_t *idxp, struct xfs_bmbt_irec *gotp);
++bool          xfs_iext_get_extent(struct xfs_ifork *ifp, xfs_extnum_t idx,
++                      struct xfs_bmbt_irec *gotp);
++
+ extern struct kmem_zone       *xfs_ifork_zone;
+ extern void xfs_ifork_init_cow(struct xfs_inode *ip);
diff --git a/queue-4.9/xfs-pass-post-eof-speculative-prealloc-blocks-to-bmapi.patch b/queue-4.9/xfs-pass-post-eof-speculative-prealloc-blocks-to-bmapi.patch
new file mode 100644 (file)
index 0000000..f8abf1a
--- /dev/null
@@ -0,0 +1,114 @@
+From hch@lst.de  Tue Jan 10 11:25:53 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon,  9 Jan 2017 16:38:47 +0100
+Subject: xfs: pass post-eof speculative prealloc blocks to bmapi
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, Brian Foster <bfoster@redhat.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-17-git-send-email-hch@lst.de>
+
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit f782088c9e5d08e9494c63e68b4e85716df3e5f8 upstream.
+
+xfs_file_iomap_begin_delay() implements post-eof speculative
+preallocation by extending the block count of the requested delayed
+allocation. Now that xfs_bmapi_reserve_delalloc() has been updated to
+handle prealloc blocks separately and tag the inode, update
+xfs_file_iomap_begin_delay() to use the new parameter and rely on the
+former to tag the inode.
+
+Note that this patch does not change behavior.
+
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_iomap.c |   33 +++++++++++++--------------------
+ 1 file changed, 13 insertions(+), 20 deletions(-)
+
+--- a/fs/xfs/xfs_iomap.c
++++ b/fs/xfs/xfs_iomap.c
+@@ -536,10 +536,11 @@ xfs_file_iomap_begin_delay(
+       xfs_fileoff_t           offset_fsb = XFS_B_TO_FSBT(mp, offset);
+       xfs_fileoff_t           maxbytes_fsb =
+               XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
+-      xfs_fileoff_t           end_fsb, orig_end_fsb;
++      xfs_fileoff_t           end_fsb;
+       int                     error = 0, eof = 0;
+       struct xfs_bmbt_irec    got;
+       xfs_extnum_t            idx;
++      xfs_fsblock_t           prealloc_blocks = 0;
+       ASSERT(!XFS_IS_REALTIME_INODE(ip));
+       ASSERT(!xfs_get_extsz_hint(ip));
+@@ -594,33 +595,32 @@ xfs_file_iomap_begin_delay(
+        * the lower level functions are updated.
+        */
+       count = min_t(loff_t, count, 1024 * PAGE_SIZE);
+-      end_fsb = orig_end_fsb =
+-              min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb);
++      end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb);
+       if (eof) {
+-              xfs_fsblock_t   prealloc_blocks;
+-
+               prealloc_blocks = xfs_iomap_prealloc_size(ip, offset, count, idx);
+               if (prealloc_blocks) {
+                       xfs_extlen_t    align;
+                       xfs_off_t       end_offset;
++                      xfs_fileoff_t   p_end_fsb;
+                       end_offset = XFS_WRITEIO_ALIGN(mp, offset + count - 1);
+-                      end_fsb = XFS_B_TO_FSBT(mp, end_offset) +
+-                              prealloc_blocks;
++                      p_end_fsb = XFS_B_TO_FSBT(mp, end_offset) +
++                                      prealloc_blocks;
+                       align = xfs_eof_alignment(ip, 0);
+                       if (align)
+-                              end_fsb = roundup_64(end_fsb, align);
++                              p_end_fsb = roundup_64(p_end_fsb, align);
+-                      end_fsb = min(end_fsb, maxbytes_fsb);
+-                      ASSERT(end_fsb > offset_fsb);
++                      p_end_fsb = min(p_end_fsb, maxbytes_fsb);
++                      ASSERT(p_end_fsb > offset_fsb);
++                      prealloc_blocks = p_end_fsb - end_fsb;
+               }
+       }
+ retry:
+       error = xfs_bmapi_reserve_delalloc(ip, XFS_DATA_FORK, offset_fsb,
+-                      end_fsb - offset_fsb, 0, &got, &idx, eof);
++                      end_fsb - offset_fsb, prealloc_blocks, &got, &idx, eof);
+       switch (error) {
+       case 0:
+               break;
+@@ -628,8 +628,8 @@ retry:
+       case -EDQUOT:
+               /* retry without any preallocation */
+               trace_xfs_delalloc_enospc(ip, offset, count);
+-              if (end_fsb != orig_end_fsb) {
+-                      end_fsb = orig_end_fsb;
++              if (prealloc_blocks) {
++                      prealloc_blocks = 0;
+                       goto retry;
+               }
+               /*FALLTHRU*/
+@@ -637,13 +637,6 @@ retry:
+               goto out_unlock;
+       }
+-      /*
+-       * Tag the inode as speculatively preallocated so we can reclaim this
+-       * space on demand, if necessary.
+-       */
+-      if (end_fsb != orig_end_fsb)
+-              xfs_inode_set_eofblocks_tag(ip);
+-
+       trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
+ done:
+       if (isnullstartblock(got.br_startblock))
diff --git a/queue-4.9/xfs-pass-state-not-whichfork-to-trace_xfs_extlist.patch b/queue-4.9/xfs-pass-state-not-whichfork-to-trace_xfs_extlist.patch
new file mode 100644 (file)
index 0000000..0c64c11
--- /dev/null
@@ -0,0 +1,46 @@
+From hch@lst.de  Tue Jan 10 11:26:14 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon,  9 Jan 2017 16:38:49 +0100
+Subject: xfs: pass state not whichfork to trace_xfs_extlist
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, Eric Sandeen <sandeen@sandeen.net>, Eric Sandeen <sandeen@redhat.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-19-git-send-email-hch@lst.de>
+
+
+From: Eric Sandeen <sandeen@sandeen.net>
+
+commit 7710517fc37b1899722707883b54694ea710b3c0 upstream.
+
+When xfs_bmap_trace_exlist called trace_xfs_extlist,
+it sent in the "whichfork" var instead of the bmap "state"
+as expected (even though state was already set up for this
+purpose).
+
+As a result, the xfs_bmap_class in tracing code used
+"whichfork" not state in xfs_iext_state_to_fork(), and got
+the wrong ifork pointer.  It all goes downhill from
+there, including an ASSERT when ifp_bytes is empty
+by the time it reaches xfs_iext_get_ext():
+
+XFS: Assertion failed: idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)
+
+Signed-off-by: Eric Sandeen <sandeen@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_bmap.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -531,7 +531,7 @@ xfs_bmap_trace_exlist(
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       ASSERT(cnt == xfs_iext_count(ifp));
+       for (idx = 0; idx < cnt; idx++)
+-              trace_xfs_extlist(ip, idx, whichfork, caller_ip);
++              trace_xfs_extlist(ip, idx, state, caller_ip);
+ }
+ /*
diff --git a/queue-4.9/xfs-provide-helper-for-counting-extents-from-if_bytes.patch b/queue-4.9/xfs-provide-helper-for-counting-extents-from-if_bytes.patch
new file mode 100644 (file)
index 0000000..caf0a67
--- /dev/null
@@ -0,0 +1,523 @@
+From hch@lst.de  Tue Jan 10 11:23:27 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon,  9 Jan 2017 16:38:36 +0100
+Subject: xfs: provide helper for counting extents from if_bytes
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, Eric Sandeen <sandeen@sandeen.net>, Eric Sandeen <sandeen@redhat.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-6-git-send-email-hch@lst.de>
+
+
+From: Eric Sandeen <sandeen@sandeen.net>
+
+commit 5d829300bee000980a09ac2ccb761cb25867b67c upstream.
+
+The open-coded pattern:
+
+ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)
+
+is all over the xfs code; provide a new helper
+xfs_iext_count(ifp) to count the number of inline extents
+in an inode fork.
+
+[dchinner: pick up several missed conversions]
+
+Signed-off-by: Eric Sandeen <sandeen@redhat.com>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_bmap.c       |   49 +++++++++++++++++++----------------------
+ fs/xfs/libxfs/xfs_inode_fork.c |   31 +++++++++++++++----------
+ fs/xfs/libxfs/xfs_inode_fork.h |    1 
+ fs/xfs/xfs_bmap_util.c         |   34 +++++++++++-----------------
+ fs/xfs/xfs_inode_item.c        |    4 +--
+ fs/xfs/xfs_ioctl.c             |    6 +----
+ fs/xfs/xfs_qm.c                |    2 -
+ fs/xfs/xfs_reflink.c           |    4 +--
+ 8 files changed, 64 insertions(+), 67 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -515,7 +515,7 @@ xfs_bmap_trace_exlist(
+               state |= BMAP_ATTRFORK;
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+-      ASSERT(cnt == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
++      ASSERT(cnt == xfs_iext_count(ifp));
+       for (idx = 0; idx < cnt; idx++)
+               trace_xfs_extlist(ip, idx, whichfork, caller_ip);
+ }
+@@ -811,7 +811,7 @@ try_another_ag:
+                               XFS_BTREE_LONG_PTRS);
+       arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
+-      nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
++      nextents =  xfs_iext_count(ifp);
+       for (cnt = i = 0; i < nextents; i++) {
+               ep = xfs_iext_get_ext(ifp, i);
+               if (!isnullstartblock(xfs_bmbt_get_startblock(ep))) {
+@@ -1296,7 +1296,7 @@ xfs_bmap_read_extents(
+       /*
+        * Here with bp and block set to the leftmost leaf node in the tree.
+        */
+-      room = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
++      room = xfs_iext_count(ifp);
+       i = 0;
+       /*
+        * Loop over all leaf nodes.  Copy information to the extent records.
+@@ -1361,7 +1361,7 @@ xfs_bmap_read_extents(
+                       return error;
+               block = XFS_BUF_TO_BLOCK(bp);
+       }
+-      ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
++      ASSERT(i == xfs_iext_count(ifp));
+       ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork));
+       XFS_BMAP_TRACE_EXLIST(ip, i, whichfork);
+       return 0;
+@@ -1404,7 +1404,7 @@ xfs_bmap_search_multi_extents(
+       if (lastx > 0) {
+               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx - 1), prevp);
+       }
+-      if (lastx < (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) {
++      if (lastx < xfs_iext_count(ifp)) {
+               xfs_bmbt_get_all(ep, gotp);
+               *eofp = 0;
+       } else {
+@@ -1497,7 +1497,7 @@ xfs_bmap_first_unused(
+           (error = xfs_iread_extents(tp, ip, whichfork)))
+               return error;
+       lowest = *first_unused;
+-      nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
++      nextents = xfs_iext_count(ifp);
+       for (idx = 0, lastaddr = 0, max = lowest; idx < nextents; idx++) {
+               xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, idx);
+               off = xfs_bmbt_get_startoff(ep);
+@@ -1582,7 +1582,7 @@ xfs_bmap_last_extent(
+                       return error;
+       }
+-      nextents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
++      nextents = xfs_iext_count(ifp);
+       if (nextents == 0) {
+               *is_empty = 1;
+               return 0;
+@@ -1735,7 +1735,7 @@ xfs_bmap_add_extent_delay_real(
+                                               &bma->ip->i_d.di_nextents);
+       ASSERT(bma->idx >= 0);
+-      ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
++      ASSERT(bma->idx <= xfs_iext_count(ifp));
+       ASSERT(!isnullstartblock(new->br_startblock));
+       ASSERT(!bma->cur ||
+              (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
+@@ -1794,7 +1794,7 @@ xfs_bmap_add_extent_delay_real(
+        * Don't set contiguous if the combined extent would be too large.
+        * Also check for all-three-contiguous being too large.
+        */
+-      if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
++      if (bma->idx < xfs_iext_count(ifp) - 1) {
+               state |= BMAP_RIGHT_VALID;
+               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx + 1), &RIGHT);
+@@ -2300,7 +2300,7 @@ xfs_bmap_add_extent_unwritten_real(
+       ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+       ASSERT(*idx >= 0);
+-      ASSERT(*idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
++      ASSERT(*idx <= xfs_iext_count(ifp));
+       ASSERT(!isnullstartblock(new->br_startblock));
+       XFS_STATS_INC(mp, xs_add_exlist);
+@@ -2356,7 +2356,7 @@ xfs_bmap_add_extent_unwritten_real(
+        * Don't set contiguous if the combined extent would be too large.
+        * Also check for all-three-contiguous being too large.
+        */
+-      if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
++      if (*idx < xfs_iext_count(&ip->i_df) - 1) {
+               state |= BMAP_RIGHT_VALID;
+               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT);
+               if (isnullstartblock(RIGHT.br_startblock))
+@@ -2836,7 +2836,7 @@ xfs_bmap_add_extent_hole_delay(
+        * Check and set flags if the current (right) segment exists.
+        * If it doesn't exist, we're converting the hole at end-of-file.
+        */
+-      if (*idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
++      if (*idx < xfs_iext_count(ifp)) {
+               state |= BMAP_RIGHT_VALID;
+               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right);
+@@ -2966,7 +2966,7 @@ xfs_bmap_add_extent_hole_real(
+       ifp = XFS_IFORK_PTR(bma->ip, whichfork);
+       ASSERT(bma->idx >= 0);
+-      ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
++      ASSERT(bma->idx <= xfs_iext_count(ifp));
+       ASSERT(!isnullstartblock(new->br_startblock));
+       ASSERT(!bma->cur ||
+              !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
+@@ -2992,7 +2992,7 @@ xfs_bmap_add_extent_hole_real(
+        * Check and set flags if this segment has a current value.
+        * Not true if we're inserting into the "hole" at eof.
+        */
+-      if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
++      if (bma->idx < xfs_iext_count(ifp)) {
+               state |= BMAP_RIGHT_VALID;
+               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &right);
+               if (isnullstartblock(right.br_startblock))
+@@ -4221,7 +4221,7 @@ xfs_bmapi_read(
+                       break;
+               /* Else go on to the next record. */
+-              if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
++              if (++lastx < xfs_iext_count(ifp))
+                       xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got);
+               else
+                       eof = 1;
+@@ -4733,7 +4733,7 @@ xfs_bmapi_write(
+               /* Else go on to the next record. */
+               bma.prev = bma.got;
+-              if (++bma.idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) {
++              if (++bma.idx < xfs_iext_count(ifp)) {
+                       xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma.idx),
+                                        &bma.got);
+               } else
+@@ -4885,7 +4885,7 @@ xfs_bmap_del_extent_delay(
+       da_new = 0;
+       ASSERT(*idx >= 0);
+-      ASSERT(*idx < ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
++      ASSERT(*idx <= xfs_iext_count(ifp));
+       ASSERT(del->br_blockcount > 0);
+       ASSERT(got->br_startoff <= del->br_startoff);
+       ASSERT(got_endoff >= del_endoff);
+@@ -5016,7 +5016,7 @@ xfs_bmap_del_extent_cow(
+       got_endoff = got->br_startoff + got->br_blockcount;
+       ASSERT(*idx >= 0);
+-      ASSERT(*idx < ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
++      ASSERT(*idx <= xfs_iext_count(ifp));
+       ASSERT(del->br_blockcount > 0);
+       ASSERT(got->br_startoff <= del->br_startoff);
+       ASSERT(got_endoff >= del_endoff);
+@@ -5122,8 +5122,7 @@ xfs_bmap_del_extent(
+               state |= BMAP_COWFORK;
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+-      ASSERT((*idx >= 0) && (*idx < ifp->if_bytes /
+-              (uint)sizeof(xfs_bmbt_rec_t)));
++      ASSERT((*idx >= 0) && (*idx < xfs_iext_count(ifp)));
+       ASSERT(del->br_blockcount > 0);
+       ep = xfs_iext_get_ext(ifp, *idx);
+       xfs_bmbt_get_all(ep, &got);
+@@ -5448,7 +5447,6 @@ __xfs_bunmapi(
+       int                     logflags;       /* transaction logging flags */
+       xfs_extlen_t            mod;            /* rt extent offset */
+       xfs_mount_t             *mp;            /* mount structure */
+-      xfs_extnum_t            nextents;       /* number of file extents */
+       xfs_bmbt_irec_t         prev;           /* previous extent record */
+       xfs_fileoff_t           start;          /* first file offset deleted */
+       int                     tmp_logflags;   /* partial logging flags */
+@@ -5480,8 +5478,7 @@ __xfs_bunmapi(
+       if (!(ifp->if_flags & XFS_IFEXTENTS) &&
+           (error = xfs_iread_extents(tp, ip, whichfork)))
+               return error;
+-      nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+-      if (nextents == 0) {
++      if (xfs_iext_count(ifp) == 0) {
+               *rlen = 0;
+               return 0;
+       }
+@@ -5966,7 +5963,7 @@ xfs_bmse_shift_one(
+       mp = ip->i_mount;
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+-      total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
++      total_extents = xfs_iext_count(ifp);
+       xfs_bmbt_get_all(gotp, &got);
+@@ -6143,7 +6140,7 @@ xfs_bmap_shift_extents(
+        * are collapsing out, so we cannot use the count of real extents here.
+        * Instead we have to calculate it from the incore fork.
+        */
+-      total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
++      total_extents = xfs_iext_count(ifp);
+       if (total_extents == 0) {
+               *done = 1;
+               goto del_cursor;
+@@ -6203,7 +6200,7 @@ xfs_bmap_shift_extents(
+                * count can change. Update the total and grade the next record.
+                */
+               if (direction == SHIFT_LEFT) {
+-                      total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
++                      total_extents = xfs_iext_count(ifp);
+                       stop_extent = total_extents;
+               }
+--- a/fs/xfs/libxfs/xfs_inode_fork.c
++++ b/fs/xfs/libxfs/xfs_inode_fork.c
+@@ -775,6 +775,13 @@ xfs_idestroy_fork(
+       }
+ }
++/* Count number of incore extents based on if_bytes */
++xfs_extnum_t
++xfs_iext_count(struct xfs_ifork *ifp)
++{
++      return ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
++}
++
+ /*
+  * Convert in-core extents to on-disk form
+  *
+@@ -803,7 +810,7 @@ xfs_iextents_copy(
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
+       ASSERT(ifp->if_bytes > 0);
+-      nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
++      nrecs = xfs_iext_count(ifp);
+       XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork);
+       ASSERT(nrecs > 0);
+@@ -941,7 +948,7 @@ xfs_iext_get_ext(
+       xfs_extnum_t    idx)            /* index of target extent */
+ {
+       ASSERT(idx >= 0);
+-      ASSERT(idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
++      ASSERT(idx < xfs_iext_count(ifp));
+       if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) {
+               return ifp->if_u1.if_ext_irec->er_extbuf;
+@@ -1017,7 +1024,7 @@ xfs_iext_add(
+       int             new_size;       /* size of extents after adding */
+       xfs_extnum_t    nextents;       /* number of extents in file */
+-      nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
++      nextents = xfs_iext_count(ifp);
+       ASSERT((idx >= 0) && (idx <= nextents));
+       byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t);
+       new_size = ifp->if_bytes + byte_diff;
+@@ -1241,7 +1248,7 @@ xfs_iext_remove(
+       trace_xfs_iext_remove(ip, idx, state, _RET_IP_);
+       ASSERT(ext_diff > 0);
+-      nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
++      nextents = xfs_iext_count(ifp);
+       new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t);
+       if (new_size == 0) {
+@@ -1270,7 +1277,7 @@ xfs_iext_remove_inline(
+       ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
+       ASSERT(idx < XFS_INLINE_EXTS);
+-      nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
++      nextents = xfs_iext_count(ifp);
+       ASSERT(((nextents - ext_diff) > 0) &&
+               (nextents - ext_diff) < XFS_INLINE_EXTS);
+@@ -1309,7 +1316,7 @@ xfs_iext_remove_direct(
+       ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
+       new_size = ifp->if_bytes -
+               (ext_diff * sizeof(xfs_bmbt_rec_t));
+-      nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
++      nextents = xfs_iext_count(ifp);
+       if (new_size == 0) {
+               xfs_iext_destroy(ifp);
+@@ -1546,7 +1553,7 @@ xfs_iext_indirect_to_direct(
+       int             size;           /* size of file extents */
+       ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+-      nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
++      nextents = xfs_iext_count(ifp);
+       ASSERT(nextents <= XFS_LINEAR_EXTS);
+       size = nextents * sizeof(xfs_bmbt_rec_t);
+@@ -1620,7 +1627,7 @@ xfs_iext_bno_to_ext(
+       xfs_extnum_t    nextents;       /* number of file extents */
+       xfs_fileoff_t   startoff = 0;   /* start offset of extent */
+-      nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
++      nextents = xfs_iext_count(ifp);
+       if (nextents == 0) {
+               *idxp = 0;
+               return NULL;
+@@ -1733,8 +1740,8 @@ xfs_iext_idx_to_irec(
+       ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+       ASSERT(page_idx >= 0);
+-      ASSERT(page_idx <= ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
+-      ASSERT(page_idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) || realloc);
++      ASSERT(page_idx <= xfs_iext_count(ifp));
++      ASSERT(page_idx < xfs_iext_count(ifp) || realloc);
+       nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+       erp_idx = 0;
+@@ -1782,7 +1789,7 @@ xfs_iext_irec_init(
+       xfs_extnum_t    nextents;       /* number of extents in file */
+       ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
+-      nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
++      nextents = xfs_iext_count(ifp);
+       ASSERT(nextents <= XFS_LINEAR_EXTS);
+       erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS);
+@@ -1906,7 +1913,7 @@ xfs_iext_irec_compact(
+       ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+       nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+-      nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
++      nextents = xfs_iext_count(ifp);
+       if (nextents == 0) {
+               xfs_iext_destroy(ifp);
+--- a/fs/xfs/libxfs/xfs_inode_fork.h
++++ b/fs/xfs/libxfs/xfs_inode_fork.h
+@@ -152,6 +152,7 @@ void               xfs_init_local_fork(struct xfs_ino
+ struct xfs_bmbt_rec_host *
+               xfs_iext_get_ext(struct xfs_ifork *, xfs_extnum_t);
++xfs_extnum_t  xfs_iext_count(struct xfs_ifork *);
+ void          xfs_iext_insert(struct xfs_inode *, xfs_extnum_t, xfs_extnum_t,
+                               struct xfs_bmbt_irec *, int);
+ void          xfs_iext_add(struct xfs_ifork *, xfs_extnum_t, int);
+--- a/fs/xfs/xfs_bmap_util.c
++++ b/fs/xfs/xfs_bmap_util.c
+@@ -359,9 +359,7 @@ xfs_bmap_count_blocks(
+       mp = ip->i_mount;
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       if ( XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ) {
+-              xfs_bmap_count_leaves(ifp, 0,
+-                      ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t),
+-                      count);
++              xfs_bmap_count_leaves(ifp, 0, xfs_iext_count(ifp), count);
+               return 0;
+       }
+@@ -426,7 +424,7 @@ xfs_getbmapx_fix_eof_hole(
+               ifp = XFS_IFORK_PTR(ip, whichfork);
+               if (!moretocome &&
+                   xfs_iext_bno_to_ext(ifp, fileblock, &lastx) &&
+-                 (lastx == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))-1))
++                 (lastx == xfs_iext_count(ifp) - 1))
+                       out->bmv_oflags |= BMV_OF_LAST;
+       }
+@@ -1878,15 +1876,13 @@ xfs_swap_extent_forks(
+       switch (ip->i_d.di_format) {
+       case XFS_DINODE_FMT_EXTENTS:
+-              /* If the extents fit in the inode, fix the
+-               * pointer.  Otherwise it's already NULL or
+-               * pointing to the extent.
++              /*
++               * If the extents fit in the inode, fix the pointer.  Otherwise
++               * it's already NULL or pointing to the extent.
+                */
+-              nextents = ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+-              if (nextents <= XFS_INLINE_EXTS) {
+-                      ifp->if_u1.if_extents =
+-                              ifp->if_u2.if_inline_ext;
+-              }
++              nextents = xfs_iext_count(&ip->i_df);
++              if (nextents <= XFS_INLINE_EXTS)
++                      ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
+               (*src_log_flags) |= XFS_ILOG_DEXT;
+               break;
+       case XFS_DINODE_FMT_BTREE:
+@@ -1898,15 +1894,13 @@ xfs_swap_extent_forks(
+       switch (tip->i_d.di_format) {
+       case XFS_DINODE_FMT_EXTENTS:
+-              /* If the extents fit in the inode, fix the
+-               * pointer.  Otherwise it's already NULL or
+-               * pointing to the extent.
++              /*
++               * If the extents fit in the inode, fix the pointer.  Otherwise
++               * it's already NULL or pointing to the extent.
+                */
+-              nextents = tip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+-              if (nextents <= XFS_INLINE_EXTS) {
+-                      tifp->if_u1.if_extents =
+-                              tifp->if_u2.if_inline_ext;
+-              }
++              nextents = xfs_iext_count(&tip->i_df);
++              if (nextents <= XFS_INLINE_EXTS)
++                      tifp->if_u1.if_extents = tifp->if_u2.if_inline_ext;
+               (*target_log_flags) |= XFS_ILOG_DEXT;
+               break;
+       case XFS_DINODE_FMT_BTREE:
+--- a/fs/xfs/xfs_inode_item.c
++++ b/fs/xfs/xfs_inode_item.c
+@@ -164,7 +164,7 @@ xfs_inode_item_format_data_fork(
+                       struct xfs_bmbt_rec *p;
+                       ASSERT(ip->i_df.if_u1.if_extents != NULL);
+-                      ASSERT(ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) > 0);
++                      ASSERT(xfs_iext_count(&ip->i_df) > 0);
+                       p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IEXT);
+                       data_bytes = xfs_iextents_copy(ip, p, XFS_DATA_FORK);
+@@ -261,7 +261,7 @@ xfs_inode_item_format_attr_fork(
+                   ip->i_afp->if_bytes > 0) {
+                       struct xfs_bmbt_rec *p;
+-                      ASSERT(ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) ==
++                      ASSERT(xfs_iext_count(ip->i_afp) ==
+                               ip->i_d.di_anextents);
+                       ASSERT(ip->i_afp->if_u1.if_extents != NULL);
+--- a/fs/xfs/xfs_ioctl.c
++++ b/fs/xfs/xfs_ioctl.c
+@@ -910,16 +910,14 @@ xfs_ioc_fsgetxattr(
+       if (attr) {
+               if (ip->i_afp) {
+                       if (ip->i_afp->if_flags & XFS_IFEXTENTS)
+-                              fa.fsx_nextents = ip->i_afp->if_bytes /
+-                                                      sizeof(xfs_bmbt_rec_t);
++                              fa.fsx_nextents = xfs_iext_count(ip->i_afp);
+                       else
+                               fa.fsx_nextents = ip->i_d.di_anextents;
+               } else
+                       fa.fsx_nextents = 0;
+       } else {
+               if (ip->i_df.if_flags & XFS_IFEXTENTS)
+-                      fa.fsx_nextents = ip->i_df.if_bytes /
+-                                              sizeof(xfs_bmbt_rec_t);
++                      fa.fsx_nextents = xfs_iext_count(&ip->i_df);
+               else
+                       fa.fsx_nextents = ip->i_d.di_nextents;
+       }
+--- a/fs/xfs/xfs_qm.c
++++ b/fs/xfs/xfs_qm.c
+@@ -1135,7 +1135,7 @@ xfs_qm_get_rtblks(
+                       return error;
+       }
+       rtblks = 0;
+-      nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
++      nextents = xfs_iext_count(ifp);
+       for (idx = 0; idx < nextents; idx++)
+               rtblks += xfs_bmbt_get_blockcount(xfs_iext_get_ext(ifp, idx));
+       *O_rtblks = (xfs_qcnt_t)rtblks;
+--- a/fs/xfs/xfs_reflink.c
++++ b/fs/xfs/xfs_reflink.c
+@@ -486,7 +486,7 @@ xfs_reflink_trim_irec_to_next_cow(
+       /* This is the extent before; try sliding up one. */
+       if (irec.br_startoff < offset_fsb) {
+               idx++;
+-              if (idx >= ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
++              if (idx >= xfs_iext_count(ifp))
+                       return 0;
+               gotp = xfs_iext_get_ext(ifp, idx);
+               xfs_bmbt_get_all(gotp, &irec);
+@@ -566,7 +566,7 @@ xfs_reflink_cancel_cow_blocks(
+                       xfs_bmap_del_extent_cow(ip, &idx, &got, &del);
+               }
+-              if (++idx >= ifp->if_bytes / sizeof(struct xfs_bmbt_rec))
++              if (++idx >= xfs_iext_count(ifp))
+                       break;
+               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &got);
+       }
diff --git a/queue-4.9/xfs-remove-prev-argument-to-xfs_bmapi_reserve_delalloc.patch b/queue-4.9/xfs-remove-prev-argument-to-xfs_bmapi_reserve_delalloc.patch
new file mode 100644 (file)
index 0000000..994723b
--- /dev/null
@@ -0,0 +1,85 @@
+From hch@lst.de  Tue Jan 10 11:24:50 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon,  9 Jan 2017 16:38:42 +0100
+Subject: xfs: remove prev argument to xfs_bmapi_reserve_delalloc
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-12-git-send-email-hch@lst.de>
+
+
+commit 65c5f419788d623a0410eca1866134f5e4628594 upstream.
+
+We can easily look up the previous extent for the cases where we need it,
+which saves the callers from looking it up for us later in the series.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_bmap.c |    8 ++++++--
+ fs/xfs/libxfs/xfs_bmap.h |    3 +--
+ fs/xfs/xfs_iomap.c       |    3 +--
+ fs/xfs/xfs_reflink.c     |    2 +-
+ 4 files changed, 9 insertions(+), 7 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -4250,7 +4250,6 @@ xfs_bmapi_reserve_delalloc(
+       xfs_fileoff_t           aoff,
+       xfs_filblks_t           len,
+       struct xfs_bmbt_irec    *got,
+-      struct xfs_bmbt_irec    *prev,
+       xfs_extnum_t            *lastx,
+       int                     eof)
+ {
+@@ -4272,7 +4271,12 @@ xfs_bmapi_reserve_delalloc(
+       else
+               extsz = xfs_get_extsz_hint(ip);
+       if (extsz) {
+-              error = xfs_bmap_extsize_align(mp, got, prev, extsz, rt, eof,
++              struct xfs_bmbt_irec    prev;
++
++              if (!xfs_iext_get_extent(ifp, *lastx - 1, &prev))
++                      prev.br_startoff = NULLFILEOFF;
++
++              error = xfs_bmap_extsize_align(mp, got, &prev, extsz, rt, eof,
+                                              1, 0, &aoff, &alen);
+               ASSERT(!error);
+       }
+--- a/fs/xfs/libxfs/xfs_bmap.h
++++ b/fs/xfs/libxfs/xfs_bmap.h
+@@ -243,8 +243,7 @@ struct xfs_bmbt_rec_host *
+               struct xfs_bmbt_irec *gotp, struct xfs_bmbt_irec *prevp);
+ int   xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, int whichfork,
+               xfs_fileoff_t aoff, xfs_filblks_t len,
+-              struct xfs_bmbt_irec *got, struct xfs_bmbt_irec *prev,
+-              xfs_extnum_t *lastx, int eof);
++              struct xfs_bmbt_irec *got, xfs_extnum_t *lastx, int eof);
+ enum xfs_bmap_intent_type {
+       XFS_BMAP_MAP = 1,
+--- a/fs/xfs/xfs_iomap.c
++++ b/fs/xfs/xfs_iomap.c
+@@ -622,8 +622,7 @@ xfs_file_iomap_begin_delay(
+ retry:
+       error = xfs_bmapi_reserve_delalloc(ip, XFS_DATA_FORK, offset_fsb,
+-                      end_fsb - offset_fsb, &got,
+-                      &prev, &idx, eof);
++                      end_fsb - offset_fsb, &got, &idx, eof);
+       switch (error) {
+       case 0:
+               break;
+--- a/fs/xfs/xfs_reflink.c
++++ b/fs/xfs/xfs_reflink.c
+@@ -293,7 +293,7 @@ xfs_reflink_reserve_cow(
+ retry:
+       error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, imap->br_startoff,
+-                      end_fsb - imap->br_startoff, &got, &prev, &idx, eof);
++                      end_fsb - imap->br_startoff, &got, &idx, eof);
+       switch (error) {
+       case 0:
+               break;
diff --git a/queue-4.9/xfs-track-preallocation-separately-in-xfs_bmapi_reserve_delalloc.patch b/queue-4.9/xfs-track-preallocation-separately-in-xfs_bmapi_reserve_delalloc.patch
new file mode 100644 (file)
index 0000000..badb43e
--- /dev/null
@@ -0,0 +1,141 @@
+From hch@lst.de  Tue Jan 10 11:25:03 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon,  9 Jan 2017 16:38:43 +0100
+Subject: xfs: track preallocation separately in xfs_bmapi_reserve_delalloc()
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, Brian Foster <bfoster@redhat.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-13-git-send-email-hch@lst.de>
+
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit 974ae922efd93b07b6cdf989ae959883f6f05fd8 upstream.
+
+Speculative preallocation is currently processed entirely by the callers
+of xfs_bmapi_reserve_delalloc(). The caller determines how much
+preallocation to include, adjusts the extent length and passes down the
+resulting request.
+
+While this works fine for post-eof speculative preallocation, it is not
+as reliable for COW fork preallocation. COW fork preallocation is
+implemented via the cowextszhint, which aligns the start offset as well
+as the length of the extent. Further, it is difficult for the caller to
+accurately identify when preallocation occurs because the returned
+extent could have been merged with neighboring extents in the fork.
+
+To simplify this situation and facilitate further COW fork preallocation
+enhancements, update xfs_bmapi_reserve_delalloc() to take a separate
+preallocation parameter to incorporate into the allocation request. The
+preallocation blocks value is tacked onto the end of the request and
+adjusted to accommodate neighboring extents and extent size limits.
+Since xfs_bmapi_reserve_delalloc() now knows precisely how much
+preallocation was included in the allocation, it can also tag the inodes
+appropriately to support preallocation reclaim.
+
+Note that xfs_bmapi_reserve_delalloc() callers are not yet updated to
+use the preallocation mechanism. This patch should not change behavior
+outside of correctly tagging reflink inodes when start offset
+preallocation occurs (which the caller does not handle correctly).
+
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_bmap.c |   23 +++++++++++++++++++++--
+ fs/xfs/libxfs/xfs_bmap.h |    2 +-
+ fs/xfs/xfs_iomap.c       |    2 +-
+ fs/xfs/xfs_reflink.c     |    2 +-
+ 4 files changed, 24 insertions(+), 5 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -50,6 +50,7 @@
+ #include "xfs_ag_resv.h"
+ #include "xfs_refcount.h"
+ #include "xfs_rmap_btree.h"
++#include "xfs_icache.h"
+ kmem_zone_t           *xfs_bmap_free_item_zone;
+@@ -4247,8 +4248,9 @@ int
+ xfs_bmapi_reserve_delalloc(
+       struct xfs_inode        *ip,
+       int                     whichfork,
+-      xfs_fileoff_t           aoff,
++      xfs_fileoff_t           off,
+       xfs_filblks_t           len,
++      xfs_filblks_t           prealloc,
+       struct xfs_bmbt_irec    *got,
+       xfs_extnum_t            *lastx,
+       int                     eof)
+@@ -4260,10 +4262,17 @@ xfs_bmapi_reserve_delalloc(
+       char                    rt = XFS_IS_REALTIME_INODE(ip);
+       xfs_extlen_t            extsz;
+       int                     error;
++      xfs_fileoff_t           aoff = off;
+-      alen = XFS_FILBLKS_MIN(len, MAXEXTLEN);
++      /*
++       * Cap the alloc length. Keep track of prealloc so we know whether to
++       * tag the inode before we return.
++       */
++      alen = XFS_FILBLKS_MIN(len + prealloc, MAXEXTLEN);
+       if (!eof)
+               alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);
++      if (prealloc && alen >= len)
++              prealloc = alen - len;
+       /* Figure out the extent size, adjust alen */
+       if (whichfork == XFS_COW_FORK)
+@@ -4329,6 +4338,16 @@ xfs_bmapi_reserve_delalloc(
+        */
+       xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got);
++      /*
++       * Tag the inode if blocks were preallocated. Note that COW fork
++       * preallocation can occur at the start or end of the extent, even when
++       * prealloc == 0, so we must also check the aligned offset and length.
++       */
++      if (whichfork == XFS_DATA_FORK && prealloc)
++              xfs_inode_set_eofblocks_tag(ip);
++      if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len))
++              xfs_inode_set_cowblocks_tag(ip);
++
+       ASSERT(got->br_startoff <= aoff);
+       ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen);
+       ASSERT(isnullstartblock(got->br_startblock));
+--- a/fs/xfs/libxfs/xfs_bmap.h
++++ b/fs/xfs/libxfs/xfs_bmap.h
+@@ -242,7 +242,7 @@ struct xfs_bmbt_rec_host *
+               int fork, int *eofp, xfs_extnum_t *lastxp,
+               struct xfs_bmbt_irec *gotp, struct xfs_bmbt_irec *prevp);
+ int   xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, int whichfork,
+-              xfs_fileoff_t aoff, xfs_filblks_t len,
++              xfs_fileoff_t off, xfs_filblks_t len, xfs_filblks_t prealloc,
+               struct xfs_bmbt_irec *got, xfs_extnum_t *lastx, int eof);
+ enum xfs_bmap_intent_type {
+--- a/fs/xfs/xfs_iomap.c
++++ b/fs/xfs/xfs_iomap.c
+@@ -622,7 +622,7 @@ xfs_file_iomap_begin_delay(
+ retry:
+       error = xfs_bmapi_reserve_delalloc(ip, XFS_DATA_FORK, offset_fsb,
+-                      end_fsb - offset_fsb, &got, &idx, eof);
++                      end_fsb - offset_fsb, 0, &got, &idx, eof);
+       switch (error) {
+       case 0:
+               break;
+--- a/fs/xfs/xfs_reflink.c
++++ b/fs/xfs/xfs_reflink.c
+@@ -293,7 +293,7 @@ xfs_reflink_reserve_cow(
+ retry:
+       error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, imap->br_startoff,
+-                      end_fsb - imap->br_startoff, &got, &idx, eof);
++                      end_fsb - imap->br_startoff, 0, &got, &idx, eof);
+       switch (error) {
+       case 0:
+               break;
diff --git a/queue-4.9/xfs-use-gpf_nofs-when-allocating-btree-cursors.patch b/queue-4.9/xfs-use-gpf_nofs-when-allocating-btree-cursors.patch
new file mode 100644 (file)
index 0000000..4398925
--- /dev/null
@@ -0,0 +1,60 @@
+From hch@lst.de  Tue Jan 10 11:30:48 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon,  9 Jan 2017 16:38:59 +0100
+Subject: xfs: use GPF_NOFS when allocating btree cursors
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, "Darrick J. Wong" <darrick.wong@oracle.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-29-git-send-email-hch@lst.de>
+
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit b24a978c377be5f14e798cb41238e66fe51aab2f upstream.
+
+Use NOFS for allocating btree cursors, since they can be called
+under the ilock.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_alloc_btree.c  |    2 +-
+ fs/xfs/libxfs/xfs_bmap_btree.c   |    2 +-
+ fs/xfs/libxfs/xfs_ialloc_btree.c |    2 +-
+ 3 files changed, 3 insertions(+), 3 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_alloc_btree.c
++++ b/fs/xfs/libxfs/xfs_alloc_btree.c
+@@ -421,7 +421,7 @@ xfs_allocbt_init_cursor(
+       ASSERT(btnum == XFS_BTNUM_BNO || btnum == XFS_BTNUM_CNT);
+-      cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
++      cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS);
+       cur->bc_tp = tp;
+       cur->bc_mp = mp;
+--- a/fs/xfs/libxfs/xfs_bmap_btree.c
++++ b/fs/xfs/libxfs/xfs_bmap_btree.c
+@@ -796,7 +796,7 @@ xfs_bmbt_init_cursor(
+       struct xfs_btree_cur    *cur;
+       ASSERT(whichfork != XFS_COW_FORK);
+-      cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
++      cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS);
+       cur->bc_tp = tp;
+       cur->bc_mp = mp;
+--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
++++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
+@@ -357,7 +357,7 @@ xfs_inobt_init_cursor(
+       struct xfs_agi          *agi = XFS_BUF_TO_AGI(agbp);
+       struct xfs_btree_cur    *cur;
+-      cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
++      cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS);
+       cur->bc_tp = tp;
+       cur->bc_mp = mp;
diff --git a/queue-4.9/xfs-use-new-extent-lookup-helpers-in-__xfs_reflink_reserve_cow.patch b/queue-4.9/xfs-use-new-extent-lookup-helpers-in-__xfs_reflink_reserve_cow.patch
new file mode 100644 (file)
index 0000000..05342c6
--- /dev/null
@@ -0,0 +1,49 @@
+From hch@lst.de  Tue Jan 10 11:25:16 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon,  9 Jan 2017 16:38:44 +0100
+Subject: xfs: use new extent lookup helpers in __xfs_reflink_reserve_cow
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-14-git-send-email-hch@lst.de>
+
+
+commit 2755fc4438501c8c28e7783df890e889f6772bee upstream.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_reflink.c |   12 +++++++-----
+ 1 file changed, 7 insertions(+), 5 deletions(-)
+
+--- a/fs/xfs/xfs_reflink.c
++++ b/fs/xfs/xfs_reflink.c
+@@ -243,10 +243,11 @@ xfs_reflink_reserve_cow(
+       struct xfs_bmbt_irec    *imap,
+       bool                    *shared)
+ {
+-      struct xfs_bmbt_irec    got, prev;
++      struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
++      struct xfs_bmbt_irec    got;
+       xfs_fileoff_t           end_fsb, orig_end_fsb;
+-      int                     eof = 0, error = 0;
+-      bool                    trimmed;
++      int                     error = 0;
++      bool                    eof = false, trimmed;
+       xfs_extnum_t            idx;
+       xfs_extlen_t            align;
+@@ -258,8 +259,9 @@ xfs_reflink_reserve_cow(
+        * extent list is generally faster than going out to the shared extent
+        * tree.
+        */
+-      xfs_bmap_search_extents(ip, imap->br_startoff, XFS_COW_FORK, &eof, &idx,
+-                      &got, &prev);
++
++      if (!xfs_iext_lookup_extent(ip, ifp, imap->br_startoff, &idx, &got))
++              eof = true;
+       if (!eof && got.br_startoff <= imap->br_startoff) {
+               trace_xfs_reflink_cow_found(ip, imap);
+               xfs_trim_extent(imap, got.br_startoff, got.br_blockcount);
diff --git a/queue-4.9/xfs-use-new-extent-lookup-helpers-xfs_file_iomap_begin_delay.patch b/queue-4.9/xfs-use-new-extent-lookup-helpers-xfs_file_iomap_begin_delay.patch
new file mode 100644 (file)
index 0000000..03239cd
--- /dev/null
@@ -0,0 +1,90 @@
+From hch@lst.de  Tue Jan 10 11:25:38 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon,  9 Jan 2017 16:38:46 +0100
+Subject: xfs: use new extent lookup helpers xfs_file_iomap_begin_delay
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-16-git-send-email-hch@lst.de>
+
+
+commit 656152e552e5cbe0c11ad261b524376217c2fb13 upstream.
+
+Also only look up the previous extent inside xfs_iomap_prealloc_size
+if we actually need it.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_iomap.c |   20 +++++++++-----------
+ 1 file changed, 9 insertions(+), 11 deletions(-)
+
+--- a/fs/xfs/xfs_iomap.c
++++ b/fs/xfs/xfs_iomap.c
+@@ -395,11 +395,12 @@ xfs_iomap_prealloc_size(
+       struct xfs_inode        *ip,
+       loff_t                  offset,
+       loff_t                  count,
+-      xfs_extnum_t            idx,
+-      struct xfs_bmbt_irec    *prev)
++      xfs_extnum_t            idx)
+ {
+       struct xfs_mount        *mp = ip->i_mount;
++      struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+       xfs_fileoff_t           offset_fsb = XFS_B_TO_FSBT(mp, offset);
++      struct xfs_bmbt_irec    prev;
+       int                     shift = 0;
+       int64_t                 freesp;
+       xfs_fsblock_t           qblocks;
+@@ -419,8 +420,8 @@ xfs_iomap_prealloc_size(
+        */
+       if ((mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) ||
+           XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_dalign) ||
+-          idx == 0 ||
+-          prev->br_startoff + prev->br_blockcount < offset_fsb)
++          !xfs_iext_get_extent(ifp, idx - 1, &prev) ||
++          prev.br_startoff + prev.br_blockcount < offset_fsb)
+               return mp->m_writeio_blocks;
+       /*
+@@ -439,8 +440,8 @@ xfs_iomap_prealloc_size(
+        * always extends to MAXEXTLEN rather than falling short due to things
+        * like stripe unit/width alignment of real extents.
+        */
+-      if (prev->br_blockcount <= (MAXEXTLEN >> 1))
+-              alloc_blocks = prev->br_blockcount << 1;
++      if (prev.br_blockcount <= (MAXEXTLEN >> 1))
++              alloc_blocks = prev.br_blockcount << 1;
+       else
+               alloc_blocks = XFS_B_TO_FSB(mp, offset);
+       if (!alloc_blocks)
+@@ -538,7 +539,6 @@ xfs_file_iomap_begin_delay(
+       xfs_fileoff_t           end_fsb, orig_end_fsb;
+       int                     error = 0, eof = 0;
+       struct xfs_bmbt_irec    got;
+-      struct xfs_bmbt_irec    prev;
+       xfs_extnum_t            idx;
+       ASSERT(!XFS_IS_REALTIME_INODE(ip));
+@@ -563,8 +563,7 @@ xfs_file_iomap_begin_delay(
+                       goto out_unlock;
+       }
+-      xfs_bmap_search_extents(ip, offset_fsb, XFS_DATA_FORK, &eof, &idx,
+-                      &got, &prev);
++      eof = !xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got);
+       if (!eof && got.br_startoff <= offset_fsb) {
+               if (xfs_is_reflink_inode(ip)) {
+                       bool            shared;
+@@ -601,8 +600,7 @@ xfs_file_iomap_begin_delay(
+       if (eof) {
+               xfs_fsblock_t   prealloc_blocks;
+-              prealloc_blocks =
+-                      xfs_iomap_prealloc_size(ip, offset, count, idx, &prev);
++              prealloc_blocks = xfs_iomap_prealloc_size(ip, offset, count, idx);
+               if (prealloc_blocks) {
+                       xfs_extlen_t    align;
+                       xfs_off_t       end_offset;
diff --git a/queue-4.9/xfs-use-the-actual-ag-length-when-reserving-blocks.patch b/queue-4.9/xfs-use-the-actual-ag-length-when-reserving-blocks.patch
new file mode 100644 (file)
index 0000000..521172b
--- /dev/null
@@ -0,0 +1,184 @@
+From hch@lst.de  Tue Jan 10 11:31:10 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon,  9 Jan 2017 16:39:01 +0100
+Subject: xfs: use the actual AG length when reserving blocks
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, "Darrick J. Wong" <darrick.wong@oracle.com>
+Message-ID: <1483976343-661-31-git-send-email-hch@lst.de>
+
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit 20e73b000bcded44a91b79429d8fa743247602ad upstream.
+
+We need to use the actual AG length when making per-AG reservations,
+since we could otherwise end up reserving more blocks out of the last
+AG than there are actual blocks.
+
+Complained-about-by: Brian Foster <bfoster@redhat.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_ag_resv.c        |    3 +++
+ fs/xfs/libxfs/xfs_refcount_btree.c |    9 ++++++---
+ fs/xfs/libxfs/xfs_refcount_btree.h |    3 ++-
+ fs/xfs/libxfs/xfs_rmap_btree.c     |   14 +++++++-------
+ fs/xfs/libxfs/xfs_rmap_btree.h     |    3 ++-
+ fs/xfs/xfs_fsops.c                 |   14 ++++++++++++++
+ 6 files changed, 34 insertions(+), 12 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_ag_resv.c
++++ b/fs/xfs/libxfs/xfs_ag_resv.c
+@@ -256,6 +256,9 @@ xfs_ag_resv_init(
+                       goto out;
+       }
++      ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved +
++             xfs_perag_resv(pag, XFS_AG_RESV_AGFL)->ar_reserved <=
++             pag->pagf_freeblks + pag->pagf_flcount);
+ out:
+       return error;
+ }
+--- a/fs/xfs/libxfs/xfs_refcount_btree.c
++++ b/fs/xfs/libxfs/xfs_refcount_btree.c
+@@ -408,13 +408,14 @@ xfs_refcountbt_calc_size(
+  */
+ xfs_extlen_t
+ xfs_refcountbt_max_size(
+-      struct xfs_mount        *mp)
++      struct xfs_mount        *mp,
++      xfs_agblock_t           agblocks)
+ {
+       /* Bail out if we're uninitialized, which can happen in mkfs. */
+       if (mp->m_refc_mxr[0] == 0)
+               return 0;
+-      return xfs_refcountbt_calc_size(mp, mp->m_sb.sb_agblocks);
++      return xfs_refcountbt_calc_size(mp, agblocks);
+ }
+ /*
+@@ -429,22 +430,24 @@ xfs_refcountbt_calc_reserves(
+ {
+       struct xfs_buf          *agbp;
+       struct xfs_agf          *agf;
++      xfs_agblock_t           agblocks;
+       xfs_extlen_t            tree_len;
+       int                     error;
+       if (!xfs_sb_version_hasreflink(&mp->m_sb))
+               return 0;
+-      *ask += xfs_refcountbt_max_size(mp);
+       error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
+       if (error)
+               return error;
+       agf = XFS_BUF_TO_AGF(agbp);
++      agblocks = be32_to_cpu(agf->agf_length);
+       tree_len = be32_to_cpu(agf->agf_refcount_blocks);
+       xfs_buf_relse(agbp);
++      *ask += xfs_refcountbt_max_size(mp, agblocks);
+       *used += tree_len;
+       return error;
+--- a/fs/xfs/libxfs/xfs_refcount_btree.h
++++ b/fs/xfs/libxfs/xfs_refcount_btree.h
+@@ -66,7 +66,8 @@ extern void xfs_refcountbt_compute_maxle
+ extern xfs_extlen_t xfs_refcountbt_calc_size(struct xfs_mount *mp,
+               unsigned long long len);
+-extern xfs_extlen_t xfs_refcountbt_max_size(struct xfs_mount *mp);
++extern xfs_extlen_t xfs_refcountbt_max_size(struct xfs_mount *mp,
++              xfs_agblock_t agblocks);
+ extern int xfs_refcountbt_calc_reserves(struct xfs_mount *mp,
+               xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used);
+--- a/fs/xfs/libxfs/xfs_rmap_btree.c
++++ b/fs/xfs/libxfs/xfs_rmap_btree.c
+@@ -549,13 +549,14 @@ xfs_rmapbt_calc_size(
+  */
+ xfs_extlen_t
+ xfs_rmapbt_max_size(
+-      struct xfs_mount        *mp)
++      struct xfs_mount        *mp,
++      xfs_agblock_t           agblocks)
+ {
+       /* Bail out if we're uninitialized, which can happen in mkfs. */
+       if (mp->m_rmap_mxr[0] == 0)
+               return 0;
+-      return xfs_rmapbt_calc_size(mp, mp->m_sb.sb_agblocks);
++      return xfs_rmapbt_calc_size(mp, agblocks);
+ }
+ /*
+@@ -570,25 +571,24 @@ xfs_rmapbt_calc_reserves(
+ {
+       struct xfs_buf          *agbp;
+       struct xfs_agf          *agf;
+-      xfs_extlen_t            pool_len;
++      xfs_agblock_t           agblocks;
+       xfs_extlen_t            tree_len;
+       int                     error;
+       if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+               return 0;
+-      /* Reserve 1% of the AG or enough for 1 block per record. */
+-      pool_len = max(mp->m_sb.sb_agblocks / 100, xfs_rmapbt_max_size(mp));
+-      *ask += pool_len;
+-
+       error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
+       if (error)
+               return error;
+       agf = XFS_BUF_TO_AGF(agbp);
++      agblocks = be32_to_cpu(agf->agf_length);
+       tree_len = be32_to_cpu(agf->agf_rmap_blocks);
+       xfs_buf_relse(agbp);
++      /* Reserve 1% of the AG or enough for 1 block per record. */
++      *ask += max(agblocks / 100, xfs_rmapbt_max_size(mp, agblocks));
+       *used += tree_len;
+       return error;
+--- a/fs/xfs/libxfs/xfs_rmap_btree.h
++++ b/fs/xfs/libxfs/xfs_rmap_btree.h
+@@ -60,7 +60,8 @@ extern void xfs_rmapbt_compute_maxlevels
+ extern xfs_extlen_t xfs_rmapbt_calc_size(struct xfs_mount *mp,
+               unsigned long long len);
+-extern xfs_extlen_t xfs_rmapbt_max_size(struct xfs_mount *mp);
++extern xfs_extlen_t xfs_rmapbt_max_size(struct xfs_mount *mp,
++              xfs_agblock_t agblocks);
+ extern int xfs_rmapbt_calc_reserves(struct xfs_mount *mp,
+               xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used);
+--- a/fs/xfs/xfs_fsops.c
++++ b/fs/xfs/xfs_fsops.c
+@@ -631,6 +631,20 @@ xfs_growfs_data_private(
+       xfs_set_low_space_thresholds(mp);
+       mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);
++      /*
++       * If we expanded the last AG, free the per-AG reservation
++       * so we can reinitialize it with the new size.
++       */
++      if (new) {
++              struct xfs_perag        *pag;
++
++              pag = xfs_perag_get(mp, agno);
++              error = xfs_ag_resv_free(pag);
++              xfs_perag_put(pag);
++              if (error)
++                      goto out;
++      }
++
+       /* Reserve AG metadata blocks. */
+       error = xfs_fs_reserve_ag_blocks(mp);
+       if (error && error != -ENOSPC)