drm-i915-gen9-unconditionally-apply-the-memory-bandwidth-wa.patch
drm-i915-gen9-fix-the-wm-memory-bandwidth-wa-for-y-tiling-cases.patch
tpm_tis-check-return-values-from-get_burstcount.patch
+xfs-don-t-call-xfs_sb_quota_from_disk-twice.patch
+xfs-check-return-value-of-_trans_reserve_quota_nblks.patch
+xfs-don-t-skip-cow-forks-w-delalloc-blocks-in-cowblocks-scan.patch
+xfs-don-t-bug-on-mixed-direct-and-mapped-i-o.patch
+xfs-provide-helper-for-counting-extents-from-if_bytes.patch
+xfs-check-minimum-block-size-for-crc-filesystems.patch
+xfs-fix-unbalanced-inode-reclaim-flush-locking.patch
+xfs-new-inode-extent-list-lookup-helpers.patch
+xfs-factor-rmap-btree-size-into-the-indlen-calculations.patch
+xfs-always-succeed-when-deduping-zero-bytes.patch
+xfs-remove-prev-argument-to-xfs_bmapi_reserve_delalloc.patch
+xfs-track-preallocation-separately-in-xfs_bmapi_reserve_delalloc.patch
+xfs-use-new-extent-lookup-helpers-in-__xfs_reflink_reserve_cow.patch
+xfs-clean-up-cow-fork-reservation-and-tag-inodes-correctly.patch
+xfs-use-new-extent-lookup-helpers-xfs_file_iomap_begin_delay.patch
+xfs-pass-post-eof-speculative-prealloc-blocks-to-bmapi.patch
+xfs-move-agi-buffer-type-setting-to-xfs_read_agi.patch
+xfs-pass-state-not-whichfork-to-trace_xfs_extlist.patch
+xfs-handle-cow-fork-in-xfs_bmap_trace_exlist.patch
+xfs-forbid-ag-btrees-with-level-0.patch
+xfs-check-for-bogus-values-in-btree-block-headers.patch
+xfs-complain-if-we-don-t-get-nextents-bmap-records.patch
+xfs-don-t-crash-if-reading-a-directory-results-in-an-unexpected-hole.patch
+xfs-error-out-if-trying-to-add-attrs-and-anextents-0.patch
+xfs-don-t-allow-di_size-with-high-bit-set.patch
+xfs-don-t-cap-maximum-dedupe-request-length.patch
+xfs-ignore-leaf-attr-ichdr.count-in-verifier-during-log-replay.patch
+xfs-use-gpf_nofs-when-allocating-btree-cursors.patch
+xfs-fix-double-cleanup-when-cui-recovery-fails.patch
+xfs-use-the-actual-ag-length-when-reserving-blocks.patch
+xfs-fix-crash-and-data-corruption-due-to-removal-of-busy-cow-extents.patch
+xfs-fix-max_retries-_show-and-_store-functions.patch
--- /dev/null
+From hch@lst.de Tue Jan 10 11:24:37 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon, 9 Jan 2017 16:38:41 +0100
+Subject: xfs: always succeed when deduping zero bytes
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, "Darrick J. Wong" <darrick.wong@oracle.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-11-git-send-email-hch@lst.de>
+
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit fba3e594ef0ad911fa8f559732d588172f212d71 upstream.
+
+It turns out that btrfs and xfs had differing interpretations of what
+to do when the dedupe length is zero. Change xfs to follow btrfs'
+semantics so that the userland interface is consistent.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_reflink.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/fs/xfs/xfs_reflink.c
++++ b/fs/xfs/xfs_reflink.c
+@@ -1345,8 +1345,14 @@ xfs_reflink_remap_range(
+ goto out_unlock;
+ }
+
+- if (len == 0)
++ /* Zero length dedupe exits immediately; reflink goes to EOF. */
++ if (len == 0) {
++ if (is_dedupe) {
++ ret = 0;
++ goto out_unlock;
++ }
+ len = isize - pos_in;
++ }
+
+ /* Ensure offsets don't wrap and the input is inside i_size */
+ if (pos_in + len < pos_in || pos_out + len < pos_out ||
--- /dev/null
+From hch@lst.de Tue Jan 10 11:29:26 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon, 9 Jan 2017 16:38:52 +0100
+Subject: xfs: check for bogus values in btree block headers
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, "Darrick J. Wong" <darrick.wong@oracle.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-22-git-send-email-hch@lst.de>
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit bb3be7e7c1c18e1b141d4cadeb98cc89ecf78099 upstream.
+
+When we're reading a btree block, make sure that what we retrieved
+matches the owner and level; and has a plausible number of records.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_btree.c | 20 ++++++++++++++++++++
+ 1 file changed, 20 insertions(+)
+
+--- a/fs/xfs/libxfs/xfs_btree.c
++++ b/fs/xfs/libxfs/xfs_btree.c
+@@ -1769,8 +1769,28 @@ xfs_btree_lookup_get_block(
+ if (error)
+ return error;
+
++ /* Check the inode owner since the verifiers don't. */
++ if (xfs_sb_version_hascrc(&cur->bc_mp->m_sb) &&
++ (cur->bc_flags & XFS_BTREE_LONG_PTRS) &&
++ be64_to_cpu((*blkp)->bb_u.l.bb_owner) !=
++ cur->bc_private.b.ip->i_ino)
++ goto out_bad;
++
++ /* Did we get the level we were looking for? */
++ if (be16_to_cpu((*blkp)->bb_level) != level)
++ goto out_bad;
++
++ /* Check that internal nodes have at least one record. */
++ if (level != 0 && be16_to_cpu((*blkp)->bb_numrecs) == 0)
++ goto out_bad;
++
+ xfs_btree_setbuf(cur, level, bp);
+ return 0;
++
++out_bad:
++ *blkp = NULL;
++ xfs_trans_brelse(cur->bc_tp, bp);
++ return -EFSCORRUPTED;
+ }
+
+ /*
--- /dev/null
+From hch@lst.de Tue Jan 10 11:23:44 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon, 9 Jan 2017 16:38:37 +0100
+Subject: xfs: check minimum block size for CRC filesystems
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, "Darrick J. Wong" <darrick.wong@oracle.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-7-git-send-email-hch@lst.de>
+
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit bec9d48d7a303a5bb95c05961ff07ec7eeb59058 upstream.
+
+Check the minimum block size on v5 filesystems.
+
+[dchinner: cleaned up XFS_MIN_CRC_BLOCKSIZE check]
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_sb.c | 6 ++++++
+ fs/xfs/libxfs/xfs_types.h | 3 +++
+ 2 files changed, 9 insertions(+)
+
+--- a/fs/xfs/libxfs/xfs_sb.c
++++ b/fs/xfs/libxfs/xfs_sb.c
+@@ -262,6 +262,12 @@ xfs_mount_validate_sb(
+ return -EFSCORRUPTED;
+ }
+
++ if (xfs_sb_version_hascrc(&mp->m_sb) &&
++ sbp->sb_blocksize < XFS_MIN_CRC_BLOCKSIZE) {
++ xfs_notice(mp, "v5 SB sanity check failed");
++ return -EFSCORRUPTED;
++ }
++
+ /*
+ * Until this is fixed only page-sized or smaller data blocks work.
+ */
+--- a/fs/xfs/libxfs/xfs_types.h
++++ b/fs/xfs/libxfs/xfs_types.h
+@@ -75,11 +75,14 @@ typedef __int64_t xfs_sfiloff_t; /* sign
+ * Minimum and maximum blocksize and sectorsize.
+ * The blocksize upper limit is pretty much arbitrary.
+ * The sectorsize upper limit is due to sizeof(sb_sectsize).
++ * CRC enabled filesystems use 512 byte inodes, meaning 512 byte block sizes
++ * cannot be used.
+ */
+ #define XFS_MIN_BLOCKSIZE_LOG 9 /* i.e. 512 bytes */
+ #define XFS_MAX_BLOCKSIZE_LOG 16 /* i.e. 65536 bytes */
+ #define XFS_MIN_BLOCKSIZE (1 << XFS_MIN_BLOCKSIZE_LOG)
+ #define XFS_MAX_BLOCKSIZE (1 << XFS_MAX_BLOCKSIZE_LOG)
++#define XFS_MIN_CRC_BLOCKSIZE (1 << (XFS_MIN_BLOCKSIZE_LOG + 1))
+ #define XFS_MIN_SECTORSIZE_LOG 9 /* i.e. 512 bytes */
+ #define XFS_MAX_SECTORSIZE_LOG 15 /* i.e. 32768 bytes */
+ #define XFS_MIN_SECTORSIZE (1 << XFS_MIN_SECTORSIZE_LOG)
--- /dev/null
+From hch@lst.de Tue Jan 10 11:22:36 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon, 9 Jan 2017 16:38:33 +0100
+Subject: xfs: check return value of _trans_reserve_quota_nblks
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, "Darrick J. Wong" <darrick.wong@oracle.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-3-git-send-email-hch@lst.de>
+
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit 4fd29ec47212c8cbf98916af519019ccc5e58e49 upstream.
+
+Check the return value of xfs_trans_reserve_quota_nblks for errors.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_bmap.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -4902,8 +4902,11 @@ xfs_bmap_del_extent_delay(
+ * sb counters as we might have to borrow some blocks for the
+ * indirect block accounting.
+ */
+- xfs_trans_reserve_quota_nblks(NULL, ip, -((long)del->br_blockcount), 0,
++ error = xfs_trans_reserve_quota_nblks(NULL, ip,
++ -((long)del->br_blockcount), 0,
+ isrt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
++ if (error)
++ return error;
+ ip->i_delayed_blks -= del->br_blockcount;
+
+ if (whichfork == XFS_COW_FORK)
--- /dev/null
+From hch@lst.de Tue Jan 10 11:25:29 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon, 9 Jan 2017 16:38:45 +0100
+Subject: xfs: clean up cow fork reservation and tag inodes correctly
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, Brian Foster <bfoster@redhat.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-15-git-send-email-hch@lst.de>
+
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit 0260d8ff5f76617e3a55a1c471383ecb4404c3ad upstream.
+
+COW fork reservation is implemented via delayed allocation. The code is
+modeled after the traditional delalloc allocation code, but is slightly
+different in terms of how preallocation occurs. Rather than post-eof
+speculative preallocation, COW fork preallocation is implemented via a
+COW extent size hint that is designed to minimize fragmentation as a
+reflinked file is split over time.
+
+xfs_reflink_reserve_cow() still uses logic that is oriented towards
+dealing with post-eof speculative preallocation, however, and is stale
+or not necessarily correct. First, the EOF alignment to the COW extent
+size hint is implemented in xfs_bmapi_reserve_delalloc() (which does so
+correctly by aligning the start and end offsets) and so is not necessary
+in xfs_reflink_reserve_cow(). The backoff and retry logic on ENOSPC is
+also ineffective for the same reason, as xfs_bmapi_reserve_delalloc()
+will simply perform the same allocation request on the retry. Finally,
+since the COW extent size hint aligns the start and end offset of the
+range to allocate, the end_fsb != orig_end_fsb logic is not sufficient.
+Indeed, if a write request happens to end on an aligned offset, it is
+possible that we do not tag the inode for COW preallocation even though
+xfs_bmapi_reserve_delalloc() may have preallocated at the start offset.
+
+Kill the unnecessary, duplicate code in xfs_reflink_reserve_cow().
+Remove the inode tag logic as well since xfs_bmapi_reserve_delalloc()
+has been updated to tag the inode correctly.
+
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_reflink.c | 29 +++--------------------------
+ 1 file changed, 3 insertions(+), 26 deletions(-)
+
+--- a/fs/xfs/xfs_reflink.c
++++ b/fs/xfs/xfs_reflink.c
+@@ -245,11 +245,9 @@ xfs_reflink_reserve_cow(
+ {
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+ struct xfs_bmbt_irec got;
+- xfs_fileoff_t end_fsb, orig_end_fsb;
+ int error = 0;
+ bool eof = false, trimmed;
+ xfs_extnum_t idx;
+- xfs_extlen_t align;
+
+ /*
+ * Search the COW fork extent list first. This serves two purposes:
+@@ -287,33 +285,12 @@ xfs_reflink_reserve_cow(
+ if (error)
+ return error;
+
+- end_fsb = orig_end_fsb = imap->br_startoff + imap->br_blockcount;
+-
+- align = xfs_eof_alignment(ip, xfs_get_cowextsz_hint(ip));
+- if (align)
+- end_fsb = roundup_64(end_fsb, align);
+-
+-retry:
+ error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, imap->br_startoff,
+- end_fsb - imap->br_startoff, 0, &got, &idx, eof);
+- switch (error) {
+- case 0:
+- break;
+- case -ENOSPC:
+- case -EDQUOT:
+- /* retry without any preallocation */
++ imap->br_blockcount, 0, &got, &idx, eof);
++ if (error == -ENOSPC || error == -EDQUOT)
+ trace_xfs_reflink_cow_enospc(ip, imap);
+- if (end_fsb != orig_end_fsb) {
+- end_fsb = orig_end_fsb;
+- goto retry;
+- }
+- /*FALLTHRU*/
+- default:
++ if (error)
+ return error;
+- }
+-
+- if (end_fsb != orig_end_fsb)
+- xfs_inode_set_cowblocks_tag(ip);
+
+ trace_xfs_reflink_cow_alloc(ip, &got);
+ return 0;
--- /dev/null
+From hch@lst.de Tue Jan 10 11:29:49 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon, 9 Jan 2017 16:38:53 +0100
+Subject: xfs: complain if we don't get nextents bmap records
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, "Darrick J. Wong" <darrick.wong@oracle.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-23-git-send-email-hch@lst.de>
+
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit 356a3225222e5bc4df88aef3419fb6424f18ab69 upstream.
+
+When reading into memory all extents of a btree-format inode fork,
+complain if the number of extents we find is not the same as the number
+of extents reported in the inode core. This is needed to stop an IO
+action from accessing the garbage areas of the in-core fork.
+
+[dchinner: removed redundant assert]
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_bmap.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -1377,8 +1377,9 @@ xfs_bmap_read_extents(
+ return error;
+ block = XFS_BUF_TO_BLOCK(bp);
+ }
++ if (i != XFS_IFORK_NEXTENTS(ip, whichfork))
++ return -EFSCORRUPTED;
+ ASSERT(i == xfs_iext_count(ifp));
+- ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork));
+ XFS_BMAP_TRACE_EXLIST(ip, i, whichfork);
+ return 0;
+ error0:
--- /dev/null
+From hch@lst.de Tue Jan 10 11:30:18 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon, 9 Jan 2017 16:38:56 +0100
+Subject: xfs: don't allow di_size with high bit set
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, "Darrick J. Wong" <darrick.wong@oracle.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-26-git-send-email-hch@lst.de>
+
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit ef388e2054feedaeb05399ed654bdb06f385d294 upstream.
+
+The on-disk field di_size is used to set i_size, which is a signed
+integer of loff_t. If the high bit of di_size is set, we'll end up with
+a negative i_size, which will cause all sorts of problems. Since the
+VFS won't let us create a file with such length, we should catch them
+here in the verifier too.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_inode_buf.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/fs/xfs/libxfs/xfs_inode_buf.c
++++ b/fs/xfs/libxfs/xfs_inode_buf.c
+@@ -392,6 +392,14 @@ xfs_dinode_verify(
+ if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
+ return false;
+
++ /* don't allow invalid i_size */
++ if (be64_to_cpu(dip->di_size) & (1ULL << 63))
++ return false;
++
++ /* No zero-length symlinks. */
++ if (S_ISLNK(be16_to_cpu(dip->di_mode)) && dip->di_size == 0)
++ return false;
++
+ /* only version 3 or greater inodes are extensively verified here */
+ if (dip->di_version < 3)
+ return true;
--- /dev/null
+From hch@lst.de Tue Jan 10 11:23:14 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon, 9 Jan 2017 16:38:35 +0100
+Subject: xfs: don't BUG() on mixed direct and mapped I/O
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, Brian Foster <bfoster@redhat.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-5-git-send-email-hch@lst.de>
+
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit 04197b341f23b908193308b8d63d17ff23232598 upstream.
+
+We've had reports of generic/095 causing XFS to BUG() in
+__xfs_get_blocks() due to the existence of delalloc blocks on a
+direct I/O read. generic/095 issues a mix of various types of I/O,
+including direct and memory mapped I/O to a single file. This is
+clearly not supported behavior and is known to lead to such
+problems. E.g., the lack of exclusion between the direct I/O and
+write fault paths means that a write fault can allocate delalloc
+blocks in a region of a file that was previously a hole after the
+direct read has attempted to flush/inval the file range, but before
+it actually reads the block mapping. In turn, the direct read
+discovers a delalloc extent and cannot proceed.
+
+While the appropriate solution here is to not mix direct and memory
+mapped I/O to the same regions of the same file, the current
+BUG_ON() behavior is probably overkill as it can crash the entire
+system. Instead, localize the failure to the I/O in question by
+returning an error for a direct I/O that cannot be handled safely
+due to delalloc blocks. Be careful to allow the case of a direct
+write to post-eof delalloc blocks. This can occur due to speculative
+preallocation and is safe as post-eof blocks are not accompanied by
+dirty pages in pagecache (conversely, preallocation within eof must
+have been zeroed, and thus dirtied, before the inode size could have
+been increased beyond said blocks).
+
+Finally, provide an additional warning if a direct I/O write occurs
+while the file is memory mapped. This may not catch all problematic
+scenarios, but provides a hint that some known-to-be-problematic I/O
+methods are in use.
+
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_aops.c | 22 ++++++++++++++++++++--
+ 1 file changed, 20 insertions(+), 2 deletions(-)
+
+--- a/fs/xfs/xfs_aops.c
++++ b/fs/xfs/xfs_aops.c
+@@ -1361,6 +1361,26 @@ __xfs_get_blocks(
+ if (error)
+ goto out_unlock;
+
++ /*
++ * The only time we can ever safely find delalloc blocks on direct I/O
++ * is a dio write to post-eof speculative preallocation. All other
++ * scenarios are indicative of a problem or misuse (such as mixing
++ * direct and mapped I/O).
++ *
++ * The file may be unmapped by the time we get here so we cannot
++ * reliably fail the I/O based on mapping. Instead, fail the I/O if this
++ * is a read or a write within eof. Otherwise, carry on but warn as a
++ * precaution if the file happens to be mapped.
++ */
++ if (direct && imap.br_startblock == DELAYSTARTBLOCK) {
++ if (!create || offset < i_size_read(VFS_I(ip))) {
++ WARN_ON_ONCE(1);
++ error = -EIO;
++ goto out_unlock;
++ }
++ WARN_ON_ONCE(mapping_mapped(VFS_I(ip)->i_mapping));
++ }
++
+ /* for DAX, we convert unwritten extents directly */
+ if (create &&
+ (!nimaps ||
+@@ -1450,8 +1470,6 @@ __xfs_get_blocks(
+ (new || ISUNWRITTEN(&imap))))
+ set_buffer_new(bh_result);
+
+- BUG_ON(direct && imap.br_startblock == DELAYSTARTBLOCK);
+-
+ return 0;
+
+ out_unlock:
--- /dev/null
+From hch@lst.de Tue Jan 10 11:09:49 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon, 9 Jan 2017 16:38:32 +0100
+Subject: xfs: don't call xfs_sb_quota_from_disk twice
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, Eric Sandeen <sandeen@redhat.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-2-git-send-email-hch@lst.de>
+
+
+From: Eric Sandeen <sandeen@redhat.com>
+
+commit e6fc6fcf4447c9266038c55c25e4c7c14bee110c upstream.
+
+Source xfsprogs commit: ee3754254e8c186c99b6cdd4d59f741759d04acb
+
+Kernel commit 5ef828c4 ("xfs: avoid false quotacheck after unclean
+shutdown") made xfs_sb_from_disk() also call xfs_sb_quota_from_disk
+by default.
+
+However, when this was merged to libxfs, existing separate
+calls to libxfs_sb_quota_from_disk remained, and calling it
+twice in a row on a V4 superblock leads to issues, because:
+
+ if (sbp->sb_qflags & XFS_PQUOTA_ACCT) {
+...
+ sbp->sb_pquotino = sbp->sb_gquotino;
+ sbp->sb_gquotino = NULLFSINO;
+
+and after the second call, we have set both pquotino and gquotino
+to NULLFSINO.
+
+Fix this by making it safe to call twice, and also remove the extra
+calls to libxfs_sb_quota_from_disk.
+
+This is only spotted when running xfstests with "-m crc=0" because
+the sb_from_disk change came about after V5 became default, and
+the above behavior only exists on a V4 superblock.
+
+Reported-by: Eryu Guan <eguan@redhat.com>
+Signed-off-by: Eric Sandeen <sandeen@redhat.com>
+Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_sb.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_sb.c
++++ b/fs/xfs/libxfs/xfs_sb.c
+@@ -338,13 +338,16 @@ xfs_sb_quota_from_disk(struct xfs_sb *sb
+ XFS_PQUOTA_CHKD : XFS_GQUOTA_CHKD;
+ sbp->sb_qflags &= ~(XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD);
+
+- if (sbp->sb_qflags & XFS_PQUOTA_ACCT) {
++ if (sbp->sb_qflags & XFS_PQUOTA_ACCT &&
++ sbp->sb_gquotino != NULLFSINO) {
+ /*
+ * In older version of superblock, on-disk superblock only
+ * has sb_gquotino, and in-core superblock has both sb_gquotino
+ * and sb_pquotino. But, only one of them is supported at any
+ * point of time. So, if PQUOTA is set in disk superblock,
+- * copy over sb_gquotino to sb_pquotino.
++ * copy over sb_gquotino to sb_pquotino. The NULLFSINO test
++ * above is to make sure we don't do this twice and wipe them
++ * both out!
+ */
+ sbp->sb_pquotino = sbp->sb_gquotino;
+ sbp->sb_gquotino = NULLFSINO;
--- /dev/null
+From hch@lst.de Tue Jan 10 11:30:27 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon, 9 Jan 2017 16:38:57 +0100
+Subject: xfs: don't cap maximum dedupe request length
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, "Darrick J. Wong" <darrick.wong@oracle.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-27-git-send-email-hch@lst.de>
+
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit 1bb33a98702d8360947f18a44349df75ba555d5d upstream.
+
+After various discussions on linux-fsdevel, it has been decided that it
+is not necessary to cap the length of a dedupe request, and that
+correctly-written userspace client programs will be able to absorb the
+change. Therefore, remove the length clamping behavior.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_file.c | 9 ---------
+ 1 file changed, 9 deletions(-)
+
+--- a/fs/xfs/xfs_file.c
++++ b/fs/xfs/xfs_file.c
+@@ -939,7 +939,6 @@ xfs_file_clone_range(
+ len, false);
+ }
+
+-#define XFS_MAX_DEDUPE_LEN (16 * 1024 * 1024)
+ STATIC ssize_t
+ xfs_file_dedupe_range(
+ struct file *src_file,
+@@ -950,14 +949,6 @@ xfs_file_dedupe_range(
+ {
+ int error;
+
+- /*
+- * Limit the total length we will dedupe for each operation.
+- * This is intended to bound the total time spent in this
+- * ioctl to something sane.
+- */
+- if (len > XFS_MAX_DEDUPE_LEN)
+- len = XFS_MAX_DEDUPE_LEN;
+-
+ error = xfs_reflink_remap_range(src_file, loff, dst_file, dst_loff,
+ len, true);
+ if (error)
--- /dev/null
+From hch@lst.de Tue Jan 10 11:29:59 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon, 9 Jan 2017 16:38:54 +0100
+Subject: xfs: don't crash if reading a directory results in an unexpected hole
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, "Darrick J. Wong" <darrick.wong@oracle.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-24-git-send-email-hch@lst.de>
+
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit 96a3aefb8ffde23180130460b0b2407b328eb727 upstream.
+
+In xfs_dir3_data_read, we can encounter the situation where err == 0 and
+*bpp == NULL if the given bno offset happens to be a hole; this leads to
+a crash if we try to set the buffer type after the _da_read_buf call.
+Holes can happen due to corrupt or malicious entries in the bmbt data,
+so be a little more careful when we're handling buffers.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_dir2_data.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/xfs/libxfs/xfs_dir2_data.c
++++ b/fs/xfs/libxfs/xfs_dir2_data.c
+@@ -329,7 +329,7 @@ xfs_dir3_data_read(
+
+ err = xfs_da_read_buf(tp, dp, bno, mapped_bno, bpp,
+ XFS_DATA_FORK, &xfs_dir3_data_buf_ops);
+- if (!err && tp)
++ if (!err && tp && *bpp)
+ xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_DATA_BUF);
+ return err;
+ }
--- /dev/null
+From hch@lst.de Tue Jan 10 11:23:00 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon, 9 Jan 2017 16:38:34 +0100
+Subject: xfs: don't skip cow forks w/ delalloc blocks in cowblocks scan
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, Brian Foster <bfoster@redhat.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-4-git-send-email-hch@lst.de>
+
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit 399372349a7f9b2d7e56e4fa4467c69822d07024 upstream.
+
+The cowblocks background scanner currently clears the cowblocks tag
+for inodes without any real allocations in the cow fork. This
+excludes inodes with only delalloc blocks in the cow fork. While we
+might never expect to clear delalloc blocks from the cow fork in the
+background scanner, it is not necessarily correct to clear the
+cowblocks tag from such inodes.
+
+For example, if the background scanner happens to process an inode
+between a buffered write and writeback, the scanner catches the
+inode in a state after delalloc blocks have been allocated to the
+cow fork but before the delalloc blocks have been converted to real
+blocks by writeback. The background scanner then incorrectly clears
+the cowblocks tag, even if part of the aforementioned delalloc
+reservation will not be remapped to the data fork (i.e., extra
+blocks due to the cowextsize hint). This means that any such
+additional blocks in the cow fork might never be reclaimed by the
+background scanner and could persist until the inode itself is
+reclaimed.
+
+To address this problem, only skip and clear inodes without any cow
+fork allocations whatsoever from the background scanner. While we
+generally do not want to cancel delalloc reservations from the
+background scanner, the pagecache dirty check following the
+cowblocks check should prevent that situation. If we do end up with
+delalloc cow fork blocks without a dirty address space mapping, this
+is probably an indication that something has gone wrong and the
+blocks should be reclaimed, as they may never be converted to a real
+allocation.
+
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_icache.c | 7 ++++++-
+ fs/xfs/xfs_reflink.c | 34 ----------------------------------
+ fs/xfs/xfs_reflink.h | 2 --
+ 3 files changed, 6 insertions(+), 37 deletions(-)
+
+--- a/fs/xfs/xfs_icache.c
++++ b/fs/xfs/xfs_icache.c
+@@ -1580,10 +1580,15 @@ xfs_inode_free_cowblocks(
+ struct xfs_eofblocks *eofb = args;
+ bool need_iolock = true;
+ int match;
++ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+
+ ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0));
+
+- if (!xfs_reflink_has_real_cow_blocks(ip)) {
++ /*
++ * Just clear the tag if we have an empty cow fork or none at all. It's
++ * possible the inode was fully unshared since it was originally tagged.
++ */
++ if (!xfs_is_reflink_inode(ip) || !ifp->if_bytes) {
+ trace_xfs_inode_free_cowblocks_invalid(ip);
+ xfs_inode_clear_cowblocks_tag(ip);
+ return 0;
+--- a/fs/xfs/xfs_reflink.c
++++ b/fs/xfs/xfs_reflink.c
+@@ -1697,37 +1697,3 @@ out:
+ trace_xfs_reflink_unshare_error(ip, error, _RET_IP_);
+ return error;
+ }
+-
+-/*
+- * Does this inode have any real CoW reservations?
+- */
+-bool
+-xfs_reflink_has_real_cow_blocks(
+- struct xfs_inode *ip)
+-{
+- struct xfs_bmbt_irec irec;
+- struct xfs_ifork *ifp;
+- struct xfs_bmbt_rec_host *gotp;
+- xfs_extnum_t idx;
+-
+- if (!xfs_is_reflink_inode(ip))
+- return false;
+-
+- /* Go find the old extent in the CoW fork. */
+- ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+- gotp = xfs_iext_bno_to_ext(ifp, 0, &idx);
+- while (gotp) {
+- xfs_bmbt_get_all(gotp, &irec);
+-
+- if (!isnullstartblock(irec.br_startblock))
+- return true;
+-
+- /* Roll on... */
+- idx++;
+- if (idx >= ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
+- break;
+- gotp = xfs_iext_get_ext(ifp, idx);
+- }
+-
+- return false;
+-}
+--- a/fs/xfs/xfs_reflink.h
++++ b/fs/xfs/xfs_reflink.h
+@@ -50,6 +50,4 @@ extern int xfs_reflink_clear_inode_flag(
+ extern int xfs_reflink_unshare(struct xfs_inode *ip, xfs_off_t offset,
+ xfs_off_t len);
+
+-extern bool xfs_reflink_has_real_cow_blocks(struct xfs_inode *ip);
+-
+ #endif /* __XFS_REFLINK_H */
--- /dev/null
+From hch@lst.de Tue Jan 10 11:30:09 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon, 9 Jan 2017 16:38:55 +0100
+Subject: xfs: error out if trying to add attrs and anextents > 0
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, "Darrick J. Wong" <darrick.wong@oracle.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-25-git-send-email-hch@lst.de>
+
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit 0f352f8ee8412bd9d34fb2a6411241da61175c0e upstream.
+
+We shouldn't assert if somehow we end up trying to add an attr fork to
+an inode that apparently already has attr extents because this is an
+indication of on-disk corruption. Instead, return an error code to
+userspace.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_bmap.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -1153,6 +1153,10 @@ xfs_bmap_add_attrfork(
+ goto trans_cancel;
+ if (XFS_IFORK_Q(ip))
+ goto trans_cancel;
++ if (ip->i_d.di_anextents != 0) {
++ error = -EFSCORRUPTED;
++ goto trans_cancel;
++ }
+ if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) {
+ /*
+ * For inodes coming from pre-6.2 filesystems.
+@@ -1160,7 +1164,6 @@ xfs_bmap_add_attrfork(
+ ASSERT(ip->i_d.di_aformat == 0);
+ ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
+ }
+- ASSERT(ip->i_d.di_anextents == 0);
+
+ xfs_trans_ijoin(tp, ip, 0);
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
--- /dev/null
+From hch@lst.de Tue Jan 10 11:24:25 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon, 9 Jan 2017 16:38:40 +0100
+Subject: xfs: factor rmap btree size into the indlen calculations
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, "Darrick J. Wong" <darrick.wong@oracle.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-10-git-send-email-hch@lst.de>
+
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit fd26a88093bab6529ea2de819114ca92dbd1d71d upstream.
+
+When we're estimating the amount of space it's going to take to satisfy
+a delalloc reservation, we need to include the space that we might need
+to grow the rmapbt. This helps us to avoid running out of space later
+when _iomap_write_allocate needs more space than we reserved. Eryu Guan
+observed this happening on generic/224 when sunit/swidth were set.
+
+Reported-by: Eryu Guan <eguan@redhat.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_bmap.c | 17 +++++++++++++++--
+ 1 file changed, 15 insertions(+), 2 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -49,6 +49,7 @@
+ #include "xfs_rmap.h"
+ #include "xfs_ag_resv.h"
+ #include "xfs_refcount.h"
++#include "xfs_rmap_btree.h"
+
+
+ kmem_zone_t *xfs_bmap_free_item_zone;
+@@ -190,8 +191,12 @@ xfs_bmap_worst_indlen(
+ int maxrecs; /* maximum record count at this level */
+ xfs_mount_t *mp; /* mount structure */
+ xfs_filblks_t rval; /* return value */
++ xfs_filblks_t orig_len;
+
+ mp = ip->i_mount;
++
++ /* Calculate the worst-case size of the bmbt. */
++ orig_len = len;
+ maxrecs = mp->m_bmap_dmxr[0];
+ for (level = 0, rval = 0;
+ level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
+@@ -199,12 +204,20 @@ xfs_bmap_worst_indlen(
+ len += maxrecs - 1;
+ do_div(len, maxrecs);
+ rval += len;
+- if (len == 1)
+- return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
++ if (len == 1) {
++ rval += XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
+ level - 1;
++ break;
++ }
+ if (level == 0)
+ maxrecs = mp->m_bmap_dmxr[1];
+ }
++
++ /* Calculate the worst-case size of the rmapbt. */
++ if (xfs_sb_version_hasrmapbt(&mp->m_sb))
++ rval += 1 + xfs_rmapbt_calc_size(mp, orig_len) +
++ mp->m_rmap_maxlevels;
++
+ return rval;
+ }
+
--- /dev/null
+From hch@lst.de Tue Jan 10 11:31:19 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon, 9 Jan 2017 16:39:02 +0100
+Subject: xfs: fix crash and data corruption due to removal of busy COW extents
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, "Darrick J. Wong" <darrick.wong@oracle.com>
+Message-ID: <1483976343-661-32-git-send-email-hch@lst.de>
+
+
+commit a1b7a4dea6166cf46be895bce4aac67ea5160fe8 upstream.
+
+There is a race window between write_cache_pages calling
+clear_page_dirty_for_io and XFS calling set_page_writeback, in which
+the mapping for an inode is tagged neither as dirty, nor as writeback.
+
+If the COW shrinker hits in exactly that window we'll remove the delayed
+COW extents and writepages trying to write it back, which in release
+kernels will manifest as corruption of the bmap btree, and in debug
+kernels will trip the ASSERT about now calling xfs_bmapi_write with the
+COWFORK flag for holes. A complex customer load manages to hit this
+window fairly reliably, probably by always having COW writeback in flight
+while the cow shrinker runs.
+
+This patch adds another check for having the I_DIRTY_PAGES flag set,
+which is still set during this race window. While this fixes the problem
+I'm still not overly happy about the way the COW shrinker works as it
+still seems a bit fragile.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_icache.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/xfs/xfs_icache.c
++++ b/fs/xfs/xfs_icache.c
+@@ -1599,7 +1599,8 @@ xfs_inode_free_cowblocks(
+ * If the mapping is dirty or under writeback we cannot touch the
+ * CoW fork. Leave it alone if we're in the midst of a directio.
+ */
+- if (mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) ||
++ if ((VFS_I(ip)->i_state & I_DIRTY_PAGES) ||
++ mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) ||
+ mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_WRITEBACK) ||
+ atomic_read(&VFS_I(ip)->i_dio_count))
+ return 0;
--- /dev/null
+From hch@lst.de Tue Jan 10 11:31:00 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon, 9 Jan 2017 16:39:00 +0100
+Subject: xfs: fix double-cleanup when CUI recovery fails
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, "Darrick J. Wong" <darrick.wong@oracle.com>
+Message-ID: <1483976343-661-30-git-send-email-hch@lst.de>
+
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit 7a21272b088894070391a94fdd1c67014020fa1d upstream.
+
+Dan Carpenter reported a double-free of rcur if _defer_finish fails
+while we're recovering CUI items. Fix the error recovery to prevent
+this.
+
+Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_refcount_item.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/xfs/xfs_refcount_item.c
++++ b/fs/xfs/xfs_refcount_item.c
+@@ -526,13 +526,14 @@ xfs_cui_recover(
+ xfs_refcount_finish_one_cleanup(tp, rcur, error);
+ error = xfs_defer_finish(&tp, &dfops, NULL);
+ if (error)
+- goto abort_error;
++ goto abort_defer;
+ set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);
+ error = xfs_trans_commit(tp);
+ return error;
+
+ abort_error:
+ xfs_refcount_finish_one_cleanup(tp, rcur, error);
++abort_defer:
+ xfs_defer_cancel(&dfops);
+ xfs_trans_cancel(tp);
+ return error;
--- /dev/null
+From hch@lst.de Tue Jan 10 11:31:32 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon, 9 Jan 2017 16:39:03 +0100
+Subject: xfs: fix max_retries _show and _store functions
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, Carlos Maiolino <cmaiolino@redhat.com>, "Darrick J. Wong" <darrick.wong@oracle.com>
+Message-ID: <1483976343-661-33-git-send-email-hch@lst.de>
+
+
+From: Carlos Maiolino <cmaiolino@redhat.com>
+
+commit ff97f2399edac1e0fb3fa7851d5fbcbdf04717cf upstream.
+
+max_retries _show and _store functions should test against cfg->max_retries,
+not cfg->retry_timeout
+
+Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
+Reviewed-by: Eric Sandeen <sandeen@redhat.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_sysfs.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/xfs/xfs_sysfs.c
++++ b/fs/xfs/xfs_sysfs.c
+@@ -396,7 +396,7 @@ max_retries_show(
+ int retries;
+ struct xfs_error_cfg *cfg = to_error_cfg(kobject);
+
+- if (cfg->retry_timeout == XFS_ERR_RETRY_FOREVER)
++ if (cfg->max_retries == XFS_ERR_RETRY_FOREVER)
+ retries = -1;
+ else
+ retries = cfg->max_retries;
+@@ -422,7 +422,7 @@ max_retries_store(
+ return -EINVAL;
+
+ if (val == -1)
+- cfg->retry_timeout = XFS_ERR_RETRY_FOREVER;
++ cfg->max_retries = XFS_ERR_RETRY_FOREVER;
+ else
+ cfg->max_retries = val;
+ return count;
--- /dev/null
+From hch@lst.de Tue Jan 10 11:23:57 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon, 9 Jan 2017 16:38:38 +0100
+Subject: xfs: fix unbalanced inode reclaim flush locking
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, Brian Foster <bfoster@redhat.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-8-git-send-email-hch@lst.de>
+
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit 98efe8af1c9ffac47e842b7a75ded903e2f028da upstream.
+
+Filesystem shutdown testing on an older distro kernel has uncovered an
+imbalanced locking pattern for the inode flush lock in
+xfs_reclaim_inode(). Specifically, there is a double unlock sequence
+between the call to xfs_iflush_abort() and xfs_reclaim_inode() at the
+"reclaim:" label.
+
+This actually does not cause obvious problems on current kernels due to
+the current flush lock implementation. Older kernels use a counting
+based flush lock mechanism, however, which effectively breaks the lock
+indefinitely when an already unlocked flush lock is repeatedly unlocked.
+Though this only currently occurs on filesystem shutdown, it has
+reproduced the effect of elevating an fs shutdown to a system-wide crash
+or hang.
+
+As it turns out, the flush lock is not actually required for the reclaim
+logic in xfs_reclaim_inode() because by that time we have already cycled
+the flush lock once while holding ILOCK_EXCL. Therefore, remove the
+additional flush lock/unlock cycle around the 'reclaim:' label and
+update branches into this label to release the flush lock where
+appropriate. Add an assert to xfs_ifunlock() to help prevent future
+occurrences of the same problem.
+
+Reported-by: Zorro Lang <zlang@redhat.com>
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_icache.c | 27 ++++++++++++++-------------
+ fs/xfs/xfs_inode.h | 11 ++++++-----
+ 2 files changed, 20 insertions(+), 18 deletions(-)
+
+--- a/fs/xfs/xfs_icache.c
++++ b/fs/xfs/xfs_icache.c
+@@ -123,7 +123,6 @@ __xfs_inode_free(
+ {
+ /* asserts to verify all state is correct here */
+ ASSERT(atomic_read(&ip->i_pincount) == 0);
+- ASSERT(!xfs_isiflocked(ip));
+ XFS_STATS_DEC(ip->i_mount, vn_active);
+
+ call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
+@@ -133,6 +132,8 @@ void
+ xfs_inode_free(
+ struct xfs_inode *ip)
+ {
++ ASSERT(!xfs_isiflocked(ip));
++
+ /*
+ * Because we use RCU freeing we need to ensure the inode always
+ * appears to be reclaimed with an invalid inode number when in the
+@@ -981,6 +982,7 @@ restart:
+
+ if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+ xfs_iunpin_wait(ip);
++ /* xfs_iflush_abort() drops the flush lock */
+ xfs_iflush_abort(ip, false);
+ goto reclaim;
+ }
+@@ -989,10 +991,10 @@ restart:
+ goto out_ifunlock;
+ xfs_iunpin_wait(ip);
+ }
+- if (xfs_iflags_test(ip, XFS_ISTALE))
+- goto reclaim;
+- if (xfs_inode_clean(ip))
++ if (xfs_iflags_test(ip, XFS_ISTALE) || xfs_inode_clean(ip)) {
++ xfs_ifunlock(ip);
+ goto reclaim;
++ }
+
+ /*
+ * Never flush out dirty data during non-blocking reclaim, as it would
+@@ -1030,25 +1032,24 @@ restart:
+ xfs_buf_relse(bp);
+ }
+
+- xfs_iflock(ip);
+ reclaim:
++ ASSERT(!xfs_isiflocked(ip));
++
+ /*
+ * Because we use RCU freeing we need to ensure the inode always appears
+ * to be reclaimed with an invalid inode number when in the free state.
+- * We do this as early as possible under the ILOCK and flush lock so
+- * that xfs_iflush_cluster() can be guaranteed to detect races with us
+- * here. By doing this, we guarantee that once xfs_iflush_cluster has
+- * locked both the XFS_ILOCK and the flush lock that it will see either
+- * a valid, flushable inode that will serialise correctly against the
+- * locks below, or it will see a clean (and invalid) inode that it can
+- * skip.
++ * We do this as early as possible under the ILOCK so that
++ * xfs_iflush_cluster() can be guaranteed to detect races with us here.
++ * By doing this, we guarantee that once xfs_iflush_cluster has locked
++ * XFS_ILOCK that it will see either a valid, flushable inode that will
++ * serialise correctly, or it will see a clean (and invalid) inode that
++ * it can skip.
+ */
+ spin_lock(&ip->i_flags_lock);
+ ip->i_flags = XFS_IRECLAIM;
+ ip->i_ino = 0;
+ spin_unlock(&ip->i_flags_lock);
+
+- xfs_ifunlock(ip);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+ XFS_STATS_INC(ip->i_mount, xs_ig_reclaims);
+--- a/fs/xfs/xfs_inode.h
++++ b/fs/xfs/xfs_inode.h
+@@ -246,6 +246,11 @@ static inline bool xfs_is_reflink_inode(
+ * Synchronize processes attempting to flush the in-core inode back to disk.
+ */
+
++static inline int xfs_isiflocked(struct xfs_inode *ip)
++{
++ return xfs_iflags_test(ip, XFS_IFLOCK);
++}
++
+ extern void __xfs_iflock(struct xfs_inode *ip);
+
+ static inline int xfs_iflock_nowait(struct xfs_inode *ip)
+@@ -261,16 +266,12 @@ static inline void xfs_iflock(struct xfs
+
+ static inline void xfs_ifunlock(struct xfs_inode *ip)
+ {
++ ASSERT(xfs_isiflocked(ip));
+ xfs_iflags_clear(ip, XFS_IFLOCK);
+ smp_mb();
+ wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT);
+ }
+
+-static inline int xfs_isiflocked(struct xfs_inode *ip)
+-{
+- return xfs_iflags_test(ip, XFS_IFLOCK);
+-}
+-
+ /*
+ * Flags for inode locking.
+ * Bit ranges: 1<<1 - 1<<16-1 -- iolock/ilock modes (bitfield)
--- /dev/null
+From hch@lst.de Tue Jan 10 11:29:15 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon, 9 Jan 2017 16:38:51 +0100
+Subject: xfs: forbid AG btrees with level == 0
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, "Darrick J. Wong" <darrick.wong@oracle.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-21-git-send-email-hch@lst.de>
+
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit d2a047f31e86941fa896e0e3271536d50aba415e upstream.
+
+There is no such thing as a zero-level AG btree since even a single-node
+zero-records btree has one level. Btree cursor constructors read
+cur_nlevels straight from disk and then access things like
+cur_bufs[cur_nlevels - 1] which is /really/ bad if cur_nlevels is zero!
+Therefore, strengthen the verifiers to prevent this possibility.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_alloc.c | 10 +++++++---
+ fs/xfs/libxfs/xfs_ialloc.c | 9 ++++++++-
+ 2 files changed, 15 insertions(+), 4 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_alloc.c
++++ b/fs/xfs/libxfs/xfs_alloc.c
+@@ -2455,12 +2455,15 @@ xfs_agf_verify(
+ be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)))
+ return false;
+
+- if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS ||
++ if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
++ be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 ||
++ be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS ||
+ be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS)
+ return false;
+
+ if (xfs_sb_version_hasrmapbt(&mp->m_sb) &&
+- be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS)
++ (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
++ be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS))
+ return false;
+
+ /*
+@@ -2477,7 +2480,8 @@ xfs_agf_verify(
+ return false;
+
+ if (xfs_sb_version_hasreflink(&mp->m_sb) &&
+- be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS)
++ (be32_to_cpu(agf->agf_refcount_level) < 1 ||
++ be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS))
+ return false;
+
+ return true;;
+--- a/fs/xfs/libxfs/xfs_ialloc.c
++++ b/fs/xfs/libxfs/xfs_ialloc.c
+@@ -2510,8 +2510,15 @@ xfs_agi_verify(
+ if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)))
+ return false;
+
+- if (be32_to_cpu(agi->agi_level) > XFS_BTREE_MAXLEVELS)
++ if (be32_to_cpu(agi->agi_level) < 1 ||
++ be32_to_cpu(agi->agi_level) > XFS_BTREE_MAXLEVELS)
+ return false;
++
++ if (xfs_sb_version_hasfinobt(&mp->m_sb) &&
++ (be32_to_cpu(agi->agi_free_level) < 1 ||
++ be32_to_cpu(agi->agi_free_level) > XFS_BTREE_MAXLEVELS))
++ return false;
++
+ /*
+ * during growfs operations, the perag is not fully initialised,
+ * so we can't use it for any useful checking. growfs ensures we can't
--- /dev/null
+From hch@lst.de Tue Jan 10 11:26:23 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon, 9 Jan 2017 16:38:50 +0100
+Subject: xfs: handle cow fork in xfs_bmap_trace_exlist
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, Eric Sandeen <sandeen@sandeen.net>, Eric Sandeen <sandeen@redhat.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-20-git-send-email-hch@lst.de>
+
+
+From: Eric Sandeen <sandeen@sandeen.net>
+
+commit c44a1f22626c153976289e1cd67bdcdfefc16e1f upstream.
+
+By inspection, xfs_bmap_trace_exlist isn't handling cow forks,
+and will trace the data fork instead.
+
+Fix this by setting state appropriately if whichfork
+== XFS_COW_FORK.
+
+()___()
+< @ @ >
+ | |
+ {o_o}
+ (|)
+
+Signed-off-by: Eric Sandeen <sandeen@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_bmap.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -518,7 +518,7 @@ void
+ xfs_bmap_trace_exlist(
+ xfs_inode_t *ip, /* incore inode pointer */
+ xfs_extnum_t cnt, /* count of entries in the list */
+- int whichfork, /* data or attr fork */
++ int whichfork, /* data or attr or cow fork */
+ unsigned long caller_ip)
+ {
+ xfs_extnum_t idx; /* extent record index */
+@@ -527,6 +527,8 @@ xfs_bmap_trace_exlist(
+
+ if (whichfork == XFS_ATTR_FORK)
+ state |= BMAP_ATTRFORK;
++ else if (whichfork == XFS_COW_FORK)
++ state |= BMAP_COWFORK;
+
+ ifp = XFS_IFORK_PTR(ip, whichfork);
+ ASSERT(cnt == xfs_iext_count(ifp));
--- /dev/null
+From hch@lst.de Tue Jan 10 11:30:37 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon, 9 Jan 2017 16:38:58 +0100
+Subject: xfs: ignore leaf attr ichdr.count in verifier during log replay
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, Eric Sandeen <sandeen@redhat.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-28-git-send-email-hch@lst.de>
+
+
+From: Eric Sandeen <sandeen@redhat.com>
+
+commit 2e1d23370e75d7d89350d41b4ab58c7f6a0e26b2 upstream.
+
+When we create a new attribute, we first create a shortform
+attribute, and try to fit the new attribute into it.
+If that fails, we copy the (empty) attribute into a leaf attribute,
+and do the copy again. Thus there can be a transient state where
+we have an empty leaf attribute.
+
+If we encounter this during log replay, the verifier will fail.
+So add a test to ignore this part of the leaf attr verification
+during log replay.
+
+Thanks as usual to dchinner for spotting the problem.
+
+Signed-off-by: Eric Sandeen <sandeen@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_attr_leaf.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/fs/xfs/libxfs/xfs_attr_leaf.c
++++ b/fs/xfs/libxfs/xfs_attr_leaf.c
+@@ -253,6 +253,7 @@ xfs_attr3_leaf_verify(
+ {
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_attr_leafblock *leaf = bp->b_addr;
++ struct xfs_perag *pag = bp->b_pag;
+ struct xfs_attr3_icleaf_hdr ichdr;
+
+ xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf);
+@@ -273,7 +274,12 @@ xfs_attr3_leaf_verify(
+ if (ichdr.magic != XFS_ATTR_LEAF_MAGIC)
+ return false;
+ }
+- if (ichdr.count == 0)
++ /*
++ * In recovery there is a transient state where count == 0 is valid
++ * because we may have transitioned an empty shortform attr to a leaf
++ * if the attr didn't fit in shortform.
++ */
++ if (pag && pag->pagf_init && ichdr.count == 0)
+ return false;
+
+ /* XXX: need to range check rest of attr header values */
--- /dev/null
+From hch@lst.de Tue Jan 10 11:26:01 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon, 9 Jan 2017 16:38:48 +0100
+Subject: xfs: Move AGI buffer type setting to xfs_read_agi
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, Eric Sandeen <sandeen@sandeen.net>, Eric Sandeen <sandeen@redhat.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-18-git-send-email-hch@lst.de>
+
+
+From: Eric Sandeen <sandeen@sandeen.net>
+
+commit 200237d6746faaeaf7f4ff4abbf13f3917cee60a upstream.
+
+We've missed properly setting the buffer type for
+an AGI transaction in 3 spots now, so just move it
+into xfs_read_agi() and set it if we are in a transaction
+to avoid the problem in the future.
+
+This is similar to how it is done in i.e. the dir3
+and attr3 read functions.
+
+Signed-off-by: Eric Sandeen <sandeen@redhat.com>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_ialloc.c | 4 ++--
+ fs/xfs/xfs_inode.c | 2 --
+ fs/xfs/xfs_log_recover.c | 1 -
+ 3 files changed, 2 insertions(+), 5 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_ialloc.c
++++ b/fs/xfs/libxfs/xfs_ialloc.c
+@@ -2450,8 +2450,6 @@ xfs_ialloc_log_agi(
+ ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
+ #endif
+
+- xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGI_BUF);
+-
+ /*
+ * Compute byte offsets for the first and last fields in the first
+ * region and log the agi buffer. This only logs up through
+@@ -2592,6 +2590,8 @@ xfs_read_agi(
+ XFS_FSS_TO_BB(mp, 1), 0, bpp, &xfs_agi_buf_ops);
+ if (error)
+ return error;
++ if (tp)
++ xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_AGI_BUF);
+
+ xfs_buf_set_ref(*bpp, XFS_AGI_REF);
+ return 0;
+--- a/fs/xfs/xfs_inode.c
++++ b/fs/xfs/xfs_inode.c
+@@ -2041,7 +2041,6 @@ xfs_iunlink(
+ agi->agi_unlinked[bucket_index] = cpu_to_be32(agino);
+ offset = offsetof(xfs_agi_t, agi_unlinked) +
+ (sizeof(xfs_agino_t) * bucket_index);
+- xfs_trans_buf_set_type(tp, agibp, XFS_BLFT_AGI_BUF);
+ xfs_trans_log_buf(tp, agibp, offset,
+ (offset + sizeof(xfs_agino_t) - 1));
+ return 0;
+@@ -2133,7 +2132,6 @@ xfs_iunlink_remove(
+ agi->agi_unlinked[bucket_index] = cpu_to_be32(next_agino);
+ offset = offsetof(xfs_agi_t, agi_unlinked) +
+ (sizeof(xfs_agino_t) * bucket_index);
+- xfs_trans_buf_set_type(tp, agibp, XFS_BLFT_AGI_BUF);
+ xfs_trans_log_buf(tp, agibp, offset,
+ (offset + sizeof(xfs_agino_t) - 1));
+ } else {
+--- a/fs/xfs/xfs_log_recover.c
++++ b/fs/xfs/xfs_log_recover.c
+@@ -4929,7 +4929,6 @@ xlog_recover_clear_agi_bucket(
+ agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
+ offset = offsetof(xfs_agi_t, agi_unlinked) +
+ (sizeof(xfs_agino_t) * bucket);
+- xfs_trans_buf_set_type(tp, agibp, XFS_BLFT_AGI_BUF);
+ xfs_trans_log_buf(tp, agibp, offset,
+ (offset + sizeof(xfs_agino_t) - 1));
+
--- /dev/null
+From hch@lst.de Tue Jan 10 11:24:08 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon, 9 Jan 2017 16:38:39 +0100
+Subject: xfs: new inode extent list lookup helpers
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-9-git-send-email-hch@lst.de>
+
+
+commit 93533c7855c3c78c8a900cac65c8d669bb14935d upstream.
+
+xfs_iext_lookup_extent looks up a single extent at the passed in offset,
+and returns the extent covering the area, or the one behind it in case
+of a hole, as well as the index of the returned extent in arguments,
+as well as a simple bool as return value that is set to false if no
+extent could be found because the offset is behind EOF. It is a simpler
+replacement for xfs_bmap_search_extent that leaves looking up the rarely
+needed previous extent to the caller and has a nicer calling convention.
+
+xfs_iext_get_extent is a helper for iterating over the extent list,
+it takes an extent index as input, and returns the extent at that index
+in it's expanded form in an argument if it exists. The actual return
+value is a bool whether the index is valid or not.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_inode_fork.c | 46 +++++++++++++++++++++++++++++++++++++++++
+ fs/xfs/libxfs/xfs_inode_fork.h | 6 +++++
+ 2 files changed, 52 insertions(+)
+
+--- a/fs/xfs/libxfs/xfs_inode_fork.c
++++ b/fs/xfs/libxfs/xfs_inode_fork.c
+@@ -2003,3 +2003,49 @@ xfs_ifork_init_cow(
+ ip->i_cformat = XFS_DINODE_FMT_EXTENTS;
+ ip->i_cnextents = 0;
+ }
++
++/*
++ * Lookup the extent covering bno.
++ *
++ * If there is an extent covering bno return the extent index, and store the
++ * expanded extent structure in *gotp, and the extent index in *idx.
++ * If there is no extent covering bno, but there is an extent after it (e.g.
++ * it lies in a hole) return that extent in *gotp and its index in *idx
++ * instead.
++ * If bno is beyond the last extent return false, and return the index after
++ * the last valid index in *idxp.
++ */
++bool
++xfs_iext_lookup_extent(
++ struct xfs_inode *ip,
++ struct xfs_ifork *ifp,
++ xfs_fileoff_t bno,
++ xfs_extnum_t *idxp,
++ struct xfs_bmbt_irec *gotp)
++{
++ struct xfs_bmbt_rec_host *ep;
++
++ XFS_STATS_INC(ip->i_mount, xs_look_exlist);
++
++ ep = xfs_iext_bno_to_ext(ifp, bno, idxp);
++ if (!ep)
++ return false;
++ xfs_bmbt_get_all(ep, gotp);
++ return true;
++}
++
++/*
++ * Return true if there is an extent at index idx, and return the expanded
++ * extent structure at idx in that case. Else return false.
++ */
++bool
++xfs_iext_get_extent(
++ struct xfs_ifork *ifp,
++ xfs_extnum_t idx,
++ struct xfs_bmbt_irec *gotp)
++{
++ if (idx < 0 || idx >= xfs_iext_count(ifp))
++ return false;
++ xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), gotp);
++ return true;
++}
+--- a/fs/xfs/libxfs/xfs_inode_fork.h
++++ b/fs/xfs/libxfs/xfs_inode_fork.h
+@@ -182,6 +182,12 @@ void xfs_iext_irec_compact_pages(struct
+ void xfs_iext_irec_compact_full(struct xfs_ifork *);
+ void xfs_iext_irec_update_extoffs(struct xfs_ifork *, int, int);
+
++bool xfs_iext_lookup_extent(struct xfs_inode *ip,
++ struct xfs_ifork *ifp, xfs_fileoff_t bno,
++ xfs_extnum_t *idxp, struct xfs_bmbt_irec *gotp);
++bool xfs_iext_get_extent(struct xfs_ifork *ifp, xfs_extnum_t idx,
++ struct xfs_bmbt_irec *gotp);
++
+ extern struct kmem_zone *xfs_ifork_zone;
+
+ extern void xfs_ifork_init_cow(struct xfs_inode *ip);
--- /dev/null
+From hch@lst.de Tue Jan 10 11:25:53 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon, 9 Jan 2017 16:38:47 +0100
+Subject: xfs: pass post-eof speculative prealloc blocks to bmapi
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, Brian Foster <bfoster@redhat.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-17-git-send-email-hch@lst.de>
+
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit f782088c9e5d08e9494c63e68b4e85716df3e5f8 upstream.
+
+xfs_file_iomap_begin_delay() implements post-eof speculative
+preallocation by extending the block count of the requested delayed
+allocation. Now that xfs_bmapi_reserve_delalloc() has been updated to
+handle prealloc blocks separately and tag the inode, update
+xfs_file_iomap_begin_delay() to pass the preallocation through the new
+parameter and to rely on xfs_bmapi_reserve_delalloc() to tag the inode.
+
+Note that this patch does not change behavior.
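+
+In other words the caller goes from folding the speculative blocks into
+an enlarged end_fsb to passing them separately (sketch only, using the
+names from the hunk below):
+
+        /* before: prealloc hidden inside end_fsb */
+        end_fsb = XFS_B_TO_FSBT(mp, end_offset) + prealloc_blocks;
+        error = xfs_bmapi_reserve_delalloc(ip, XFS_DATA_FORK, offset_fsb,
+                        end_fsb - offset_fsb, 0, &got, &idx, eof);
+
+        /* after: the speculative tail is a separate argument */
+        prealloc_blocks = p_end_fsb - end_fsb;
+        error = xfs_bmapi_reserve_delalloc(ip, XFS_DATA_FORK, offset_fsb,
+                        end_fsb - offset_fsb, prealloc_blocks, &got, &idx, eof);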
+
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_iomap.c | 33 +++++++++++++--------------------
+ 1 file changed, 13 insertions(+), 20 deletions(-)
+
+--- a/fs/xfs/xfs_iomap.c
++++ b/fs/xfs/xfs_iomap.c
+@@ -536,10 +536,11 @@ xfs_file_iomap_begin_delay(
+ xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
+ xfs_fileoff_t maxbytes_fsb =
+ XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
+- xfs_fileoff_t end_fsb, orig_end_fsb;
++ xfs_fileoff_t end_fsb;
+ int error = 0, eof = 0;
+ struct xfs_bmbt_irec got;
+ xfs_extnum_t idx;
++ xfs_fsblock_t prealloc_blocks = 0;
+
+ ASSERT(!XFS_IS_REALTIME_INODE(ip));
+ ASSERT(!xfs_get_extsz_hint(ip));
+@@ -594,33 +595,32 @@ xfs_file_iomap_begin_delay(
+ * the lower level functions are updated.
+ */
+ count = min_t(loff_t, count, 1024 * PAGE_SIZE);
+- end_fsb = orig_end_fsb =
+- min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb);
++ end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb);
+
+ if (eof) {
+- xfs_fsblock_t prealloc_blocks;
+-
+ prealloc_blocks = xfs_iomap_prealloc_size(ip, offset, count, idx);
+ if (prealloc_blocks) {
+ xfs_extlen_t align;
+ xfs_off_t end_offset;
++ xfs_fileoff_t p_end_fsb;
+
+ end_offset = XFS_WRITEIO_ALIGN(mp, offset + count - 1);
+- end_fsb = XFS_B_TO_FSBT(mp, end_offset) +
+- prealloc_blocks;
++ p_end_fsb = XFS_B_TO_FSBT(mp, end_offset) +
++ prealloc_blocks;
+
+ align = xfs_eof_alignment(ip, 0);
+ if (align)
+- end_fsb = roundup_64(end_fsb, align);
++ p_end_fsb = roundup_64(p_end_fsb, align);
+
+- end_fsb = min(end_fsb, maxbytes_fsb);
+- ASSERT(end_fsb > offset_fsb);
++ p_end_fsb = min(p_end_fsb, maxbytes_fsb);
++ ASSERT(p_end_fsb > offset_fsb);
++ prealloc_blocks = p_end_fsb - end_fsb;
+ }
+ }
+
+ retry:
+ error = xfs_bmapi_reserve_delalloc(ip, XFS_DATA_FORK, offset_fsb,
+- end_fsb - offset_fsb, 0, &got, &idx, eof);
++ end_fsb - offset_fsb, prealloc_blocks, &got, &idx, eof);
+ switch (error) {
+ case 0:
+ break;
+@@ -628,8 +628,8 @@ retry:
+ case -EDQUOT:
+ /* retry without any preallocation */
+ trace_xfs_delalloc_enospc(ip, offset, count);
+- if (end_fsb != orig_end_fsb) {
+- end_fsb = orig_end_fsb;
++ if (prealloc_blocks) {
++ prealloc_blocks = 0;
+ goto retry;
+ }
+ /*FALLTHRU*/
+@@ -637,13 +637,6 @@ retry:
+ goto out_unlock;
+ }
+
+- /*
+- * Tag the inode as speculatively preallocated so we can reclaim this
+- * space on demand, if necessary.
+- */
+- if (end_fsb != orig_end_fsb)
+- xfs_inode_set_eofblocks_tag(ip);
+-
+ trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
+ done:
+ if (isnullstartblock(got.br_startblock))
--- /dev/null
+From hch@lst.de Tue Jan 10 11:26:14 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon, 9 Jan 2017 16:38:49 +0100
+Subject: xfs: pass state not whichfork to trace_xfs_extlist
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, Eric Sandeen <sandeen@sandeen.net>, Eric Sandeen <sandeen@redhat.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-19-git-send-email-hch@lst.de>
+
+
+From: Eric Sandeen <sandeen@sandeen.net>
+
+commit 7710517fc37b1899722707883b54694ea710b3c0 upstream.
+
+When xfs_bmap_trace_exlist called trace_xfs_extlist,
+it sent in the "whichfork" var instead of the bmap "state"
+as expected (even though state was already set up for this
+purpose).
+
+As a result, the xfs_bmap_class in tracing code used
+"whichfork" not state in xfs_iext_state_to_fork(), and got
+the wrong ifork pointer. It all goes downhill from
+there, including an ASSERT failure when if_bytes is empty
+by the time it reaches xfs_iext_get_ext():
+
+XFS: Assertion failed: idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)
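+
+For context, xfs_iext_state_to_fork() picks the fork from the BMAP_*FORK
+state bits, roughly like this (paraphrased, not part of this patch):
+
+        struct xfs_ifork *
+        xfs_iext_state_to_fork(struct xfs_inode *ip, int state)
+        {
+                if (state & BMAP_COWFORK)
+                        return ip->i_cowfp;
+                else if (state & BMAP_ATTRFORK)
+                        return ip->i_afp;
+                return &ip->i_df;
+        }
+
+A whichfork value is a small fork index and never has those bits set, so
+the tracepoint always resolved to the data fork even for attr or COW
+fork updates, and then indexed past that fork's if_bytes.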
+
+Signed-off-by: Eric Sandeen <sandeen@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_bmap.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -531,7 +531,7 @@ xfs_bmap_trace_exlist(
+ ifp = XFS_IFORK_PTR(ip, whichfork);
+ ASSERT(cnt == xfs_iext_count(ifp));
+ for (idx = 0; idx < cnt; idx++)
+- trace_xfs_extlist(ip, idx, whichfork, caller_ip);
++ trace_xfs_extlist(ip, idx, state, caller_ip);
+ }
+
+ /*
--- /dev/null
+From hch@lst.de Tue Jan 10 11:23:27 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon, 9 Jan 2017 16:38:36 +0100
+Subject: xfs: provide helper for counting extents from if_bytes
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, Eric Sandeen <sandeen@sandeen.net>, Eric Sandeen <sandeen@redhat.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-6-git-send-email-hch@lst.de>
+
+
+From: Eric Sandeen <sandeen@sandeen.net>
+
+commit 5d829300bee000980a09ac2ccb761cb25867b67c upstream.
+
+The open-coded pattern:
+
+ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)
+
+is all over the xfs code; provide a new helper
+xfs_iext_count(ifp) to count the number of incore extents
+in an inode fork.
+
+[dchinner: pick up several missed conversions]
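+
+A typical conversion looks like this (before/after sketch):
+
+        /* before */
+        nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+
+        /* after */
+        nextents = xfs_iext_count(ifp);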
+
+Signed-off-by: Eric Sandeen <sandeen@redhat.com>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_bmap.c | 49 +++++++++++++++++++----------------------
+ fs/xfs/libxfs/xfs_inode_fork.c | 31 +++++++++++++++----------
+ fs/xfs/libxfs/xfs_inode_fork.h | 1
+ fs/xfs/xfs_bmap_util.c | 34 +++++++++++-----------------
+ fs/xfs/xfs_inode_item.c | 4 +--
+ fs/xfs/xfs_ioctl.c | 6 +----
+ fs/xfs/xfs_qm.c | 2 -
+ fs/xfs/xfs_reflink.c | 4 +--
+ 8 files changed, 64 insertions(+), 67 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -515,7 +515,7 @@ xfs_bmap_trace_exlist(
+ state |= BMAP_ATTRFORK;
+
+ ifp = XFS_IFORK_PTR(ip, whichfork);
+- ASSERT(cnt == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
++ ASSERT(cnt == xfs_iext_count(ifp));
+ for (idx = 0; idx < cnt; idx++)
+ trace_xfs_extlist(ip, idx, whichfork, caller_ip);
+ }
+@@ -811,7 +811,7 @@ try_another_ag:
+ XFS_BTREE_LONG_PTRS);
+
+ arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
+- nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
++ nextents = xfs_iext_count(ifp);
+ for (cnt = i = 0; i < nextents; i++) {
+ ep = xfs_iext_get_ext(ifp, i);
+ if (!isnullstartblock(xfs_bmbt_get_startblock(ep))) {
+@@ -1296,7 +1296,7 @@ xfs_bmap_read_extents(
+ /*
+ * Here with bp and block set to the leftmost leaf node in the tree.
+ */
+- room = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
++ room = xfs_iext_count(ifp);
+ i = 0;
+ /*
+ * Loop over all leaf nodes. Copy information to the extent records.
+@@ -1361,7 +1361,7 @@ xfs_bmap_read_extents(
+ return error;
+ block = XFS_BUF_TO_BLOCK(bp);
+ }
+- ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
++ ASSERT(i == xfs_iext_count(ifp));
+ ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork));
+ XFS_BMAP_TRACE_EXLIST(ip, i, whichfork);
+ return 0;
+@@ -1404,7 +1404,7 @@ xfs_bmap_search_multi_extents(
+ if (lastx > 0) {
+ xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx - 1), prevp);
+ }
+- if (lastx < (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) {
++ if (lastx < xfs_iext_count(ifp)) {
+ xfs_bmbt_get_all(ep, gotp);
+ *eofp = 0;
+ } else {
+@@ -1497,7 +1497,7 @@ xfs_bmap_first_unused(
+ (error = xfs_iread_extents(tp, ip, whichfork)))
+ return error;
+ lowest = *first_unused;
+- nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
++ nextents = xfs_iext_count(ifp);
+ for (idx = 0, lastaddr = 0, max = lowest; idx < nextents; idx++) {
+ xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, idx);
+ off = xfs_bmbt_get_startoff(ep);
+@@ -1582,7 +1582,7 @@ xfs_bmap_last_extent(
+ return error;
+ }
+
+- nextents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
++ nextents = xfs_iext_count(ifp);
+ if (nextents == 0) {
+ *is_empty = 1;
+ return 0;
+@@ -1735,7 +1735,7 @@ xfs_bmap_add_extent_delay_real(
+ &bma->ip->i_d.di_nextents);
+
+ ASSERT(bma->idx >= 0);
+- ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
++ ASSERT(bma->idx <= xfs_iext_count(ifp));
+ ASSERT(!isnullstartblock(new->br_startblock));
+ ASSERT(!bma->cur ||
+ (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
+@@ -1794,7 +1794,7 @@ xfs_bmap_add_extent_delay_real(
+ * Don't set contiguous if the combined extent would be too large.
+ * Also check for all-three-contiguous being too large.
+ */
+- if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
++ if (bma->idx < xfs_iext_count(ifp) - 1) {
+ state |= BMAP_RIGHT_VALID;
+ xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx + 1), &RIGHT);
+
+@@ -2300,7 +2300,7 @@ xfs_bmap_add_extent_unwritten_real(
+ ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+
+ ASSERT(*idx >= 0);
+- ASSERT(*idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
++ ASSERT(*idx <= xfs_iext_count(ifp));
+ ASSERT(!isnullstartblock(new->br_startblock));
+
+ XFS_STATS_INC(mp, xs_add_exlist);
+@@ -2356,7 +2356,7 @@ xfs_bmap_add_extent_unwritten_real(
+ * Don't set contiguous if the combined extent would be too large.
+ * Also check for all-three-contiguous being too large.
+ */
+- if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
++ if (*idx < xfs_iext_count(&ip->i_df) - 1) {
+ state |= BMAP_RIGHT_VALID;
+ xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT);
+ if (isnullstartblock(RIGHT.br_startblock))
+@@ -2836,7 +2836,7 @@ xfs_bmap_add_extent_hole_delay(
+ * Check and set flags if the current (right) segment exists.
+ * If it doesn't exist, we're converting the hole at end-of-file.
+ */
+- if (*idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
++ if (*idx < xfs_iext_count(ifp)) {
+ state |= BMAP_RIGHT_VALID;
+ xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right);
+
+@@ -2966,7 +2966,7 @@ xfs_bmap_add_extent_hole_real(
+ ifp = XFS_IFORK_PTR(bma->ip, whichfork);
+
+ ASSERT(bma->idx >= 0);
+- ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
++ ASSERT(bma->idx <= xfs_iext_count(ifp));
+ ASSERT(!isnullstartblock(new->br_startblock));
+ ASSERT(!bma->cur ||
+ !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
+@@ -2992,7 +2992,7 @@ xfs_bmap_add_extent_hole_real(
+ * Check and set flags if this segment has a current value.
+ * Not true if we're inserting into the "hole" at eof.
+ */
+- if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
++ if (bma->idx < xfs_iext_count(ifp)) {
+ state |= BMAP_RIGHT_VALID;
+ xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &right);
+ if (isnullstartblock(right.br_startblock))
+@@ -4221,7 +4221,7 @@ xfs_bmapi_read(
+ break;
+
+ /* Else go on to the next record. */
+- if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
++ if (++lastx < xfs_iext_count(ifp))
+ xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got);
+ else
+ eof = 1;
+@@ -4733,7 +4733,7 @@ xfs_bmapi_write(
+
+ /* Else go on to the next record. */
+ bma.prev = bma.got;
+- if (++bma.idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) {
++ if (++bma.idx < xfs_iext_count(ifp)) {
+ xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma.idx),
+ &bma.got);
+ } else
+@@ -4885,7 +4885,7 @@ xfs_bmap_del_extent_delay(
+ da_new = 0;
+
+ ASSERT(*idx >= 0);
+- ASSERT(*idx < ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
++ ASSERT(*idx <= xfs_iext_count(ifp));
+ ASSERT(del->br_blockcount > 0);
+ ASSERT(got->br_startoff <= del->br_startoff);
+ ASSERT(got_endoff >= del_endoff);
+@@ -5016,7 +5016,7 @@ xfs_bmap_del_extent_cow(
+ got_endoff = got->br_startoff + got->br_blockcount;
+
+ ASSERT(*idx >= 0);
+- ASSERT(*idx < ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
++ ASSERT(*idx <= xfs_iext_count(ifp));
+ ASSERT(del->br_blockcount > 0);
+ ASSERT(got->br_startoff <= del->br_startoff);
+ ASSERT(got_endoff >= del_endoff);
+@@ -5122,8 +5122,7 @@ xfs_bmap_del_extent(
+ state |= BMAP_COWFORK;
+
+ ifp = XFS_IFORK_PTR(ip, whichfork);
+- ASSERT((*idx >= 0) && (*idx < ifp->if_bytes /
+- (uint)sizeof(xfs_bmbt_rec_t)));
++ ASSERT((*idx >= 0) && (*idx < xfs_iext_count(ifp)));
+ ASSERT(del->br_blockcount > 0);
+ ep = xfs_iext_get_ext(ifp, *idx);
+ xfs_bmbt_get_all(ep, &got);
+@@ -5448,7 +5447,6 @@ __xfs_bunmapi(
+ int logflags; /* transaction logging flags */
+ xfs_extlen_t mod; /* rt extent offset */
+ xfs_mount_t *mp; /* mount structure */
+- xfs_extnum_t nextents; /* number of file extents */
+ xfs_bmbt_irec_t prev; /* previous extent record */
+ xfs_fileoff_t start; /* first file offset deleted */
+ int tmp_logflags; /* partial logging flags */
+@@ -5480,8 +5478,7 @@ __xfs_bunmapi(
+ if (!(ifp->if_flags & XFS_IFEXTENTS) &&
+ (error = xfs_iread_extents(tp, ip, whichfork)))
+ return error;
+- nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+- if (nextents == 0) {
++ if (xfs_iext_count(ifp) == 0) {
+ *rlen = 0;
+ return 0;
+ }
+@@ -5966,7 +5963,7 @@ xfs_bmse_shift_one(
+
+ mp = ip->i_mount;
+ ifp = XFS_IFORK_PTR(ip, whichfork);
+- total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
++ total_extents = xfs_iext_count(ifp);
+
+ xfs_bmbt_get_all(gotp, &got);
+
+@@ -6143,7 +6140,7 @@ xfs_bmap_shift_extents(
+ * are collapsing out, so we cannot use the count of real extents here.
+ * Instead we have to calculate it from the incore fork.
+ */
+- total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
++ total_extents = xfs_iext_count(ifp);
+ if (total_extents == 0) {
+ *done = 1;
+ goto del_cursor;
+@@ -6203,7 +6200,7 @@ xfs_bmap_shift_extents(
+ * count can change. Update the total and grade the next record.
+ */
+ if (direction == SHIFT_LEFT) {
+- total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
++ total_extents = xfs_iext_count(ifp);
+ stop_extent = total_extents;
+ }
+
+--- a/fs/xfs/libxfs/xfs_inode_fork.c
++++ b/fs/xfs/libxfs/xfs_inode_fork.c
+@@ -775,6 +775,13 @@ xfs_idestroy_fork(
+ }
+ }
+
++/* Count number of incore extents based on if_bytes */
++xfs_extnum_t
++xfs_iext_count(struct xfs_ifork *ifp)
++{
++ return ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
++}
++
+ /*
+ * Convert in-core extents to on-disk form
+ *
+@@ -803,7 +810,7 @@ xfs_iextents_copy(
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
+ ASSERT(ifp->if_bytes > 0);
+
+- nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
++ nrecs = xfs_iext_count(ifp);
+ XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork);
+ ASSERT(nrecs > 0);
+
+@@ -941,7 +948,7 @@ xfs_iext_get_ext(
+ xfs_extnum_t idx) /* index of target extent */
+ {
+ ASSERT(idx >= 0);
+- ASSERT(idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
++ ASSERT(idx < xfs_iext_count(ifp));
+
+ if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) {
+ return ifp->if_u1.if_ext_irec->er_extbuf;
+@@ -1017,7 +1024,7 @@ xfs_iext_add(
+ int new_size; /* size of extents after adding */
+ xfs_extnum_t nextents; /* number of extents in file */
+
+- nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
++ nextents = xfs_iext_count(ifp);
+ ASSERT((idx >= 0) && (idx <= nextents));
+ byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t);
+ new_size = ifp->if_bytes + byte_diff;
+@@ -1241,7 +1248,7 @@ xfs_iext_remove(
+ trace_xfs_iext_remove(ip, idx, state, _RET_IP_);
+
+ ASSERT(ext_diff > 0);
+- nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
++ nextents = xfs_iext_count(ifp);
+ new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t);
+
+ if (new_size == 0) {
+@@ -1270,7 +1277,7 @@ xfs_iext_remove_inline(
+
+ ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
+ ASSERT(idx < XFS_INLINE_EXTS);
+- nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
++ nextents = xfs_iext_count(ifp);
+ ASSERT(((nextents - ext_diff) > 0) &&
+ (nextents - ext_diff) < XFS_INLINE_EXTS);
+
+@@ -1309,7 +1316,7 @@ xfs_iext_remove_direct(
+ ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
+ new_size = ifp->if_bytes -
+ (ext_diff * sizeof(xfs_bmbt_rec_t));
+- nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
++ nextents = xfs_iext_count(ifp);
+
+ if (new_size == 0) {
+ xfs_iext_destroy(ifp);
+@@ -1546,7 +1553,7 @@ xfs_iext_indirect_to_direct(
+ int size; /* size of file extents */
+
+ ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+- nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
++ nextents = xfs_iext_count(ifp);
+ ASSERT(nextents <= XFS_LINEAR_EXTS);
+ size = nextents * sizeof(xfs_bmbt_rec_t);
+
+@@ -1620,7 +1627,7 @@ xfs_iext_bno_to_ext(
+ xfs_extnum_t nextents; /* number of file extents */
+ xfs_fileoff_t startoff = 0; /* start offset of extent */
+
+- nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
++ nextents = xfs_iext_count(ifp);
+ if (nextents == 0) {
+ *idxp = 0;
+ return NULL;
+@@ -1733,8 +1740,8 @@ xfs_iext_idx_to_irec(
+
+ ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+ ASSERT(page_idx >= 0);
+- ASSERT(page_idx <= ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
+- ASSERT(page_idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) || realloc);
++ ASSERT(page_idx <= xfs_iext_count(ifp));
++ ASSERT(page_idx < xfs_iext_count(ifp) || realloc);
+
+ nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+ erp_idx = 0;
+@@ -1782,7 +1789,7 @@ xfs_iext_irec_init(
+ xfs_extnum_t nextents; /* number of extents in file */
+
+ ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
+- nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
++ nextents = xfs_iext_count(ifp);
+ ASSERT(nextents <= XFS_LINEAR_EXTS);
+
+ erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS);
+@@ -1906,7 +1913,7 @@ xfs_iext_irec_compact(
+
+ ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+ nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+- nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
++ nextents = xfs_iext_count(ifp);
+
+ if (nextents == 0) {
+ xfs_iext_destroy(ifp);
+--- a/fs/xfs/libxfs/xfs_inode_fork.h
++++ b/fs/xfs/libxfs/xfs_inode_fork.h
+@@ -152,6 +152,7 @@ void xfs_init_local_fork(struct xfs_ino
+
+ struct xfs_bmbt_rec_host *
+ xfs_iext_get_ext(struct xfs_ifork *, xfs_extnum_t);
++xfs_extnum_t xfs_iext_count(struct xfs_ifork *);
+ void xfs_iext_insert(struct xfs_inode *, xfs_extnum_t, xfs_extnum_t,
+ struct xfs_bmbt_irec *, int);
+ void xfs_iext_add(struct xfs_ifork *, xfs_extnum_t, int);
+--- a/fs/xfs/xfs_bmap_util.c
++++ b/fs/xfs/xfs_bmap_util.c
+@@ -359,9 +359,7 @@ xfs_bmap_count_blocks(
+ mp = ip->i_mount;
+ ifp = XFS_IFORK_PTR(ip, whichfork);
+ if ( XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ) {
+- xfs_bmap_count_leaves(ifp, 0,
+- ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t),
+- count);
++ xfs_bmap_count_leaves(ifp, 0, xfs_iext_count(ifp), count);
+ return 0;
+ }
+
+@@ -426,7 +424,7 @@ xfs_getbmapx_fix_eof_hole(
+ ifp = XFS_IFORK_PTR(ip, whichfork);
+ if (!moretocome &&
+ xfs_iext_bno_to_ext(ifp, fileblock, &lastx) &&
+- (lastx == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))-1))
++ (lastx == xfs_iext_count(ifp) - 1))
+ out->bmv_oflags |= BMV_OF_LAST;
+ }
+
+@@ -1878,15 +1876,13 @@ xfs_swap_extent_forks(
+
+ switch (ip->i_d.di_format) {
+ case XFS_DINODE_FMT_EXTENTS:
+- /* If the extents fit in the inode, fix the
+- * pointer. Otherwise it's already NULL or
+- * pointing to the extent.
++ /*
++ * If the extents fit in the inode, fix the pointer. Otherwise
++ * it's already NULL or pointing to the extent.
+ */
+- nextents = ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+- if (nextents <= XFS_INLINE_EXTS) {
+- ifp->if_u1.if_extents =
+- ifp->if_u2.if_inline_ext;
+- }
++ nextents = xfs_iext_count(&ip->i_df);
++ if (nextents <= XFS_INLINE_EXTS)
++ ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
+ (*src_log_flags) |= XFS_ILOG_DEXT;
+ break;
+ case XFS_DINODE_FMT_BTREE:
+@@ -1898,15 +1894,13 @@ xfs_swap_extent_forks(
+
+ switch (tip->i_d.di_format) {
+ case XFS_DINODE_FMT_EXTENTS:
+- /* If the extents fit in the inode, fix the
+- * pointer. Otherwise it's already NULL or
+- * pointing to the extent.
++ /*
++ * If the extents fit in the inode, fix the pointer. Otherwise
++ * it's already NULL or pointing to the extent.
+ */
+- nextents = tip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+- if (nextents <= XFS_INLINE_EXTS) {
+- tifp->if_u1.if_extents =
+- tifp->if_u2.if_inline_ext;
+- }
++ nextents = xfs_iext_count(&tip->i_df);
++ if (nextents <= XFS_INLINE_EXTS)
++ tifp->if_u1.if_extents = tifp->if_u2.if_inline_ext;
+ (*target_log_flags) |= XFS_ILOG_DEXT;
+ break;
+ case XFS_DINODE_FMT_BTREE:
+--- a/fs/xfs/xfs_inode_item.c
++++ b/fs/xfs/xfs_inode_item.c
+@@ -164,7 +164,7 @@ xfs_inode_item_format_data_fork(
+ struct xfs_bmbt_rec *p;
+
+ ASSERT(ip->i_df.if_u1.if_extents != NULL);
+- ASSERT(ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) > 0);
++ ASSERT(xfs_iext_count(&ip->i_df) > 0);
+
+ p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IEXT);
+ data_bytes = xfs_iextents_copy(ip, p, XFS_DATA_FORK);
+@@ -261,7 +261,7 @@ xfs_inode_item_format_attr_fork(
+ ip->i_afp->if_bytes > 0) {
+ struct xfs_bmbt_rec *p;
+
+- ASSERT(ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) ==
++ ASSERT(xfs_iext_count(ip->i_afp) ==
+ ip->i_d.di_anextents);
+ ASSERT(ip->i_afp->if_u1.if_extents != NULL);
+
+--- a/fs/xfs/xfs_ioctl.c
++++ b/fs/xfs/xfs_ioctl.c
+@@ -910,16 +910,14 @@ xfs_ioc_fsgetxattr(
+ if (attr) {
+ if (ip->i_afp) {
+ if (ip->i_afp->if_flags & XFS_IFEXTENTS)
+- fa.fsx_nextents = ip->i_afp->if_bytes /
+- sizeof(xfs_bmbt_rec_t);
++ fa.fsx_nextents = xfs_iext_count(ip->i_afp);
+ else
+ fa.fsx_nextents = ip->i_d.di_anextents;
+ } else
+ fa.fsx_nextents = 0;
+ } else {
+ if (ip->i_df.if_flags & XFS_IFEXTENTS)
+- fa.fsx_nextents = ip->i_df.if_bytes /
+- sizeof(xfs_bmbt_rec_t);
++ fa.fsx_nextents = xfs_iext_count(&ip->i_df);
+ else
+ fa.fsx_nextents = ip->i_d.di_nextents;
+ }
+--- a/fs/xfs/xfs_qm.c
++++ b/fs/xfs/xfs_qm.c
+@@ -1135,7 +1135,7 @@ xfs_qm_get_rtblks(
+ return error;
+ }
+ rtblks = 0;
+- nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
++ nextents = xfs_iext_count(ifp);
+ for (idx = 0; idx < nextents; idx++)
+ rtblks += xfs_bmbt_get_blockcount(xfs_iext_get_ext(ifp, idx));
+ *O_rtblks = (xfs_qcnt_t)rtblks;
+--- a/fs/xfs/xfs_reflink.c
++++ b/fs/xfs/xfs_reflink.c
+@@ -486,7 +486,7 @@ xfs_reflink_trim_irec_to_next_cow(
+ /* This is the extent before; try sliding up one. */
+ if (irec.br_startoff < offset_fsb) {
+ idx++;
+- if (idx >= ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
++ if (idx >= xfs_iext_count(ifp))
+ return 0;
+ gotp = xfs_iext_get_ext(ifp, idx);
+ xfs_bmbt_get_all(gotp, &irec);
+@@ -566,7 +566,7 @@ xfs_reflink_cancel_cow_blocks(
+ xfs_bmap_del_extent_cow(ip, &idx, &got, &del);
+ }
+
+- if (++idx >= ifp->if_bytes / sizeof(struct xfs_bmbt_rec))
++ if (++idx >= xfs_iext_count(ifp))
+ break;
+ xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &got);
+ }
--- /dev/null
+From hch@lst.de Tue Jan 10 11:24:50 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon, 9 Jan 2017 16:38:42 +0100
+Subject: xfs: remove prev argument to xfs_bmapi_reserve_delalloc
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-12-git-send-email-hch@lst.de>
+
+
+commit 65c5f419788d623a0410eca1866134f5e4628594 upstream.
+
+We can easily look up the previous extent in the cases where we need it,
+which saves the callers from looking it up for us later in the series.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_bmap.c | 8 ++++++--
+ fs/xfs/libxfs/xfs_bmap.h | 3 +--
+ fs/xfs/xfs_iomap.c | 3 +--
+ fs/xfs/xfs_reflink.c | 2 +-
+ 4 files changed, 9 insertions(+), 7 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -4250,7 +4250,6 @@ xfs_bmapi_reserve_delalloc(
+ xfs_fileoff_t aoff,
+ xfs_filblks_t len,
+ struct xfs_bmbt_irec *got,
+- struct xfs_bmbt_irec *prev,
+ xfs_extnum_t *lastx,
+ int eof)
+ {
+@@ -4272,7 +4271,12 @@ xfs_bmapi_reserve_delalloc(
+ else
+ extsz = xfs_get_extsz_hint(ip);
+ if (extsz) {
+- error = xfs_bmap_extsize_align(mp, got, prev, extsz, rt, eof,
++ struct xfs_bmbt_irec prev;
++
++ if (!xfs_iext_get_extent(ifp, *lastx - 1, &prev))
++ prev.br_startoff = NULLFILEOFF;
++
++ error = xfs_bmap_extsize_align(mp, got, &prev, extsz, rt, eof,
+ 1, 0, &aoff, &alen);
+ ASSERT(!error);
+ }
+--- a/fs/xfs/libxfs/xfs_bmap.h
++++ b/fs/xfs/libxfs/xfs_bmap.h
+@@ -243,8 +243,7 @@ struct xfs_bmbt_rec_host *
+ struct xfs_bmbt_irec *gotp, struct xfs_bmbt_irec *prevp);
+ int xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, int whichfork,
+ xfs_fileoff_t aoff, xfs_filblks_t len,
+- struct xfs_bmbt_irec *got, struct xfs_bmbt_irec *prev,
+- xfs_extnum_t *lastx, int eof);
++ struct xfs_bmbt_irec *got, xfs_extnum_t *lastx, int eof);
+
+ enum xfs_bmap_intent_type {
+ XFS_BMAP_MAP = 1,
+--- a/fs/xfs/xfs_iomap.c
++++ b/fs/xfs/xfs_iomap.c
+@@ -622,8 +622,7 @@ xfs_file_iomap_begin_delay(
+
+ retry:
+ error = xfs_bmapi_reserve_delalloc(ip, XFS_DATA_FORK, offset_fsb,
+- end_fsb - offset_fsb, &got,
+- &prev, &idx, eof);
++ end_fsb - offset_fsb, &got, &idx, eof);
+ switch (error) {
+ case 0:
+ break;
+--- a/fs/xfs/xfs_reflink.c
++++ b/fs/xfs/xfs_reflink.c
+@@ -293,7 +293,7 @@ xfs_reflink_reserve_cow(
+
+ retry:
+ error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, imap->br_startoff,
+- end_fsb - imap->br_startoff, &got, &prev, &idx, eof);
++ end_fsb - imap->br_startoff, &got, &idx, eof);
+ switch (error) {
+ case 0:
+ break;
--- /dev/null
+From hch@lst.de Tue Jan 10 11:25:03 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon, 9 Jan 2017 16:38:43 +0100
+Subject: xfs: track preallocation separately in xfs_bmapi_reserve_delalloc()
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, Brian Foster <bfoster@redhat.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-13-git-send-email-hch@lst.de>
+
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit 974ae922efd93b07b6cdf989ae959883f6f05fd8 upstream.
+
+Speculative preallocation is currently processed entirely by the callers
+of xfs_bmapi_reserve_delalloc(). The caller determines how much
+preallocation to include, adjusts the extent length and passes down the
+resulting request.
+
+While this works fine for post-eof speculative preallocation, it is not
+as reliable for COW fork preallocation. COW fork preallocation is
+implemented via the cowextszhint, which aligns the start offset as well
+as the length of the extent. Further, it is difficult for the caller to
+accurately identify when preallocation occurs because the returned
+extent could have been merged with neighboring extents in the fork.
+
+To simplify this situation and facilitate further COW fork preallocation
+enhancements, update xfs_bmapi_reserve_delalloc() to take a separate
+preallocation parameter to incorporate into the allocation request. The
+preallocation blocks value is tacked onto the end of the request and
+adjusted to accommodate neighboring extents and extent size limits.
+Since xfs_bmapi_reserve_delalloc() now knows precisely how much
+preallocation was included in the allocation, it can also tag the inodes
+appropriately to support preallocation reclaim.
+
+Note that xfs_bmapi_reserve_delalloc() callers are not yet updated to
+use the preallocation mechanism. This patch should not change behavior
+outside of correctly tagging reflink inodes when start offset
+preallocation occurs (which the caller does not handle correctly).
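+
+Caller-side sketch of the new interface (the internal prealloc handling
+is taken from the hunk below; the call itself is illustrative, since at
+this point in the series all callers still pass 0):
+
+        error = xfs_bmapi_reserve_delalloc(ip, XFS_DATA_FORK, off, len,
+                        prealloc_blocks, &got, &idx, eof);
+
+        /* inside the helper: cap the request, prealloc absorbs the trim */
+        alen = XFS_FILBLKS_MIN(len + prealloc, MAXEXTLEN);
+        if (!eof)
+                alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);
+        if (prealloc && alen >= len)
+                prealloc = alen - len;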
+
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_bmap.c | 23 +++++++++++++++++++++--
+ fs/xfs/libxfs/xfs_bmap.h | 2 +-
+ fs/xfs/xfs_iomap.c | 2 +-
+ fs/xfs/xfs_reflink.c | 2 +-
+ 4 files changed, 24 insertions(+), 5 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -50,6 +50,7 @@
+ #include "xfs_ag_resv.h"
+ #include "xfs_refcount.h"
+ #include "xfs_rmap_btree.h"
++#include "xfs_icache.h"
+
+
+ kmem_zone_t *xfs_bmap_free_item_zone;
+@@ -4247,8 +4248,9 @@ int
+ xfs_bmapi_reserve_delalloc(
+ struct xfs_inode *ip,
+ int whichfork,
+- xfs_fileoff_t aoff,
++ xfs_fileoff_t off,
+ xfs_filblks_t len,
++ xfs_filblks_t prealloc,
+ struct xfs_bmbt_irec *got,
+ xfs_extnum_t *lastx,
+ int eof)
+@@ -4260,10 +4262,17 @@ xfs_bmapi_reserve_delalloc(
+ char rt = XFS_IS_REALTIME_INODE(ip);
+ xfs_extlen_t extsz;
+ int error;
++ xfs_fileoff_t aoff = off;
+
+- alen = XFS_FILBLKS_MIN(len, MAXEXTLEN);
++ /*
++ * Cap the alloc length. Keep track of prealloc so we know whether to
++ * tag the inode before we return.
++ */
++ alen = XFS_FILBLKS_MIN(len + prealloc, MAXEXTLEN);
+ if (!eof)
+ alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);
++ if (prealloc && alen >= len)
++ prealloc = alen - len;
+
+ /* Figure out the extent size, adjust alen */
+ if (whichfork == XFS_COW_FORK)
+@@ -4329,6 +4338,16 @@ xfs_bmapi_reserve_delalloc(
+ */
+ xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got);
+
++ /*
++ * Tag the inode if blocks were preallocated. Note that COW fork
++ * preallocation can occur at the start or end of the extent, even when
++ * prealloc == 0, so we must also check the aligned offset and length.
++ */
++ if (whichfork == XFS_DATA_FORK && prealloc)
++ xfs_inode_set_eofblocks_tag(ip);
++ if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len))
++ xfs_inode_set_cowblocks_tag(ip);
++
+ ASSERT(got->br_startoff <= aoff);
+ ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen);
+ ASSERT(isnullstartblock(got->br_startblock));
+--- a/fs/xfs/libxfs/xfs_bmap.h
++++ b/fs/xfs/libxfs/xfs_bmap.h
+@@ -242,7 +242,7 @@ struct xfs_bmbt_rec_host *
+ int fork, int *eofp, xfs_extnum_t *lastxp,
+ struct xfs_bmbt_irec *gotp, struct xfs_bmbt_irec *prevp);
+ int xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, int whichfork,
+- xfs_fileoff_t aoff, xfs_filblks_t len,
++ xfs_fileoff_t off, xfs_filblks_t len, xfs_filblks_t prealloc,
+ struct xfs_bmbt_irec *got, xfs_extnum_t *lastx, int eof);
+
+ enum xfs_bmap_intent_type {
+--- a/fs/xfs/xfs_iomap.c
++++ b/fs/xfs/xfs_iomap.c
+@@ -622,7 +622,7 @@ xfs_file_iomap_begin_delay(
+
+ retry:
+ error = xfs_bmapi_reserve_delalloc(ip, XFS_DATA_FORK, offset_fsb,
+- end_fsb - offset_fsb, &got, &idx, eof);
++ end_fsb - offset_fsb, 0, &got, &idx, eof);
+ switch (error) {
+ case 0:
+ break;
+--- a/fs/xfs/xfs_reflink.c
++++ b/fs/xfs/xfs_reflink.c
+@@ -293,7 +293,7 @@ xfs_reflink_reserve_cow(
+
+ retry:
+ error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, imap->br_startoff,
+- end_fsb - imap->br_startoff, &got, &idx, eof);
++ end_fsb - imap->br_startoff, 0, &got, &idx, eof);
+ switch (error) {
+ case 0:
+ break;
--- /dev/null
+From hch@lst.de Tue Jan 10 11:30:48 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon, 9 Jan 2017 16:38:59 +0100
+Subject: xfs: use GPF_NOFS when allocating btree cursors
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, "Darrick J. Wong" <darrick.wong@oracle.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-29-git-send-email-hch@lst.de>
+
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit b24a978c377be5f14e798cb41238e66fe51aab2f upstream.
+
+Use NOFS allocations for btree cursors, since they can be allocated
+with the ilock held.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_alloc_btree.c | 2 +-
+ fs/xfs/libxfs/xfs_bmap_btree.c | 2 +-
+ fs/xfs/libxfs/xfs_ialloc_btree.c | 2 +-
+ 3 files changed, 3 insertions(+), 3 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_alloc_btree.c
++++ b/fs/xfs/libxfs/xfs_alloc_btree.c
+@@ -421,7 +421,7 @@ xfs_allocbt_init_cursor(
+
+ ASSERT(btnum == XFS_BTNUM_BNO || btnum == XFS_BTNUM_CNT);
+
+- cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
++ cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS);
+
+ cur->bc_tp = tp;
+ cur->bc_mp = mp;
+--- a/fs/xfs/libxfs/xfs_bmap_btree.c
++++ b/fs/xfs/libxfs/xfs_bmap_btree.c
+@@ -796,7 +796,7 @@ xfs_bmbt_init_cursor(
+ struct xfs_btree_cur *cur;
+ ASSERT(whichfork != XFS_COW_FORK);
+
+- cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
++ cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS);
+
+ cur->bc_tp = tp;
+ cur->bc_mp = mp;
+--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
++++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
+@@ -357,7 +357,7 @@ xfs_inobt_init_cursor(
+ struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
+ struct xfs_btree_cur *cur;
+
+- cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
++ cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS);
+
+ cur->bc_tp = tp;
+ cur->bc_mp = mp;
--- /dev/null
+From hch@lst.de Tue Jan 10 11:25:16 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon, 9 Jan 2017 16:38:44 +0100
+Subject: xfs: use new extent lookup helpers in __xfs_reflink_reserve_cow
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-14-git-send-email-hch@lst.de>
+
+
+commit 2755fc4438501c8c28e7783df890e889f6772bee upstream.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_reflink.c | 12 +++++++-----
+ 1 file changed, 7 insertions(+), 5 deletions(-)
+
+--- a/fs/xfs/xfs_reflink.c
++++ b/fs/xfs/xfs_reflink.c
+@@ -243,10 +243,11 @@ xfs_reflink_reserve_cow(
+ struct xfs_bmbt_irec *imap,
+ bool *shared)
+ {
+- struct xfs_bmbt_irec got, prev;
++ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
++ struct xfs_bmbt_irec got;
+ xfs_fileoff_t end_fsb, orig_end_fsb;
+- int eof = 0, error = 0;
+- bool trimmed;
++ int error = 0;
++ bool eof = false, trimmed;
+ xfs_extnum_t idx;
+ xfs_extlen_t align;
+
+@@ -258,8 +259,9 @@ xfs_reflink_reserve_cow(
+ * extent list is generally faster than going out to the shared extent
+ * tree.
+ */
+- xfs_bmap_search_extents(ip, imap->br_startoff, XFS_COW_FORK, &eof, &idx,
+- &got, &prev);
++
++ if (!xfs_iext_lookup_extent(ip, ifp, imap->br_startoff, &idx, &got))
++ eof = true;
+ if (!eof && got.br_startoff <= imap->br_startoff) {
+ trace_xfs_reflink_cow_found(ip, imap);
+ xfs_trim_extent(imap, got.br_startoff, got.br_blockcount);
--- /dev/null
+From hch@lst.de Tue Jan 10 11:25:38 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon, 9 Jan 2017 16:38:46 +0100
+Subject: xfs: use new extent lookup helpers xfs_file_iomap_begin_delay
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, Dave Chinner <david@fromorbit.com>
+Message-ID: <1483976343-661-16-git-send-email-hch@lst.de>
+
+
+commit 656152e552e5cbe0c11ad261b524376217c2fb13 upstream.
+
+And only look up the previous extent inside xfs_iomap_prealloc_size
+if we actually need it.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_iomap.c | 20 +++++++++-----------
+ 1 file changed, 9 insertions(+), 11 deletions(-)
+
+--- a/fs/xfs/xfs_iomap.c
++++ b/fs/xfs/xfs_iomap.c
+@@ -395,11 +395,12 @@ xfs_iomap_prealloc_size(
+ struct xfs_inode *ip,
+ loff_t offset,
+ loff_t count,
+- xfs_extnum_t idx,
+- struct xfs_bmbt_irec *prev)
++ xfs_extnum_t idx)
+ {
+ struct xfs_mount *mp = ip->i_mount;
++ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+ xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
++ struct xfs_bmbt_irec prev;
+ int shift = 0;
+ int64_t freesp;
+ xfs_fsblock_t qblocks;
+@@ -419,8 +420,8 @@ xfs_iomap_prealloc_size(
+ */
+ if ((mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) ||
+ XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_dalign) ||
+- idx == 0 ||
+- prev->br_startoff + prev->br_blockcount < offset_fsb)
++ !xfs_iext_get_extent(ifp, idx - 1, &prev) ||
++ prev.br_startoff + prev.br_blockcount < offset_fsb)
+ return mp->m_writeio_blocks;
+
+ /*
+@@ -439,8 +440,8 @@ xfs_iomap_prealloc_size(
+ * always extends to MAXEXTLEN rather than falling short due to things
+ * like stripe unit/width alignment of real extents.
+ */
+- if (prev->br_blockcount <= (MAXEXTLEN >> 1))
+- alloc_blocks = prev->br_blockcount << 1;
++ if (prev.br_blockcount <= (MAXEXTLEN >> 1))
++ alloc_blocks = prev.br_blockcount << 1;
+ else
+ alloc_blocks = XFS_B_TO_FSB(mp, offset);
+ if (!alloc_blocks)
+@@ -538,7 +539,6 @@ xfs_file_iomap_begin_delay(
+ xfs_fileoff_t end_fsb, orig_end_fsb;
+ int error = 0, eof = 0;
+ struct xfs_bmbt_irec got;
+- struct xfs_bmbt_irec prev;
+ xfs_extnum_t idx;
+
+ ASSERT(!XFS_IS_REALTIME_INODE(ip));
+@@ -563,8 +563,7 @@ xfs_file_iomap_begin_delay(
+ goto out_unlock;
+ }
+
+- xfs_bmap_search_extents(ip, offset_fsb, XFS_DATA_FORK, &eof, &idx,
+- &got, &prev);
++ eof = !xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got);
+ if (!eof && got.br_startoff <= offset_fsb) {
+ if (xfs_is_reflink_inode(ip)) {
+ bool shared;
+@@ -601,8 +600,7 @@ xfs_file_iomap_begin_delay(
+ if (eof) {
+ xfs_fsblock_t prealloc_blocks;
+
+- prealloc_blocks =
+- xfs_iomap_prealloc_size(ip, offset, count, idx, &prev);
++ prealloc_blocks = xfs_iomap_prealloc_size(ip, offset, count, idx);
+ if (prealloc_blocks) {
+ xfs_extlen_t align;
+ xfs_off_t end_offset;
--- /dev/null
+From hch@lst.de Tue Jan 10 11:31:10 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon, 9 Jan 2017 16:39:01 +0100
+Subject: xfs: use the actual AG length when reserving blocks
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, "Darrick J. Wong" <darrick.wong@oracle.com>
+Message-ID: <1483976343-661-31-git-send-email-hch@lst.de>
+
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit 20e73b000bcded44a91b79429d8fa743247602ad upstream.
+
+We need to use the actual AG length when making per-AG reservations,
+since we could otherwise end up reserving more blocks out of the last
+AG than there are actual blocks.
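+
+As a made-up example: with 1048576-block AGs and a runt last AG of only
+262144 blocks, sizing the rmap/refcount reservation from
+mp->m_sb.sb_agblocks asks that last AG for a worst-case tree covering
+1048576 blocks, which it can never hold; sizing it from the AGF's
+agf_length (262144 here) keeps the ask within the AG.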
+
+Complained-about-by: Brian Foster <bfoster@redhat.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_ag_resv.c | 3 +++
+ fs/xfs/libxfs/xfs_refcount_btree.c | 9 ++++++---
+ fs/xfs/libxfs/xfs_refcount_btree.h | 3 ++-
+ fs/xfs/libxfs/xfs_rmap_btree.c | 14 +++++++-------
+ fs/xfs/libxfs/xfs_rmap_btree.h | 3 ++-
+ fs/xfs/xfs_fsops.c | 14 ++++++++++++++
+ 6 files changed, 34 insertions(+), 12 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_ag_resv.c
++++ b/fs/xfs/libxfs/xfs_ag_resv.c
+@@ -256,6 +256,9 @@ xfs_ag_resv_init(
+ goto out;
+ }
+
++ ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved +
++ xfs_perag_resv(pag, XFS_AG_RESV_AGFL)->ar_reserved <=
++ pag->pagf_freeblks + pag->pagf_flcount);
+ out:
+ return error;
+ }
+--- a/fs/xfs/libxfs/xfs_refcount_btree.c
++++ b/fs/xfs/libxfs/xfs_refcount_btree.c
+@@ -408,13 +408,14 @@ xfs_refcountbt_calc_size(
+ */
+ xfs_extlen_t
+ xfs_refcountbt_max_size(
+- struct xfs_mount *mp)
++ struct xfs_mount *mp,
++ xfs_agblock_t agblocks)
+ {
+ /* Bail out if we're uninitialized, which can happen in mkfs. */
+ if (mp->m_refc_mxr[0] == 0)
+ return 0;
+
+- return xfs_refcountbt_calc_size(mp, mp->m_sb.sb_agblocks);
++ return xfs_refcountbt_calc_size(mp, agblocks);
+ }
+
+ /*
+@@ -429,22 +430,24 @@ xfs_refcountbt_calc_reserves(
+ {
+ struct xfs_buf *agbp;
+ struct xfs_agf *agf;
++ xfs_agblock_t agblocks;
+ xfs_extlen_t tree_len;
+ int error;
+
+ if (!xfs_sb_version_hasreflink(&mp->m_sb))
+ return 0;
+
+- *ask += xfs_refcountbt_max_size(mp);
+
+ error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
+ if (error)
+ return error;
+
+ agf = XFS_BUF_TO_AGF(agbp);
++ agblocks = be32_to_cpu(agf->agf_length);
+ tree_len = be32_to_cpu(agf->agf_refcount_blocks);
+ xfs_buf_relse(agbp);
+
++ *ask += xfs_refcountbt_max_size(mp, agblocks);
+ *used += tree_len;
+
+ return error;
+--- a/fs/xfs/libxfs/xfs_refcount_btree.h
++++ b/fs/xfs/libxfs/xfs_refcount_btree.h
+@@ -66,7 +66,8 @@ extern void xfs_refcountbt_compute_maxle
+
+ extern xfs_extlen_t xfs_refcountbt_calc_size(struct xfs_mount *mp,
+ unsigned long long len);
+-extern xfs_extlen_t xfs_refcountbt_max_size(struct xfs_mount *mp);
++extern xfs_extlen_t xfs_refcountbt_max_size(struct xfs_mount *mp,
++ xfs_agblock_t agblocks);
+
+ extern int xfs_refcountbt_calc_reserves(struct xfs_mount *mp,
+ xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used);
+--- a/fs/xfs/libxfs/xfs_rmap_btree.c
++++ b/fs/xfs/libxfs/xfs_rmap_btree.c
+@@ -549,13 +549,14 @@ xfs_rmapbt_calc_size(
+ */
+ xfs_extlen_t
+ xfs_rmapbt_max_size(
+- struct xfs_mount *mp)
++ struct xfs_mount *mp,
++ xfs_agblock_t agblocks)
+ {
+ /* Bail out if we're uninitialized, which can happen in mkfs. */
+ if (mp->m_rmap_mxr[0] == 0)
+ return 0;
+
+- return xfs_rmapbt_calc_size(mp, mp->m_sb.sb_agblocks);
++ return xfs_rmapbt_calc_size(mp, agblocks);
+ }
+
+ /*
+@@ -570,25 +571,24 @@ xfs_rmapbt_calc_reserves(
+ {
+ struct xfs_buf *agbp;
+ struct xfs_agf *agf;
+- xfs_extlen_t pool_len;
++ xfs_agblock_t agblocks;
+ xfs_extlen_t tree_len;
+ int error;
+
+ if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+ return 0;
+
+- /* Reserve 1% of the AG or enough for 1 block per record. */
+- pool_len = max(mp->m_sb.sb_agblocks / 100, xfs_rmapbt_max_size(mp));
+- *ask += pool_len;
+-
+ error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
+ if (error)
+ return error;
+
+ agf = XFS_BUF_TO_AGF(agbp);
++ agblocks = be32_to_cpu(agf->agf_length);
+ tree_len = be32_to_cpu(agf->agf_rmap_blocks);
+ xfs_buf_relse(agbp);
+
++ /* Reserve 1% of the AG or enough for 1 block per record. */
++ *ask += max(agblocks / 100, xfs_rmapbt_max_size(mp, agblocks));
+ *used += tree_len;
+
+ return error;
+--- a/fs/xfs/libxfs/xfs_rmap_btree.h
++++ b/fs/xfs/libxfs/xfs_rmap_btree.h
+@@ -60,7 +60,8 @@ extern void xfs_rmapbt_compute_maxlevels
+
+ extern xfs_extlen_t xfs_rmapbt_calc_size(struct xfs_mount *mp,
+ unsigned long long len);
+-extern xfs_extlen_t xfs_rmapbt_max_size(struct xfs_mount *mp);
++extern xfs_extlen_t xfs_rmapbt_max_size(struct xfs_mount *mp,
++ xfs_agblock_t agblocks);
+
+ extern int xfs_rmapbt_calc_reserves(struct xfs_mount *mp,
+ xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used);
+--- a/fs/xfs/xfs_fsops.c
++++ b/fs/xfs/xfs_fsops.c
+@@ -631,6 +631,20 @@ xfs_growfs_data_private(
+ xfs_set_low_space_thresholds(mp);
+ mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);
+
++ /*
++ * If we expanded the last AG, free the per-AG reservation
++ * so we can reinitialize it with the new size.
++ */
++ if (new) {
++ struct xfs_perag *pag;
++
++ pag = xfs_perag_get(mp, agno);
++ error = xfs_ag_resv_free(pag);
++ xfs_perag_put(pag);
++ if (error)
++ goto out;
++ }
++
+ /* Reserve AG metadata blocks. */
+ error = xfs_fs_reserve_ag_blocks(mp);
+ if (error && error != -ENOSPC)