--- /dev/null
+From e150dcd459e1b441eaf08f341a986f04e61bf3b8 Mon Sep 17 00:00:00 2001
+From: Helge Deller <deller@gmx.de>
+Date: Mon, 18 Sep 2017 11:34:16 -0700
+Subject: fs/xfs: Use %pS printk format for direct addresses
+
+From: Helge Deller <deller@gmx.de>
+
+commit e150dcd459e1b441eaf08f341a986f04e61bf3b8 upstream.
+
+Use the %pS instead of the %pF printk format specifier for printing symbols
+from direct addresses. This is needed for the ia64, ppc64 and parisc64
+architectures.
+
+Signed-off-by: Helge Deller <deller@gmx.de>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/xfs_error.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/xfs/xfs_error.c
++++ b/fs/xfs/xfs_error.c
+@@ -167,7 +167,7 @@ xfs_verifier_error(
+ {
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+
+- xfs_alert(mp, "Metadata %s detected at %pF, %s block 0x%llx",
++ xfs_alert(mp, "Metadata %s detected at %pS, %s block 0x%llx",
+ bp->b_error == -EFSBADCRC ? "CRC error" : "corruption",
+ __return_address, bp->b_ops->name, bp->b_bn);
+
keys-don-t-let-add_key-update-an-uninstantiated-key.patch
pkcs7-prevent-null-pointer-dereference-since-sinfo-is-not-always-set.patch
vmbus-fix-missing-signaling-in-hv_signal_on_read.patch
+xfs-don-t-unconditionally-clear-the-reflink-flag-on-zero-block-files.patch
+xfs-evict-cow-fork-extents-when-performing-finsert-fcollapse.patch
+fs-xfs-use-ps-printk-format-for-direct-addresses.patch
+xfs-report-zeroed-or-not-correctly-in-xfs_zero_range.patch
+xfs-update-i_size-after-unwritten-conversion-in-dio-completion.patch
+xfs-perag-initialization-should-only-touch-m_ag_max_usable-for-ag-0.patch
+xfs-capture-state-of-the-right-inode-in-xfs_iflush_done.patch
+xfs-always-swap-the-cow-forks-when-swapping-extents.patch
+xfs-handle-racy-aio-in-xfs_reflink_end_cow.patch
+xfs-don-t-log-uninitialised-fields-in-inode-structures.patch
+xfs-move-more-rt-specific-code-under-config_xfs_rt.patch
+xfs-don-t-change-inode-mode-if-acl-update-fails.patch
+xfs-reinit-btree-pointer-on-attr-tree-inactivation-walk.patch
+xfs-handle-error-if-xfs_btree_get_bufs-fails.patch
+xfs-cancel-dirty-pages-on-invalidation.patch
+xfs-trim-writepage-mapping-to-within-eof.patch
--- /dev/null
+From 52bfcdd7adbc26639bc7b2356ab9a3f5dad68ad6 Mon Sep 17 00:00:00 2001
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+Date: Mon, 18 Sep 2017 09:41:18 -0700
+Subject: xfs: always swap the cow forks when swapping extents
+
+From: Darrick J. Wong <darrick.wong@oracle.com>
+
+commit 52bfcdd7adbc26639bc7b2356ab9a3f5dad68ad6 upstream.
+
+Since the CoW fork exists as a secondary data structure to the data
+fork, we must always swap cow forks during swapext. We also need to
+swap the extent counts and reset the cowblocks tags.
+
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/xfs_bmap_util.c | 24 ++++++++++++++++++++++--
+ 1 file changed, 22 insertions(+), 2 deletions(-)
+
+--- a/fs/xfs/xfs_bmap_util.c
++++ b/fs/xfs/xfs_bmap_util.c
+@@ -2106,11 +2106,31 @@ xfs_swap_extents(
+ ip->i_d.di_flags2 |= tip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK;
+ tip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
+ tip->i_d.di_flags2 |= f & XFS_DIFLAG2_REFLINK;
++ }
++
++ /* Swap the cow forks. */
++ if (xfs_sb_version_hasreflink(&mp->m_sb)) {
++ xfs_extnum_t extnum;
++
++ ASSERT(ip->i_cformat == XFS_DINODE_FMT_EXTENTS);
++ ASSERT(tip->i_cformat == XFS_DINODE_FMT_EXTENTS);
++
++ extnum = ip->i_cnextents;
++ ip->i_cnextents = tip->i_cnextents;
++ tip->i_cnextents = extnum;
++
+ cowfp = ip->i_cowfp;
+ ip->i_cowfp = tip->i_cowfp;
+ tip->i_cowfp = cowfp;
+- xfs_inode_set_cowblocks_tag(ip);
+- xfs_inode_set_cowblocks_tag(tip);
++
++ if (ip->i_cowfp && ip->i_cnextents)
++ xfs_inode_set_cowblocks_tag(ip);
++ else
++ xfs_inode_clear_cowblocks_tag(ip);
++ if (tip->i_cowfp && tip->i_cnextents)
++ xfs_inode_set_cowblocks_tag(tip);
++ else
++ xfs_inode_clear_cowblocks_tag(tip);
+ }
+
+ xfs_trans_log_inode(tp, ip, src_log_flags);
--- /dev/null
+From 793d7dbe6d82a50b9d14bf992b9eaacb70a11ce6 Mon Sep 17 00:00:00 2001
+From: Dave Chinner <dchinner@redhat.com>
+Date: Fri, 13 Oct 2017 09:47:45 -0700
+Subject: xfs: cancel dirty pages on invalidation
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit 793d7dbe6d82a50b9d14bf992b9eaacb70a11ce6 upstream.
+
+Recently we've had warnings arise from the vm handing us pages
+without bufferheads attached to them. This should not ever occur
+in XFS, but we don't defend against it properly if it does. The only
+place where we remove bufferheads from a page is in
+xfs_vm_releasepage(), but we can't tell the difference here between
+"page is dirty so don't release" and "page is dirty but is being
+invalidated so release it".
+
+Some places that invalidate pages ask for the pages to be released and
+then, after calling ->releasepage, check whether the page was dirty
+and abort the invalidation if it was. This
+is a possible vector for releasing buffers from a page but then
+leaving it in the mapping, so we really do need to avoid dirty pages
+in xfs_vm_releasepage().
+
+To differentiate between invalidated pages and normal pages, we need
+to clear the page dirty flag when invalidating the pages. This can
+be done through xfs_vm_invalidatepage(), and will result in
+xfs_vm_releasepage() seeing the page as clean, which matches the
+bufferhead state on the page after calling block_invalidatepage().
+
+Hence we can re-add the page dirty check in xfs_vm_releasepage to
+catch the case where we might be releasing a page that is actually
+dirty and so should not have the bufferheads on it removed. This
+will remove one possible vector of "dirty page with no bufferheads"
+and so help narrow down the search for the root cause of that
+problem.
+
+Signed-Off-By: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/xfs_aops.c | 34 ++++++++++++++++++++++------------
+ 1 file changed, 22 insertions(+), 12 deletions(-)
+
+--- a/fs/xfs/xfs_aops.c
++++ b/fs/xfs/xfs_aops.c
+@@ -726,6 +726,14 @@ xfs_vm_invalidatepage(
+ {
+ trace_xfs_invalidatepage(page->mapping->host, page, offset,
+ length);
++
++ /*
++ * If we are invalidating the entire page, clear the dirty state from it
++ * so that we can check for attempts to release dirty cached pages in
++ * xfs_vm_releasepage().
++ */
++ if (offset == 0 && length >= PAGE_SIZE)
++ cancel_dirty_page(page);
+ block_invalidatepage(page, offset, length);
+ }
+
+@@ -1181,25 +1189,27 @@ xfs_vm_releasepage(
+ * mm accommodates an old ext3 case where clean pages might not have had
+ * the dirty bit cleared. Thus, it can send actual dirty pages to
+ * ->releasepage() via shrink_active_list(). Conversely,
+- * block_invalidatepage() can send pages that are still marked dirty
+- * but otherwise have invalidated buffers.
++ * block_invalidatepage() can send pages that are still marked dirty but
++ * otherwise have invalidated buffers.
+ *
+ * We want to release the latter to avoid unnecessary buildup of the
+- * LRU, skip the former and warn if we've left any lingering
+- * delalloc/unwritten buffers on clean pages. Skip pages with delalloc
+- * or unwritten buffers and warn if the page is not dirty. Otherwise
+- * try to release the buffers.
++ * LRU, so xfs_vm_invalidatepage() clears the page dirty flag on pages
++ * that are entirely invalidated and need to be released. Hence the
++ * only time we should get dirty pages here is through
++ * shrink_active_list() and so we can simply skip those now.
++ *
++ * warn if we've left any lingering delalloc/unwritten buffers on clean
++ * or invalidated pages we are about to release.
+ */
++ if (PageDirty(page))
++ return 0;
++
+ xfs_count_page_state(page, &delalloc, &unwritten);
+
+- if (delalloc) {
+- WARN_ON_ONCE(!PageDirty(page));
++ if (WARN_ON_ONCE(delalloc))
+ return 0;
+- }
+- if (unwritten) {
+- WARN_ON_ONCE(!PageDirty(page));
++ if (WARN_ON_ONCE(unwritten))
+ return 0;
+- }
+
+ return try_to_free_buffers(page);
+ }
--- /dev/null
+From 842f6e9f786226c58fcbd5ef80eadca72fdfe652 Mon Sep 17 00:00:00 2001
+From: Carlos Maiolino <cmaiolino@redhat.com>
+Date: Fri, 22 Sep 2017 11:47:46 -0700
+Subject: xfs: Capture state of the right inode in xfs_iflush_done
+
+From: Carlos Maiolino <cmaiolino@redhat.com>
+
+commit 842f6e9f786226c58fcbd5ef80eadca72fdfe652 upstream.
+
+My previous patch, d3a304b6292168b83b45d624784f973fdc1ca674, checks for
+the XFS_LI_FAILED flag in xfs_iflush_done(), so the failed item can be
+properly resubmitted.
+
+In the loop scanning other inodes being completed, it should check the
+current item for the XFS_LI_FAILED flag, and not the initial one.
+
+The state of the initial inode is checked after the loop ends.
+
+Kudos to Eric for catching this.
+
+Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/xfs_inode_item.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/xfs/xfs_inode_item.c
++++ b/fs/xfs/xfs_inode_item.c
+@@ -745,7 +745,7 @@ xfs_iflush_done(
+ */
+ iip = INODE_ITEM(blip);
+ if ((iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) ||
+- lip->li_flags & XFS_LI_FAILED)
++ (blip->li_flags & XFS_LI_FAILED))
+ need_ail++;
+
+ blip = next;
--- /dev/null
+From 67f2ffe31d1a683170c2ba0ecc643e42a5fdd397 Mon Sep 17 00:00:00 2001
+From: Dave Chinner <dchinner@redhat.com>
+Date: Mon, 9 Oct 2017 11:37:23 -0700
+Subject: xfs: don't change inode mode if ACL update fails
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit 67f2ffe31d1a683170c2ba0ecc643e42a5fdd397 upstream.
+
+If we get ENOSPC half way through setting the ACL, the inode mode
+can still be changed even though the ACL does not exist. Reorder the
+operation to only change the mode of the inode if the ACL is set
+correctly.
+
+Whilst this does not fix the problem with crash consistency (that requires
+attribute addition to be a deferred op), it does allow ENOSPC and other
+non-fatal errors from setting an xattr to be handled sanely.
+
+This fixes xfstests generic/449.
+
+Signed-Off-By: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/xfs_acl.c | 22 ++++++++++++++++------
+ 1 file changed, 16 insertions(+), 6 deletions(-)
+
+--- a/fs/xfs/xfs_acl.c
++++ b/fs/xfs/xfs_acl.c
+@@ -247,6 +247,8 @@ xfs_set_mode(struct inode *inode, umode_
+ int
+ xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+ {
++ umode_t mode;
++ bool set_mode = false;
+ int error = 0;
+
+ if (!acl)
+@@ -257,16 +259,24 @@ xfs_set_acl(struct inode *inode, struct
+ return error;
+
+ if (type == ACL_TYPE_ACCESS) {
+- umode_t mode;
+-
+ error = posix_acl_update_mode(inode, &mode, &acl);
+ if (error)
+ return error;
+- error = xfs_set_mode(inode, mode);
+- if (error)
+- return error;
++ set_mode = true;
+ }
+
+ set_acl:
+- return __xfs_set_acl(inode, acl, type);
++ error = __xfs_set_acl(inode, acl, type);
++ if (error)
++ return error;
++
++ /*
++ * We set the mode after successfully updating the ACL xattr because the
++ * xattr update can fail at ENOSPC and we don't want to change the mode
++ * if the ACL update hasn't been applied.
++ */
++ if (set_mode)
++ error = xfs_set_mode(inode, mode);
++
++ return error;
+ }
--- /dev/null
+From 20413e37d71befd02b5846acdaf5e2564dd1c38e Mon Sep 17 00:00:00 2001
+From: Dave Chinner <dchinner@redhat.com>
+Date: Mon, 9 Oct 2017 11:37:22 -0700
+Subject: xfs: Don't log uninitialised fields in inode structures
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit 20413e37d71befd02b5846acdaf5e2564dd1c38e upstream.
+
+Prevent kmemcheck from throwing warnings about reading uninitialised
+memory when formatting inodes into the incore log buffer. There are
+several issues here - we don't always log all the fields in the
+inode log format item, and we never log the inode's
+di_next_unlinked field.
+
+In the case of the inode log format item, this is exacerbated
+by the old xfs_inode_log_format structure padding issue. Hence make
+the padded, 64 bit aligned version of the structure the one we always
+use for formatting the log and get rid of the 64 bit variant. This
+means we'll always log the 64-bit version and so recovery only needs
+to convert from the unpadded 32 bit version from older 32 bit
+kernels.
+
+Signed-Off-By: Dave Chinner <dchinner@redhat.com>
+Tested-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+
+---
+ fs/xfs/libxfs/xfs_log_format.h | 27 ++++---------
+ fs/xfs/xfs_inode_item.c | 82 ++++++++++++++++++++---------------------
+ fs/xfs/xfs_ondisk.h | 2 -
+ 3 files changed, 50 insertions(+), 61 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_log_format.h
++++ b/fs/xfs/libxfs/xfs_log_format.h
+@@ -270,6 +270,7 @@ typedef struct xfs_inode_log_format {
+ __uint32_t ilf_fields; /* flags for fields logged */
+ __uint16_t ilf_asize; /* size of attr d/ext/root */
+ __uint16_t ilf_dsize; /* size of data/ext/root */
++ __uint32_t ilf_pad; /* pad for 64 bit boundary */
+ __uint64_t ilf_ino; /* inode number */
+ union {
+ __uint32_t ilfu_rdev; /* rdev value for dev inode*/
+@@ -280,29 +281,17 @@ typedef struct xfs_inode_log_format {
+ __int32_t ilf_boffset; /* off of inode in buffer */
+ } xfs_inode_log_format_t;
+
+-typedef struct xfs_inode_log_format_32 {
+- __uint16_t ilf_type; /* inode log item type */
+- __uint16_t ilf_size; /* size of this item */
+- __uint32_t ilf_fields; /* flags for fields logged */
+- __uint16_t ilf_asize; /* size of attr d/ext/root */
+- __uint16_t ilf_dsize; /* size of data/ext/root */
+- __uint64_t ilf_ino; /* inode number */
+- union {
+- __uint32_t ilfu_rdev; /* rdev value for dev inode*/
+- uuid_t ilfu_uuid; /* mount point value */
+- } ilf_u;
+- __int64_t ilf_blkno; /* blkno of inode buffer */
+- __int32_t ilf_len; /* len of inode buffer */
+- __int32_t ilf_boffset; /* off of inode in buffer */
+-} __attribute__((packed)) xfs_inode_log_format_32_t;
+-
+-typedef struct xfs_inode_log_format_64 {
++/*
++ * Old 32 bit systems will log in this format without the 64 bit
++ * alignment padding. Recovery will detect this and convert it to the
++ * correct format.
++ */
++struct xfs_inode_log_format_32 {
+ __uint16_t ilf_type; /* inode log item type */
+ __uint16_t ilf_size; /* size of this item */
+ __uint32_t ilf_fields; /* flags for fields logged */
+ __uint16_t ilf_asize; /* size of attr d/ext/root */
+ __uint16_t ilf_dsize; /* size of data/ext/root */
+- __uint32_t ilf_pad; /* pad for 64 bit boundary */
+ __uint64_t ilf_ino; /* inode number */
+ union {
+ __uint32_t ilfu_rdev; /* rdev value for dev inode*/
+@@ -311,7 +300,7 @@ typedef struct xfs_inode_log_format_64 {
+ __int64_t ilf_blkno; /* blkno of inode buffer */
+ __int32_t ilf_len; /* len of inode buffer */
+ __int32_t ilf_boffset; /* off of inode in buffer */
+-} xfs_inode_log_format_64_t;
++} __attribute__((packed));
+
+
+ /*
+--- a/fs/xfs/xfs_inode_item.c
++++ b/fs/xfs/xfs_inode_item.c
+@@ -364,6 +364,9 @@ xfs_inode_to_log_dinode(
+ to->di_dmstate = from->di_dmstate;
+ to->di_flags = from->di_flags;
+
++ /* log a dummy value to ensure log structure is fully initialised */
++ to->di_next_unlinked = NULLAGINO;
++
+ if (from->di_version == 3) {
+ to->di_changecount = inode->i_version;
+ to->di_crtime.t_sec = from->di_crtime.t_sec;
+@@ -404,6 +407,11 @@ xfs_inode_item_format_core(
+ * the second with the on-disk inode structure, and a possible third and/or
+ * fourth with the inode data/extents/b-tree root and inode attributes
+ * data/extents/b-tree root.
++ *
++ * Note: Always use the 64 bit inode log format structure so we don't
++ * leave an uninitialised hole in the format item on 64 bit systems. Log
++ * recovery on 32 bit systems handles this just fine, so there's no reason
++ * for not using an initialising the properly padded structure all the time.
+ */
+ STATIC void
+ xfs_inode_item_format(
+@@ -412,8 +420,8 @@ xfs_inode_item_format(
+ {
+ struct xfs_inode_log_item *iip = INODE_ITEM(lip);
+ struct xfs_inode *ip = iip->ili_inode;
+- struct xfs_inode_log_format *ilf;
+ struct xfs_log_iovec *vecp = NULL;
++ struct xfs_inode_log_format *ilf;
+
+ ASSERT(ip->i_d.di_version > 1);
+
+@@ -425,7 +433,17 @@ xfs_inode_item_format(
+ ilf->ilf_boffset = ip->i_imap.im_boffset;
+ ilf->ilf_fields = XFS_ILOG_CORE;
+ ilf->ilf_size = 2; /* format + core */
+- xlog_finish_iovec(lv, vecp, sizeof(struct xfs_inode_log_format));
++
++ /*
++ * make sure we don't leak uninitialised data into the log in the case
++ * when we don't log every field in the inode.
++ */
++ ilf->ilf_dsize = 0;
++ ilf->ilf_asize = 0;
++ ilf->ilf_pad = 0;
++ memset(&ilf->ilf_u.ilfu_uuid, 0, sizeof(ilf->ilf_u.ilfu_uuid));
++
++ xlog_finish_iovec(lv, vecp, sizeof(*ilf));
+
+ xfs_inode_item_format_core(ip, lv, &vecp);
+ xfs_inode_item_format_data_fork(iip, ilf, lv, &vecp);
+@@ -855,48 +873,30 @@ xfs_istale_done(
+ }
+
+ /*
+- * convert an xfs_inode_log_format struct from either 32 or 64 bit versions
+- * (which can have different field alignments) to the native version
++ * convert an xfs_inode_log_format struct from the old 32 bit version
++ * (which can have different field alignments) to the native 64 bit version
+ */
+ int
+ xfs_inode_item_format_convert(
+- xfs_log_iovec_t *buf,
+- xfs_inode_log_format_t *in_f)
++ struct xfs_log_iovec *buf,
++ struct xfs_inode_log_format *in_f)
+ {
+- if (buf->i_len == sizeof(xfs_inode_log_format_32_t)) {
+- xfs_inode_log_format_32_t *in_f32 = buf->i_addr;
++ struct xfs_inode_log_format_32 *in_f32 = buf->i_addr;
+
+- in_f->ilf_type = in_f32->ilf_type;
+- in_f->ilf_size = in_f32->ilf_size;
+- in_f->ilf_fields = in_f32->ilf_fields;
+- in_f->ilf_asize = in_f32->ilf_asize;
+- in_f->ilf_dsize = in_f32->ilf_dsize;
+- in_f->ilf_ino = in_f32->ilf_ino;
+- /* copy biggest field of ilf_u */
+- memcpy(in_f->ilf_u.ilfu_uuid.__u_bits,
+- in_f32->ilf_u.ilfu_uuid.__u_bits,
+- sizeof(uuid_t));
+- in_f->ilf_blkno = in_f32->ilf_blkno;
+- in_f->ilf_len = in_f32->ilf_len;
+- in_f->ilf_boffset = in_f32->ilf_boffset;
+- return 0;
+- } else if (buf->i_len == sizeof(xfs_inode_log_format_64_t)){
+- xfs_inode_log_format_64_t *in_f64 = buf->i_addr;
+-
+- in_f->ilf_type = in_f64->ilf_type;
+- in_f->ilf_size = in_f64->ilf_size;
+- in_f->ilf_fields = in_f64->ilf_fields;
+- in_f->ilf_asize = in_f64->ilf_asize;
+- in_f->ilf_dsize = in_f64->ilf_dsize;
+- in_f->ilf_ino = in_f64->ilf_ino;
+- /* copy biggest field of ilf_u */
+- memcpy(in_f->ilf_u.ilfu_uuid.__u_bits,
+- in_f64->ilf_u.ilfu_uuid.__u_bits,
+- sizeof(uuid_t));
+- in_f->ilf_blkno = in_f64->ilf_blkno;
+- in_f->ilf_len = in_f64->ilf_len;
+- in_f->ilf_boffset = in_f64->ilf_boffset;
+- return 0;
+- }
+- return -EFSCORRUPTED;
++ if (buf->i_len != sizeof(*in_f32))
++ return -EFSCORRUPTED;
++
++ in_f->ilf_type = in_f32->ilf_type;
++ in_f->ilf_size = in_f32->ilf_size;
++ in_f->ilf_fields = in_f32->ilf_fields;
++ in_f->ilf_asize = in_f32->ilf_asize;
++ in_f->ilf_dsize = in_f32->ilf_dsize;
++ in_f->ilf_ino = in_f32->ilf_ino;
++ /* copy biggest field of ilf_u */
++ memcpy(in_f->ilf_u.ilfu_uuid.__u_bits,
++ in_f32->ilf_u.ilfu_uuid.__u_bits, sizeof(uuid_t));
++ in_f->ilf_blkno = in_f32->ilf_blkno;
++ in_f->ilf_len = in_f32->ilf_len;
++ in_f->ilf_boffset = in_f32->ilf_boffset;
++ return 0;
+ }
+--- a/fs/xfs/xfs_ondisk.h
++++ b/fs/xfs/xfs_ondisk.h
+@@ -134,7 +134,7 @@ xfs_check_ondisk_structs(void)
+ XFS_CHECK_STRUCT_SIZE(struct xfs_icreate_log, 28);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_ictimestamp, 8);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format_32, 52);
+- XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format_64, 56);
++ XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format, 56);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_qoff_logformat, 20);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_trans_header, 16);
+ }
--- /dev/null
+From cc6f77710a6de6210f9feda7cd53e2f5ee7a7e69 Mon Sep 17 00:00:00 2001
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+Date: Mon, 18 Sep 2017 09:41:16 -0700
+Subject: xfs: don't unconditionally clear the reflink flag on zero-block files
+
+From: Darrick J. Wong <darrick.wong@oracle.com>
+
+commit cc6f77710a6de6210f9feda7cd53e2f5ee7a7e69 upstream.
+
+If we have speculative cow preallocations hanging around in the cow
+fork, don't let a truncate operation clear the reflink flag because if
+we do then there's a chance we'll forget to free those extents when we
+destroy the incore inode.
+
+Reported-by: Amir Goldstein <amir73il@gmail.com>
+Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/xfs_inode.c | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/fs/xfs/xfs_inode.c
++++ b/fs/xfs/xfs_inode.c
+@@ -1632,10 +1632,12 @@ xfs_itruncate_extents(
+ goto out;
+
+ /*
+- * Clear the reflink flag if we truncated everything.
++ * Clear the reflink flag if there are no data fork blocks and
++ * there are no extents staged in the cow fork.
+ */
+- if (ip->i_d.di_nblocks == 0 && xfs_is_reflink_inode(ip)) {
+- ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
++ if (xfs_is_reflink_inode(ip) && ip->i_cnextents == 0) {
++ if (ip->i_d.di_nblocks == 0)
++ ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
+ xfs_inode_clear_cowblocks_tag(ip);
+ }
+
--- /dev/null
+From 3af423b03435c81036fa710623d3ae92fbe346a3 Mon Sep 17 00:00:00 2001
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+Date: Mon, 18 Sep 2017 09:41:17 -0700
+Subject: xfs: evict CoW fork extents when performing finsert/fcollapse
+
+From: Darrick J. Wong <darrick.wong@oracle.com>
+
+commit 3af423b03435c81036fa710623d3ae92fbe346a3 upstream.
+
+When we perform an finsert/fcollapse operation, cancel all the CoW
+extents for the affected file offset range so that they don't end up
+pointing to the wrong blocks.
+
+Reported-by: Amir Goldstein <amir73il@gmail.com>
+Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/xfs_bmap_util.c | 14 +++++++++++++-
+ 1 file changed, 13 insertions(+), 1 deletion(-)
+
+--- a/fs/xfs/xfs_bmap_util.c
++++ b/fs/xfs/xfs_bmap_util.c
+@@ -1445,7 +1445,19 @@ xfs_shift_file_space(
+ return error;
+
+ /*
+- * The extent shiting code works on extent granularity. So, if
++ * Clean out anything hanging around in the cow fork now that
++ * we've flushed all the dirty data out to disk to avoid having
++ * CoW extents at the wrong offsets.
++ */
++ if (xfs_is_reflink_inode(ip)) {
++ error = xfs_reflink_cancel_cow_range(ip, offset, NULLFILEOFF,
++ true);
++ if (error)
++ return error;
++ }
++
++ /*
++ * The extent shifting code works on extent granularity. So, if
+ * stop_fsb is not the starting block of extent, we need to split
+ * the extent at stop_fsb.
+ */
--- /dev/null
+From 93e8befc17f6d6ea92b0aee3741ceac8bca4590f Mon Sep 17 00:00:00 2001
+From: Eric Sandeen <sandeen@sandeen.net>
+Date: Mon, 9 Oct 2017 21:08:06 -0700
+Subject: xfs: handle error if xfs_btree_get_bufs fails
+
+From: Eric Sandeen <sandeen@sandeen.net>
+
+commit 93e8befc17f6d6ea92b0aee3741ceac8bca4590f upstream.
+
+Jason reported that a corrupted filesystem failed to replay
+the log with a metadata block out of bounds warning:
+
+XFS (dm-2): _xfs_buf_find: Block out of range: block 0x80270fff8, EOFS 0x9c40000
+
+_xfs_buf_find() and xfs_btree_get_bufs() return NULL if
+that happens, and then when xfs_alloc_fix_freelist() calls
+xfs_trans_binval() on that NULL bp, we oops with:
+
+BUG: unable to handle kernel NULL pointer dereference at 00000000000000f8
+
+We don't handle _xfs_buf_find errors very well, every
+caller higher up the stack gets to guess at why it failed.
+But we should at least handle it somehow, so return
+EFSCORRUPTED here.
+
+Reported-by: Jason L Tibbitts III <tibbs@math.uh.edu>
+Signed-off-by: Eric Sandeen <sandeen@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/libxfs/xfs_alloc.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/fs/xfs/libxfs/xfs_alloc.c
++++ b/fs/xfs/libxfs/xfs_alloc.c
+@@ -1579,6 +1579,10 @@ xfs_alloc_ag_vextent_small(
+
+ bp = xfs_btree_get_bufs(args->mp, args->tp,
+ args->agno, fbno, 0);
++ if (!bp) {
++ error = -EFSCORRUPTED;
++ goto error0;
++ }
+ xfs_trans_binval(args->tp, bp);
+ }
+ args->len = 1;
+@@ -2136,6 +2140,10 @@ xfs_alloc_fix_freelist(
+ if (error)
+ goto out_agbp_relse;
+ bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
++ if (!bp) {
++ error = -EFSCORRUPTED;
++ goto out_agbp_relse;
++ }
+ xfs_trans_binval(tp, bp);
+ }
+
--- /dev/null
+From e12199f85d0ad1b04ce6c425ad93cd847fe930bb Mon Sep 17 00:00:00 2001
+From: Christoph Hellwig <hch@lst.de>
+Date: Tue, 3 Oct 2017 08:58:33 -0700
+Subject: xfs: handle racy AIO in xfs_reflink_end_cow
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit e12199f85d0ad1b04ce6c425ad93cd847fe930bb upstream.
+
+If we got two AIO writes into a COW area the second one might not have any
+COW extents left to convert. Handle that case gracefully instead of
+triggering an assert or accessing beyond the bounds of the extent list.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+
+---
+ fs/xfs/xfs_reflink.c | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/fs/xfs/xfs_reflink.c
++++ b/fs/xfs/xfs_reflink.c
+@@ -767,7 +767,13 @@ xfs_reflink_end_cow(
+
+ /* If there is a hole at end_fsb - 1 go to the previous extent */
+ if (eof || got.br_startoff > end_fsb) {
+- ASSERT(idx > 0);
++ /*
++ * In case of racing, overlapping AIO writes no COW extents
++ * might be left by the time I/O completes for the loser of
++ * the race. In that case we are done.
++ */
++ if (idx <= 0)
++ goto out_cancel;
+ xfs_bmbt_get_all(xfs_iext_get_ext(ifp, --idx), &got);
+ }
+
+@@ -841,6 +847,7 @@ next_extent:
+
+ out_defer:
+ xfs_defer_cancel(&dfops);
++out_cancel:
+ xfs_trans_cancel(tp);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ out:
--- /dev/null
+From bb9c2e5433250f5b477035dc478314f8e6dd5e36 Mon Sep 17 00:00:00 2001
+From: Dave Chinner <dchinner@redhat.com>
+Date: Mon, 9 Oct 2017 11:37:22 -0700
+Subject: xfs: move more RT specific code under CONFIG_XFS_RT
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit bb9c2e5433250f5b477035dc478314f8e6dd5e36 upstream.
+
+Various utility functions and interfaces that iterate internal
+devices try to reference the realtime device even when RT support is
+not compiled into the kernel.
+
+Make sure this code is excluded from the CONFIG_XFS_RT=n build,
+and where appropriate stub functions to return fatal errors if
+they ever get called when RT support is not present.
+
+Signed-Off-By: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/xfs_bmap_util.c | 2 ++
+ fs/xfs/xfs_bmap_util.h | 13 +++++++++++++
+ 2 files changed, 15 insertions(+)
+
+--- a/fs/xfs/xfs_bmap_util.c
++++ b/fs/xfs/xfs_bmap_util.c
+@@ -84,6 +84,7 @@ xfs_zero_extent(
+ GFP_NOFS, true);
+ }
+
++#ifdef CONFIG_XFS_RT
+ int
+ xfs_bmap_rtalloc(
+ struct xfs_bmalloca *ap) /* bmap alloc argument struct */
+@@ -195,6 +196,7 @@ xfs_bmap_rtalloc(
+ }
+ return 0;
+ }
++#endif /* CONFIG_XFS_RT */
+
+ /*
+ * Check if the endoff is outside the last extent. If so the caller will grow
+--- a/fs/xfs/xfs_bmap_util.h
++++ b/fs/xfs/xfs_bmap_util.h
+@@ -28,7 +28,20 @@ struct xfs_mount;
+ struct xfs_trans;
+ struct xfs_bmalloca;
+
++#ifdef CONFIG_XFS_RT
+ int xfs_bmap_rtalloc(struct xfs_bmalloca *ap);
++#else /* !CONFIG_XFS_RT */
++/*
++ * Attempts to allocate RT extents when RT is disable indicates corruption and
++ * should trigger a shutdown.
++ */
++static inline int
++xfs_bmap_rtalloc(struct xfs_bmalloca *ap)
++{
++ return -EFSCORRUPTED;
++}
++#endif /* CONFIG_XFS_RT */
++
+ int xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff,
+ int whichfork, int *eof);
+ int xfs_bmap_punch_delalloc_range(struct xfs_inode *ip,
--- /dev/null
+From 9789dd9e1d939232e8ff4c50ef8e75aa6781b3fb Mon Sep 17 00:00:00 2001
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+Date: Mon, 18 Sep 2017 09:42:09 -0700
+Subject: xfs: perag initialization should only touch m_ag_max_usable for AG 0
+
+From: Darrick J. Wong <darrick.wong@oracle.com>
+
+commit 9789dd9e1d939232e8ff4c50ef8e75aa6781b3fb upstream.
+
+We call __xfs_ag_resv_init to make a per-AG reservation for each AG.
+This makes the reservation per-AG, not per-filesystem. Therefore, it
+is incorrect to adjust m_ag_max_usable for each AG. Adjust it only
+when we're reserving AG 0's blocks so that we only do it once per fs.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/libxfs/xfs_ag_resv.c | 12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_ag_resv.c
++++ b/fs/xfs/libxfs/xfs_ag_resv.c
+@@ -157,7 +157,8 @@ __xfs_ag_resv_free(
+ trace_xfs_ag_resv_free(pag, type, 0);
+
+ resv = xfs_perag_resv(pag, type);
+- pag->pag_mount->m_ag_max_usable += resv->ar_asked;
++ if (pag->pag_agno == 0)
++ pag->pag_mount->m_ag_max_usable += resv->ar_asked;
+ /*
+ * AGFL blocks are always considered "free", so whatever
+ * was reserved at mount time must be given back at umount.
+@@ -217,7 +218,14 @@ __xfs_ag_resv_init(
+ return error;
+ }
+
+- mp->m_ag_max_usable -= ask;
++ /*
++ * Reduce the maximum per-AG allocation length by however much we're
++ * trying to reserve for an AG. Since this is a filesystem-wide
++ * counter, we only make the adjustment for AG 0. This assumes that
++ * there aren't any AGs hungrier for per-AG reservation than AG 0.
++ */
++ if (pag->pag_agno == 0)
++ mp->m_ag_max_usable -= ask;
+
+ resv = xfs_perag_resv(pag, type);
+ resv->ar_asked = ask;
--- /dev/null
+From f35c5e10c6ed6ba52a8dd8573924a80b6a02f03f Mon Sep 17 00:00:00 2001
+From: Brian Foster <bfoster@redhat.com>
+Date: Mon, 9 Oct 2017 11:38:56 -0700
+Subject: xfs: reinit btree pointer on attr tree inactivation walk
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit f35c5e10c6ed6ba52a8dd8573924a80b6a02f03f upstream.
+
+xfs_attr3_root_inactive() walks the attr fork tree to invalidate the
+associated blocks. xfs_attr3_node_inactive() recursively descends
+from internal blocks to leaf blocks, caching block address values
+along the way to revisit parent blocks, locate the next entry and
+descend down that branch of the tree.
+
+The code that attempts to reread the parent block is unsafe because
+it assumes that the local xfs_da_node_entry pointer remains valid
+after an xfs_trans_brelse() and re-read of the parent buffer. Under
+heavy memory pressure, it is possible that the buffer has been
+reclaimed and reallocated by the time the parent block is reread.
+This means that 'btree' can point to an invalid memory address, lead
+to a random/garbage value for child_fsb and cause the subsequent
+read of the attr fork to go off the rails and return a NULL buffer
+for an attr fork offset that is most likely not allocated.
+
+Note that this problem can be manufactured by setting
+XFS_ATTR_BTREE_REF to 0 to prevent LRU caching of attr buffers,
+creating a file with a multi-level attr fork and removing it to
+trigger inactivation.
+
+To address this problem, reinit the node/btree pointers to the
+parent buffer after it has been re-read. This ensures btree points
+to a valid record and allows the walk to proceed.
+
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/xfs_attr_inactive.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/xfs/xfs_attr_inactive.c
++++ b/fs/xfs/xfs_attr_inactive.c
+@@ -302,6 +302,8 @@ xfs_attr3_node_inactive(
+ &bp, XFS_ATTR_FORK);
+ if (error)
+ return error;
++ node = bp->b_addr;
++ btree = dp->d_ops->node_tree_p(node);
+ child_fsb = be32_to_cpu(btree[i + 1].before);
+ xfs_trans_brelse(*trans, bp);
+ }
--- /dev/null
+From d20a5e3851969fa685f118a80e4df670255a4e8d Mon Sep 17 00:00:00 2001
+From: Eryu Guan <eguan@redhat.com>
+Date: Mon, 18 Sep 2017 11:39:23 -0700
+Subject: xfs: report zeroed or not correctly in xfs_zero_range()
+
+From: Eryu Guan <eguan@redhat.com>
+
+commit d20a5e3851969fa685f118a80e4df670255a4e8d upstream.
+
+The 'did_zero' param of xfs_zero_range() was not passed to
+iomap_zero_range() correctly. This was introduced by commit
+7bb41db3ea16 ("xfs: handle 64-bit length in xfs_iozero"), and found
+by code inspection.
+
+Signed-off-by: Eryu Guan <eguan@redhat.com>
+Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/xfs_file.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/xfs/xfs_file.c
++++ b/fs/xfs/xfs_file.c
+@@ -92,7 +92,7 @@ xfs_zero_range(
+ xfs_off_t count,
+ bool *did_zero)
+ {
+- return iomap_zero_range(VFS_I(ip), pos, count, NULL, &xfs_iomap_ops);
++ return iomap_zero_range(VFS_I(ip), pos, count, did_zero, &xfs_iomap_ops);
+ }
+
+ int
--- /dev/null
+From 40214d128e07dd21bb07a8ed6a7fe2f911281ab2 Mon Sep 17 00:00:00 2001
+From: Brian Foster <bfoster@redhat.com>
+Date: Fri, 13 Oct 2017 09:47:46 -0700
+Subject: xfs: trim writepage mapping to within eof
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit 40214d128e07dd21bb07a8ed6a7fe2f911281ab2 upstream.
+
+The writeback rework in commit fbcc02561359 ("xfs: Introduce
+writeback context for writepages") introduced a subtle change in
+behavior with regard to the block mapping used across the
+->writepages() sequence. The previous xfs_cluster_write() code would
+only flush pages up to EOF at the time of the writepage, thus
+ensuring that any pages due to file-extending writes would be
+handled on a separate cycle and with a new, updated block mapping.
+
+The updated code establishes a block mapping in xfs_writepage_map()
+that could extend beyond EOF if the file has post-eof preallocation.
+Because we now use the generic writeback infrastructure and pass the
+cached mapping to each writepage call, there is no implicit EOF
+limit in place. If eofblocks trimming occurs during ->writepages(),
+any post-eof portion of the cached mapping becomes invalid. The
+eofblocks code has no means to serialize against writeback because
+there are no pages associated with post-eof blocks. Therefore if an
+eofblocks trim occurs and is followed by a file-extending buffered
+write, not only has the mapping become invalid, but we could end up
+writing a page to disk based on the invalid mapping.
+
+Consider the following sequence of events:
+
+- A buffered write creates a delalloc extent and post-eof
+ speculative preallocation.
+- Writeback starts and on the first writepage cycle, the delalloc
+ extent is converted to real blocks (including the post-eof blocks)
+ and the mapping is cached.
+- The file is closed and xfs_release() trims post-eof blocks. The
+ cached writeback mapping is now invalid.
+- Another buffered write appends to the file with a delalloc extent.
+- The concurrent writeback cycle picks up the just written page
+ because the writeback range end is LLONG_MAX. xfs_writepage_map()
+ attributes it to the (now invalid) cached mapping and writes the
+ data to an incorrect location on disk (and where the file offset is
+ still backed by a delalloc extent).
+
+This problem is reproduced by xfstests test generic/464, which
+triggers racing writes, appends, open/closes and writeback requests.
+
+To address this problem, trim the mapping used during writeback to
+within EOF when the mapping is validated. This ensures the mapping
+is revalidated for any pages encountered beyond EOF as of the time
+the current mapping was cached or last validated.
+
+Reported-by: Eryu Guan <eguan@redhat.com>
+Diagnosed-by: Eryu Guan <eguan@redhat.com>
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/libxfs/xfs_bmap.c | 11 +++++++++++
+ fs/xfs/libxfs/xfs_bmap.h | 1 +
+ fs/xfs/xfs_aops.c | 13 +++++++++++++
+ 3 files changed, 25 insertions(+)
+
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -4057,6 +4057,17 @@ xfs_trim_extent(
+ }
+ }
+
++/* trim extent to within eof */
++void
++xfs_trim_extent_eof(
++ struct xfs_bmbt_irec *irec,
++ struct xfs_inode *ip)
++
++{
++ xfs_trim_extent(irec, 0, XFS_B_TO_FSB(ip->i_mount,
++ i_size_read(VFS_I(ip))));
++}
++
+ /*
+ * Trim the returned map to the required bounds
+ */
+--- a/fs/xfs/libxfs/xfs_bmap.h
++++ b/fs/xfs/libxfs/xfs_bmap.h
+@@ -196,6 +196,7 @@ void xfs_bmap_trace_exlist(struct xfs_in
+
+ void xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno,
+ xfs_filblks_t len);
++void xfs_trim_extent_eof(struct xfs_bmbt_irec *, struct xfs_inode *);
+ int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd);
+ void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork);
+ void xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
+--- a/fs/xfs/xfs_aops.c
++++ b/fs/xfs/xfs_aops.c
+@@ -438,6 +438,19 @@ xfs_imap_valid(
+ {
+ offset >>= inode->i_blkbits;
+
++ /*
++ * We have to make sure the cached mapping is within EOF to protect
++ * against eofblocks trimming on file release leaving us with a stale
++ * mapping. Otherwise, a page for a subsequent file extending buffered
++ * write could get picked up by this writeback cycle and written to the
++ * wrong blocks.
++ *
++ * Note that what we really want here is a generic mapping invalidation
++ * mechanism to protect us from arbitrary extent modifying contexts, not
++ * just eofblocks.
++ */
++ xfs_trim_extent_eof(imap, XFS_I(inode));
++
+ return offset >= imap->br_startoff &&
+ offset < imap->br_startoff + imap->br_blockcount;
+ }
--- /dev/null
+From ee70daaba82d70766d0723b743d9fdeb3b06102a Mon Sep 17 00:00:00 2001
+From: Eryu Guan <eguan@redhat.com>
+Date: Thu, 21 Sep 2017 11:26:18 -0700
+Subject: xfs: update i_size after unwritten conversion in dio completion
+
+From: Eryu Guan <eguan@redhat.com>
+
+commit ee70daaba82d70766d0723b743d9fdeb3b06102a upstream.
+
+Since commit d531d91d6990 ("xfs: always use unwritten extents for
+direct I/O writes"), we allocate unwritten extents for all direct
+writes to allow appending aio in XFS.
+
+But for dio writes that could extend file size we update the in-core
+inode size first, then convert the unwritten extents to real
+allocations at dio completion time in xfs_dio_write_end_io(). Thus a
+racing direct read could see the new i_size and find the unwritten
+extents first and read zeros instead of actual data, if the direct
+writer also takes a shared iolock.
+
+Fix it by updating the in-core inode size after the unwritten extent
+conversion. To do this, introduce a new boolean argument to
+xfs_iomap_write_unwritten() to tell if we want to update in-core
+i_size or not.
+
+Suggested-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Signed-off-by: Eryu Guan <eguan@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+[hch: backported to the old direct I/O code before Linux 4.10]
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_aops.c | 25 +++++++++++++++++--------
+ fs/xfs/xfs_iomap.c | 7 +++++--
+ fs/xfs/xfs_iomap.h | 2 +-
+ fs/xfs/xfs_pnfs.c | 2 +-
+ 4 files changed, 24 insertions(+), 12 deletions(-)
+
+--- a/fs/xfs/xfs_aops.c
++++ b/fs/xfs/xfs_aops.c
+@@ -335,7 +335,8 @@ xfs_end_io(
+ error = xfs_reflink_end_cow(ip, offset, size);
+ break;
+ case XFS_IO_UNWRITTEN:
+- error = xfs_iomap_write_unwritten(ip, offset, size);
++ /* writeback should never update isize */
++ error = xfs_iomap_write_unwritten(ip, offset, size, false);
+ break;
+ default:
+ ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans);
+@@ -1532,6 +1533,21 @@ xfs_end_io_direct_write(
+ return 0;
+ }
+
++ if (flags & XFS_DIO_FLAG_COW)
++ error = xfs_reflink_end_cow(ip, offset, size);
++
++ /*
++ * Unwritten conversion updates the in-core isize after extent
++ * conversion but before updating the on-disk size. Updating isize any
++ * earlier allows a racing dio read to find unwritten extents before
++ * they are converted.
++ */
++ if (flags & XFS_DIO_FLAG_UNWRITTEN) {
++ trace_xfs_end_io_direct_write_unwritten(ip, offset, size);
++
++ return xfs_iomap_write_unwritten(ip, offset, size, true);
++ }
++
+ /*
+ * We need to update the in-core inode size here so that we don't end up
+ * with the on-disk inode size being outside the in-core inode size. We
+@@ -1548,13 +1564,6 @@ xfs_end_io_direct_write(
+ i_size_write(inode, offset + size);
+ spin_unlock(&ip->i_flags_lock);
+
+- if (flags & XFS_DIO_FLAG_COW)
+- error = xfs_reflink_end_cow(ip, offset, size);
+- if (flags & XFS_DIO_FLAG_UNWRITTEN) {
+- trace_xfs_end_io_direct_write_unwritten(ip, offset, size);
+-
+- error = xfs_iomap_write_unwritten(ip, offset, size);
+- }
+ if (flags & XFS_DIO_FLAG_APPEND) {
+ trace_xfs_end_io_direct_write_append(ip, offset, size);
+
+--- a/fs/xfs/xfs_iomap.c
++++ b/fs/xfs/xfs_iomap.c
+@@ -836,7 +836,8 @@ int
+ xfs_iomap_write_unwritten(
+ xfs_inode_t *ip,
+ xfs_off_t offset,
+- xfs_off_t count)
++ xfs_off_t count,
++ bool update_isize)
+ {
+ xfs_mount_t *mp = ip->i_mount;
+ xfs_fileoff_t offset_fsb;
+@@ -847,6 +848,7 @@ xfs_iomap_write_unwritten(
+ xfs_trans_t *tp;
+ xfs_bmbt_irec_t imap;
+ struct xfs_defer_ops dfops;
++ struct inode *inode = VFS_I(ip);
+ xfs_fsize_t i_size;
+ uint resblks;
+ int error;
+@@ -906,7 +908,8 @@ xfs_iomap_write_unwritten(
+ i_size = XFS_FSB_TO_B(mp, offset_fsb + count_fsb);
+ if (i_size > offset + count)
+ i_size = offset + count;
+-
++ if (update_isize && i_size > i_size_read(inode))
++ i_size_write(inode, i_size);
+ i_size = xfs_new_eof(ip, i_size);
+ if (i_size) {
+ ip->i_d.di_size = i_size;
+--- a/fs/xfs/xfs_iomap.h
++++ b/fs/xfs/xfs_iomap.h
+@@ -27,7 +27,7 @@ int xfs_iomap_write_direct(struct xfs_in
+ struct xfs_bmbt_irec *, int);
+ int xfs_iomap_write_allocate(struct xfs_inode *, int, xfs_off_t,
+ struct xfs_bmbt_irec *);
+-int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t);
++int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool);
+
+ void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
+ struct xfs_bmbt_irec *);
+--- a/fs/xfs/xfs_pnfs.c
++++ b/fs/xfs/xfs_pnfs.c
+@@ -279,7 +279,7 @@ xfs_fs_commit_blocks(
+ (end - 1) >> PAGE_SHIFT);
+ WARN_ON_ONCE(error);
+
+- error = xfs_iomap_write_unwritten(ip, start, length);
++ error = xfs_iomap_write_unwritten(ip, start, length, false);
+ if (error)
+ goto out_drop_iolock;
+ }