From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Tue, 24 Oct 2017 12:54:24 +0000 (+0200)
Subject: 4.9-stable patches
X-Git-Tag: v3.18.78~4
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=5df2dcbf4b14671192cb3f3bc296f3059a205550;p=thirdparty%2Fkernel%2Fstable-queue.git

4.9-stable patches

added patches:
	fs-xfs-use-ps-printk-format-for-direct-addresses.patch
	xfs-always-swap-the-cow-forks-when-swapping-extents.patch
	xfs-cancel-dirty-pages-on-invalidation.patch
	xfs-capture-state-of-the-right-inode-in-xfs_iflush_done.patch
	xfs-don-t-change-inode-mode-if-acl-update-fails.patch
	xfs-don-t-log-uninitialised-fields-in-inode-structures.patch
	xfs-don-t-unconditionally-clear-the-reflink-flag-on-zero-block-files.patch
	xfs-evict-cow-fork-extents-when-performing-finsert-fcollapse.patch
	xfs-handle-error-if-xfs_btree_get_bufs-fails.patch
	xfs-handle-racy-aio-in-xfs_reflink_end_cow.patch
	xfs-move-more-rt-specific-code-under-config_xfs_rt.patch
	xfs-perag-initialization-should-only-touch-m_ag_max_usable-for-ag-0.patch
	xfs-reinit-btree-pointer-on-attr-tree-inactivation-walk.patch
	xfs-report-zeroed-or-not-correctly-in-xfs_zero_range.patch
	xfs-trim-writepage-mapping-to-within-eof.patch
	xfs-update-i_size-after-unwritten-conversion-in-dio-completion.patch
---

diff --git a/queue-4.9/fs-xfs-use-ps-printk-format-for-direct-addresses.patch b/queue-4.9/fs-xfs-use-ps-printk-format-for-direct-addresses.patch
new file mode 100644
index 00000000000..55bd9d50533
--- /dev/null
+++ b/queue-4.9/fs-xfs-use-ps-printk-format-for-direct-addresses.patch
@@ -0,0 +1,34 @@
+From e150dcd459e1b441eaf08f341a986f04e61bf3b8 Mon Sep 17 00:00:00 2001
+From: Helge Deller <deller@gmx.de>
+Date: Mon, 18 Sep 2017 11:34:16 -0700
+Subject: fs/xfs: Use %pS printk format for direct addresses
+
+From: Helge Deller <deller@gmx.de>
+
+commit e150dcd459e1b441eaf08f341a986f04e61bf3b8 upstream.
+
+Use the %pS instead of the %pF printk format specifier for printing symbols
+from direct addresses. This is needed for the ia64, ppc64 and parisc64
+architectures.
+
+Signed-off-by: Helge Deller <deller@gmx.de>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/xfs_error.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/xfs/xfs_error.c
++++ b/fs/xfs/xfs_error.c
+@@ -167,7 +167,7 @@ xfs_verifier_error(
+ {
+ 	struct xfs_mount *mp = bp->b_target->bt_mount;
+ 
+-	xfs_alert(mp, "Metadata %s detected at %pF, %s block 0x%llx",
++	xfs_alert(mp, "Metadata %s detected at %pS, %s block 0x%llx",
+ 		  bp->b_error == -EFSBADCRC ? "CRC error" : "corruption",
+ 		  __return_address, bp->b_ops->name, bp->b_bn);
+ 
diff --git a/queue-4.9/series b/queue-4.9/series
index 302aa6f59d4..367f2797e1d 100644
--- a/queue-4.9/series
+++ b/queue-4.9/series
@@ -30,3 +30,19 @@ lib-digsig-fix-dereference-of-null-user_key_payload.patch
 keys-don-t-let-add_key-update-an-uninstantiated-key.patch
 pkcs7-prevent-null-pointer-dereference-since-sinfo-is-not-always-set.patch
 vmbus-fix-missing-signaling-in-hv_signal_on_read.patch
+xfs-don-t-unconditionally-clear-the-reflink-flag-on-zero-block-files.patch
+xfs-evict-cow-fork-extents-when-performing-finsert-fcollapse.patch
+fs-xfs-use-ps-printk-format-for-direct-addresses.patch
+xfs-report-zeroed-or-not-correctly-in-xfs_zero_range.patch
+xfs-update-i_size-after-unwritten-conversion-in-dio-completion.patch
+xfs-perag-initialization-should-only-touch-m_ag_max_usable-for-ag-0.patch
+xfs-capture-state-of-the-right-inode-in-xfs_iflush_done.patch
+xfs-always-swap-the-cow-forks-when-swapping-extents.patch
+xfs-handle-racy-aio-in-xfs_reflink_end_cow.patch
+xfs-don-t-log-uninitialised-fields-in-inode-structures.patch
+xfs-move-more-rt-specific-code-under-config_xfs_rt.patch
+xfs-don-t-change-inode-mode-if-acl-update-fails.patch
+xfs-reinit-btree-pointer-on-attr-tree-inactivation-walk.patch
+xfs-handle-error-if-xfs_btree_get_bufs-fails.patch
+xfs-cancel-dirty-pages-on-invalidation.patch
+xfs-trim-writepage-mapping-to-within-eof.patch
diff --git a/queue-4.9/xfs-always-swap-the-cow-forks-when-swapping-extents.patch b/queue-4.9/xfs-always-swap-the-cow-forks-when-swapping-extents.patch
new file mode 100644
index 00000000000..8311f3a5e4f
--- /dev/null
+++ b/queue-4.9/xfs-always-swap-the-cow-forks-when-swapping-extents.patch
@@ -0,0 +1,58 @@
+From 52bfcdd7adbc26639bc7b2356ab9a3f5dad68ad6 Mon Sep 17 00:00:00 2001
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+Date: Mon, 18 Sep 2017 09:41:18 -0700
+Subject: xfs: always swap the cow forks when swapping extents
+
+From: Darrick J. Wong <darrick.wong@oracle.com>
+
+commit 52bfcdd7adbc26639bc7b2356ab9a3f5dad68ad6 upstream.
+
+Since the CoW fork exists as a secondary data structure to the data
+fork, we must always swap cow forks during swapext.  We also need to
+swap the extent counts and reset the cowblocks tags.
+
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/xfs_bmap_util.c |   24 ++++++++++++++++++++++--
+ 1 file changed, 22 insertions(+), 2 deletions(-)
+
+--- a/fs/xfs/xfs_bmap_util.c
++++ b/fs/xfs/xfs_bmap_util.c
+@@ -2106,11 +2106,31 @@ xfs_swap_extents(
+ 		ip->i_d.di_flags2 |= tip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK;
+ 		tip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
+ 		tip->i_d.di_flags2 |= f & XFS_DIFLAG2_REFLINK;
++	}
++
++	/* Swap the cow forks. */
++	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
++		xfs_extnum_t	extnum;
++
++		ASSERT(ip->i_cformat == XFS_DINODE_FMT_EXTENTS);
++		ASSERT(tip->i_cformat == XFS_DINODE_FMT_EXTENTS);
++
++		extnum = ip->i_cnextents;
++		ip->i_cnextents = tip->i_cnextents;
++		tip->i_cnextents = extnum;
++
+ 		cowfp = ip->i_cowfp;
+ 		ip->i_cowfp = tip->i_cowfp;
+ 		tip->i_cowfp = cowfp;
+-		xfs_inode_set_cowblocks_tag(ip);
+-		xfs_inode_set_cowblocks_tag(tip);
++
++		if (ip->i_cowfp && ip->i_cnextents)
++			xfs_inode_set_cowblocks_tag(ip);
++		else
++			xfs_inode_clear_cowblocks_tag(ip);
++		if (tip->i_cowfp && tip->i_cnextents)
++			xfs_inode_set_cowblocks_tag(tip);
++		else
++			xfs_inode_clear_cowblocks_tag(tip);
+ 	}
+ 
+ 	xfs_trans_log_inode(tp, ip,  src_log_flags);
diff --git a/queue-4.9/xfs-cancel-dirty-pages-on-invalidation.patch b/queue-4.9/xfs-cancel-dirty-pages-on-invalidation.patch
new file mode 100644
index 00000000000..db79bb98614
--- /dev/null
+++ b/queue-4.9/xfs-cancel-dirty-pages-on-invalidation.patch
@@ -0,0 +1,103 @@
+From 793d7dbe6d82a50b9d14bf992b9eaacb70a11ce6 Mon Sep 17 00:00:00 2001
+From: Dave Chinner <dchinner@redhat.com>
+Date: Fri, 13 Oct 2017 09:47:45 -0700
+Subject: xfs: cancel dirty pages on invalidation
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit 793d7dbe6d82a50b9d14bf992b9eaacb70a11ce6 upstream.
+
+Recently we've had warnings arise from the vm handing us pages
+without bufferheads attached to them. This should not ever occur
+in XFS, but we don't defend against it properly if it does. The only
+place where we remove bufferheads from a page is in
+xfs_vm_releasepage(), but we can't tell the difference here between
+"page is dirty so don't release" and "page is dirty but is being
+invalidated so release it".
+
+Some places that invalidate pages ask for the pages to be released
+and then follow up, after calling ->releasepage, by checking whether
+the page was dirty and then aborting the invalidation. This
+is a possible vector for releasing buffers from a page but then
+leaving it in the mapping, so we really do need to avoid dirty pages
+in xfs_vm_releasepage().
+
+To differentiate between invalidated pages and normal pages, we need
+to clear the page dirty flag when invalidating the pages. This can
+be done through xfs_vm_invalidatepage(), and will result in
+xfs_vm_releasepage() seeing the page as clean, which matches the
+bufferhead state on the page after calling block_invalidatepage().
+
+Hence we can re-add the page dirty check in xfs_vm_releasepage to
+catch the case where we might be releasing a page that is actually
+dirty and so should not have the bufferheads on it removed. This
+will remove one possible vector of "dirty page with no bufferheads"
+and so help narrow down the search for the root cause of that
+problem.
+
+Signed-Off-By: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/xfs_aops.c |   34 ++++++++++++++++++++++------------
+ 1 file changed, 22 insertions(+), 12 deletions(-)
+
+--- a/fs/xfs/xfs_aops.c
++++ b/fs/xfs/xfs_aops.c
+@@ -726,6 +726,14 @@ xfs_vm_invalidatepage(
+ {
+ 	trace_xfs_invalidatepage(page->mapping->host, page, offset,
+ 				 length);
++
++	/*
++	 * If we are invalidating the entire page, clear the dirty state from it
++	 * so that we can check for attempts to release dirty cached pages in
++	 * xfs_vm_releasepage().
++	 */
++	if (offset == 0 && length >= PAGE_SIZE)
++		cancel_dirty_page(page);
+ 	block_invalidatepage(page, offset, length);
+ }
+ 
+@@ -1181,25 +1189,27 @@ xfs_vm_releasepage(
+ 	 * mm accommodates an old ext3 case where clean pages might not have had
+ 	 * the dirty bit cleared. Thus, it can send actual dirty pages to
+ 	 * ->releasepage() via shrink_active_list(). Conversely,
+-	 * block_invalidatepage() can send pages that are still marked dirty
+-	 * but otherwise have invalidated buffers.
++	 * block_invalidatepage() can send pages that are still marked dirty but
++	 * otherwise have invalidated buffers.
+ 	 *
+ 	 * We want to release the latter to avoid unnecessary buildup of the
+-	 * LRU, skip the former and warn if we've left any lingering
+-	 * delalloc/unwritten buffers on clean pages. Skip pages with delalloc
+-	 * or unwritten buffers and warn if the page is not dirty. Otherwise
+-	 * try to release the buffers.
++	 * LRU, so xfs_vm_invalidatepage() clears the page dirty flag on pages
++	 * that are entirely invalidated and need to be released.  Hence the
++	 * only time we should get dirty pages here is through
++	 * shrink_active_list() and so we can simply skip those now.
++	 *
++	 * warn if we've left any lingering delalloc/unwritten buffers on clean
++	 * or invalidated pages we are about to release.
+ 	 */
++	if (PageDirty(page))
++		return 0;
++
+ 	xfs_count_page_state(page, &delalloc, &unwritten);
+ 
+-	if (delalloc) {
+-		WARN_ON_ONCE(!PageDirty(page));
++	if (WARN_ON_ONCE(delalloc))
+ 		return 0;
+-	}
+-	if (unwritten) {
+-		WARN_ON_ONCE(!PageDirty(page));
++	if (WARN_ON_ONCE(unwritten))
+ 		return 0;
+-	}
+ 
+ 	return try_to_free_buffers(page);
+ }
diff --git a/queue-4.9/xfs-capture-state-of-the-right-inode-in-xfs_iflush_done.patch b/queue-4.9/xfs-capture-state-of-the-right-inode-in-xfs_iflush_done.patch
new file mode 100644
index 00000000000..ec52d50b172
--- /dev/null
+++ b/queue-4.9/xfs-capture-state-of-the-right-inode-in-xfs_iflush_done.patch
@@ -0,0 +1,40 @@
+From 842f6e9f786226c58fcbd5ef80eadca72fdfe652 Mon Sep 17 00:00:00 2001
+From: Carlos Maiolino <cmaiolino@redhat.com>
+Date: Fri, 22 Sep 2017 11:47:46 -0700
+Subject: xfs: Capture state of the right inode in xfs_iflush_done
+
+From: Carlos Maiolino <cmaiolino@redhat.com>
+
+commit 842f6e9f786226c58fcbd5ef80eadca72fdfe652 upstream.
+
+My previous patch, d3a304b6292168b83b45d624784f973fdc1ca674, checks for
+the XFS_LI_FAILED flag in xfs_iflush_done, so the failed item can be
+properly resubmitted.
+
+In the loop scanning other inodes being completed, it should check the
+current item for the XFS_LI_FAILED, and not the initial one.
+
+The state of the initial inode is checked after the loop ends.
+
+Kudos to Eric for catching this.
+
+Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/xfs_inode_item.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/xfs/xfs_inode_item.c
++++ b/fs/xfs/xfs_inode_item.c
+@@ -745,7 +745,7 @@ xfs_iflush_done(
+ 		 */
+ 		iip = INODE_ITEM(blip);
+ 		if ((iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) ||
+-		    lip->li_flags & XFS_LI_FAILED)
++		    (blip->li_flags & XFS_LI_FAILED))
+ 			need_ail++;
+ 
+ 		blip = next;
diff --git a/queue-4.9/xfs-don-t-change-inode-mode-if-acl-update-fails.patch b/queue-4.9/xfs-don-t-change-inode-mode-if-acl-update-fails.patch
new file mode 100644
index 00000000000..8c0ff5024ee
--- /dev/null
+++ b/queue-4.9/xfs-don-t-change-inode-mode-if-acl-update-fails.patch
@@ -0,0 +1,72 @@
+From 67f2ffe31d1a683170c2ba0ecc643e42a5fdd397 Mon Sep 17 00:00:00 2001
+From: Dave Chinner <dchinner@redhat.com>
+Date: Mon, 9 Oct 2017 11:37:23 -0700
+Subject: xfs: don't change inode mode if ACL update fails
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit 67f2ffe31d1a683170c2ba0ecc643e42a5fdd397 upstream.
+
+If we get ENOSPC half way through setting the ACL, the inode mode
+can still be changed even though the ACL does not exist. Reorder the
+operation to only change the mode of the inode if the ACL is set
+correctly.
+
+Whilst this does not fix the problem with crash consistency (that requires
+attribute addition to be a deferred op), it does allow ENOSPC and other
+non-fatal errors when setting an xattr to be handled sanely.
+
+This fixes xfstests generic/449.
+
+Signed-Off-By: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/xfs_acl.c |   22 ++++++++++++++++------
+ 1 file changed, 16 insertions(+), 6 deletions(-)
+
+--- a/fs/xfs/xfs_acl.c
++++ b/fs/xfs/xfs_acl.c
+@@ -247,6 +247,8 @@ xfs_set_mode(struct inode *inode, umode_
+ int
+ xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+ {
++	umode_t mode;
++	bool set_mode = false;
+ 	int error = 0;
+ 
+ 	if (!acl)
+@@ -257,16 +259,24 @@ xfs_set_acl(struct inode *inode, struct
+ 		return error;
+ 
+ 	if (type == ACL_TYPE_ACCESS) {
+-		umode_t mode;
+-
+ 		error = posix_acl_update_mode(inode, &mode, &acl);
+ 		if (error)
+ 			return error;
+-		error = xfs_set_mode(inode, mode);
+-		if (error)
+-			return error;
++		set_mode = true;
+ 	}
+ 
+  set_acl:
+-	return __xfs_set_acl(inode, acl, type);
++	error =  __xfs_set_acl(inode, acl, type);
++	if (error)
++		return error;
++
++	/*
++	 * We set the mode after successfully updating the ACL xattr because the
++	 * xattr update can fail at ENOSPC and we don't want to change the mode
++	 * if the ACL update hasn't been applied.
++	 */
++	if (set_mode)
++		error = xfs_set_mode(inode, mode);
++
++	return error;
+ }
diff --git a/queue-4.9/xfs-don-t-log-uninitialised-fields-in-inode-structures.patch b/queue-4.9/xfs-don-t-log-uninitialised-fields-in-inode-structures.patch
new file mode 100644
index 00000000000..bb9121e6be3
--- /dev/null
+++ b/queue-4.9/xfs-don-t-log-uninitialised-fields-in-inode-structures.patch
@@ -0,0 +1,226 @@
+From 20413e37d71befd02b5846acdaf5e2564dd1c38e Mon Sep 17 00:00:00 2001
+From: Dave Chinner <dchinner@redhat.com>
+Date: Mon, 9 Oct 2017 11:37:22 -0700
+Subject: xfs: Don't log uninitialised fields in inode structures
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit 20413e37d71befd02b5846acdaf5e2564dd1c38e upstream.
+
+Prevent kmemcheck from throwing warnings about reading uninitialised
+memory when formatting inodes into the incore log buffer. There are
+several issues here - we don't always log all the fields in the
+inode log format item, and we never log the inode's
+di_next_unlinked field.
+
+In the case of the inode log format item, this is exacerbated
+by the old xfs_inode_log_format structure padding issue. Hence make
+the padded, 64 bit aligned version of the structure the one we always
+use for formatting the log and get rid of the 64 bit variant. This
+means we'll always log the 64-bit version and so recovery only needs
+to convert from the unpadded 32 bit version from older 32 bit
+kernels.
+
+Signed-Off-By: Dave Chinner <dchinner@redhat.com>
+Tested-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+
+---
+ fs/xfs/libxfs/xfs_log_format.h |   27 ++++---------
+ fs/xfs/xfs_inode_item.c        |   82 ++++++++++++++++++++---------------------
+ fs/xfs/xfs_ondisk.h            |    2 -
+ 3 files changed, 50 insertions(+), 61 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_log_format.h
++++ b/fs/xfs/libxfs/xfs_log_format.h
+@@ -270,6 +270,7 @@ typedef struct xfs_inode_log_format {
+ 	__uint32_t		ilf_fields;	/* flags for fields logged */
+ 	__uint16_t		ilf_asize;	/* size of attr d/ext/root */
+ 	__uint16_t		ilf_dsize;	/* size of data/ext/root */
++	__uint32_t		ilf_pad;	/* pad for 64 bit boundary */
+ 	__uint64_t		ilf_ino;	/* inode number */
+ 	union {
+ 		__uint32_t	ilfu_rdev;	/* rdev value for dev inode*/
+@@ -280,29 +281,17 @@ typedef struct xfs_inode_log_format {
+ 	__int32_t		ilf_boffset;	/* off of inode in buffer */
+ } xfs_inode_log_format_t;
+ 
+-typedef struct xfs_inode_log_format_32 {
+-	__uint16_t		ilf_type;	/* inode log item type */
+-	__uint16_t		ilf_size;	/* size of this item */
+-	__uint32_t		ilf_fields;	/* flags for fields logged */
+-	__uint16_t		ilf_asize;	/* size of attr d/ext/root */
+-	__uint16_t		ilf_dsize;	/* size of data/ext/root */
+-	__uint64_t		ilf_ino;	/* inode number */
+-	union {
+-		__uint32_t	ilfu_rdev;	/* rdev value for dev inode*/
+-		uuid_t		ilfu_uuid;	/* mount point value */
+-	} ilf_u;
+-	__int64_t		ilf_blkno;	/* blkno of inode buffer */
+-	__int32_t		ilf_len;	/* len of inode buffer */
+-	__int32_t		ilf_boffset;	/* off of inode in buffer */
+-} __attribute__((packed)) xfs_inode_log_format_32_t;
+-
+-typedef struct xfs_inode_log_format_64 {
++/*
++ * Old 32 bit systems will log in this format without the 64 bit
++ * alignment padding. Recovery will detect this and convert it to the
++ * correct format.
++ */
++struct xfs_inode_log_format_32 {
+ 	__uint16_t		ilf_type;	/* inode log item type */
+ 	__uint16_t		ilf_size;	/* size of this item */
+ 	__uint32_t		ilf_fields;	/* flags for fields logged */
+ 	__uint16_t		ilf_asize;	/* size of attr d/ext/root */
+ 	__uint16_t		ilf_dsize;	/* size of data/ext/root */
+-	__uint32_t		ilf_pad;	/* pad for 64 bit boundary */
+ 	__uint64_t		ilf_ino;	/* inode number */
+ 	union {
+ 		__uint32_t	ilfu_rdev;	/* rdev value for dev inode*/
+@@ -311,7 +300,7 @@ typedef struct xfs_inode_log_format_64 {
+ 	__int64_t		ilf_blkno;	/* blkno of inode buffer */
+ 	__int32_t		ilf_len;	/* len of inode buffer */
+ 	__int32_t		ilf_boffset;	/* off of inode in buffer */
+-} xfs_inode_log_format_64_t;
++} __attribute__((packed));
+ 
+ 
+ /*
+--- a/fs/xfs/xfs_inode_item.c
++++ b/fs/xfs/xfs_inode_item.c
+@@ -364,6 +364,9 @@ xfs_inode_to_log_dinode(
+ 	to->di_dmstate = from->di_dmstate;
+ 	to->di_flags = from->di_flags;
+ 
++	/* log a dummy value to ensure log structure is fully initialised */
++	to->di_next_unlinked = NULLAGINO;
++
+ 	if (from->di_version == 3) {
+ 		to->di_changecount = inode->i_version;
+ 		to->di_crtime.t_sec = from->di_crtime.t_sec;
+@@ -404,6 +407,11 @@ xfs_inode_item_format_core(
+  * the second with the on-disk inode structure, and a possible third and/or
+  * fourth with the inode data/extents/b-tree root and inode attributes
+  * data/extents/b-tree root.
++ *
++ * Note: Always use the 64 bit inode log format structure so we don't
++ * leave an uninitialised hole in the format item on 64 bit systems. Log
++ * recovery on 32 bit systems handles this just fine, so there's no reason
++ * for not using an initialising the properly padded structure all the time.
+  */
+ STATIC void
+ xfs_inode_item_format(
+@@ -412,8 +420,8 @@ xfs_inode_item_format(
+ {
+ 	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
+ 	struct xfs_inode	*ip = iip->ili_inode;
+-	struct xfs_inode_log_format *ilf;
+ 	struct xfs_log_iovec	*vecp = NULL;
++	struct xfs_inode_log_format *ilf;
+ 
+ 	ASSERT(ip->i_d.di_version > 1);
+ 
+@@ -425,7 +433,17 @@ xfs_inode_item_format(
+ 	ilf->ilf_boffset = ip->i_imap.im_boffset;
+ 	ilf->ilf_fields = XFS_ILOG_CORE;
+ 	ilf->ilf_size = 2; /* format + core */
+-	xlog_finish_iovec(lv, vecp, sizeof(struct xfs_inode_log_format));
++
++	/*
++	 * make sure we don't leak uninitialised data into the log in the case
++	 * when we don't log every field in the inode.
++	 */
++	ilf->ilf_dsize = 0;
++	ilf->ilf_asize = 0;
++	ilf->ilf_pad = 0;
++	memset(&ilf->ilf_u.ilfu_uuid, 0, sizeof(ilf->ilf_u.ilfu_uuid));
++
++	xlog_finish_iovec(lv, vecp, sizeof(*ilf));
+ 
+ 	xfs_inode_item_format_core(ip, lv, &vecp);
+ 	xfs_inode_item_format_data_fork(iip, ilf, lv, &vecp);
+@@ -855,48 +873,30 @@ xfs_istale_done(
+ }
+ 
+ /*
+- * convert an xfs_inode_log_format struct from either 32 or 64 bit versions
+- * (which can have different field alignments) to the native version
++ * convert an xfs_inode_log_format struct from the old 32 bit version
++ * (which can have different field alignments) to the native 64 bit version
+  */
+ int
+ xfs_inode_item_format_convert(
+-	xfs_log_iovec_t		*buf,
+-	xfs_inode_log_format_t	*in_f)
++	struct xfs_log_iovec		*buf,
++	struct xfs_inode_log_format	*in_f)
+ {
+-	if (buf->i_len == sizeof(xfs_inode_log_format_32_t)) {
+-		xfs_inode_log_format_32_t *in_f32 = buf->i_addr;
++	struct xfs_inode_log_format_32	*in_f32 = buf->i_addr;
+ 
+-		in_f->ilf_type = in_f32->ilf_type;
+-		in_f->ilf_size = in_f32->ilf_size;
+-		in_f->ilf_fields = in_f32->ilf_fields;
+-		in_f->ilf_asize = in_f32->ilf_asize;
+-		in_f->ilf_dsize = in_f32->ilf_dsize;
+-		in_f->ilf_ino = in_f32->ilf_ino;
+-		/* copy biggest field of ilf_u */
+-		memcpy(in_f->ilf_u.ilfu_uuid.__u_bits,
+-		       in_f32->ilf_u.ilfu_uuid.__u_bits,
+-		       sizeof(uuid_t));
+-		in_f->ilf_blkno = in_f32->ilf_blkno;
+-		in_f->ilf_len = in_f32->ilf_len;
+-		in_f->ilf_boffset = in_f32->ilf_boffset;
+-		return 0;
+-	} else if (buf->i_len == sizeof(xfs_inode_log_format_64_t)){
+-		xfs_inode_log_format_64_t *in_f64 = buf->i_addr;
+-
+-		in_f->ilf_type = in_f64->ilf_type;
+-		in_f->ilf_size = in_f64->ilf_size;
+-		in_f->ilf_fields = in_f64->ilf_fields;
+-		in_f->ilf_asize = in_f64->ilf_asize;
+-		in_f->ilf_dsize = in_f64->ilf_dsize;
+-		in_f->ilf_ino = in_f64->ilf_ino;
+-		/* copy biggest field of ilf_u */
+-		memcpy(in_f->ilf_u.ilfu_uuid.__u_bits,
+-		       in_f64->ilf_u.ilfu_uuid.__u_bits,
+-		       sizeof(uuid_t));
+-		in_f->ilf_blkno = in_f64->ilf_blkno;
+-		in_f->ilf_len = in_f64->ilf_len;
+-		in_f->ilf_boffset = in_f64->ilf_boffset;
+-		return 0;
+-	}
+-	return -EFSCORRUPTED;
++	if (buf->i_len != sizeof(*in_f32))
++		return -EFSCORRUPTED;
++
++	in_f->ilf_type = in_f32->ilf_type;
++	in_f->ilf_size = in_f32->ilf_size;
++	in_f->ilf_fields = in_f32->ilf_fields;
++	in_f->ilf_asize = in_f32->ilf_asize;
++	in_f->ilf_dsize = in_f32->ilf_dsize;
++	in_f->ilf_ino = in_f32->ilf_ino;
++	/* copy biggest field of ilf_u */
++	memcpy(in_f->ilf_u.ilfu_uuid.__u_bits,
++	       in_f32->ilf_u.ilfu_uuid.__u_bits, sizeof(uuid_t));
++	in_f->ilf_blkno = in_f32->ilf_blkno;
++	in_f->ilf_len = in_f32->ilf_len;
++	in_f->ilf_boffset = in_f32->ilf_boffset;
++	return 0;
+ }
+--- a/fs/xfs/xfs_ondisk.h
++++ b/fs/xfs/xfs_ondisk.h
+@@ -134,7 +134,7 @@ xfs_check_ondisk_structs(void)
+ 	XFS_CHECK_STRUCT_SIZE(struct xfs_icreate_log,		28);
+ 	XFS_CHECK_STRUCT_SIZE(struct xfs_ictimestamp,		8);
+ 	XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format_32,	52);
+-	XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format_64,	56);
++	XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format,	56);
+ 	XFS_CHECK_STRUCT_SIZE(struct xfs_qoff_logformat,	20);
+ 	XFS_CHECK_STRUCT_SIZE(struct xfs_trans_header,		16);
+ }
diff --git a/queue-4.9/xfs-don-t-unconditionally-clear-the-reflink-flag-on-zero-block-files.patch b/queue-4.9/xfs-don-t-unconditionally-clear-the-reflink-flag-on-zero-block-files.patch
new file mode 100644
index 00000000000..4af867509db
--- /dev/null
+++ b/queue-4.9/xfs-don-t-unconditionally-clear-the-reflink-flag-on-zero-block-files.patch
@@ -0,0 +1,42 @@
+From cc6f77710a6de6210f9feda7cd53e2f5ee7a7e69 Mon Sep 17 00:00:00 2001
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+Date: Mon, 18 Sep 2017 09:41:16 -0700
+Subject: xfs: don't unconditionally clear the reflink flag on zero-block files
+
+From: Darrick J. Wong <darrick.wong@oracle.com>
+
+commit cc6f77710a6de6210f9feda7cd53e2f5ee7a7e69 upstream.
+
+If we have speculative cow preallocations hanging around in the cow
+fork, don't let a truncate operation clear the reflink flag because if
+we do then there's a chance we'll forget to free those extents when we
+destroy the incore inode.
+
+Reported-by: Amir Goldstein <amir73il@gmail.com>
+Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/xfs_inode.c |    8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/fs/xfs/xfs_inode.c
++++ b/fs/xfs/xfs_inode.c
+@@ -1632,10 +1632,12 @@ xfs_itruncate_extents(
+ 		goto out;
+ 
+ 	/*
+-	 * Clear the reflink flag if we truncated everything.
++	 * Clear the reflink flag if there are no data fork blocks and
++	 * there are no extents staged in the cow fork.
+ 	 */
+-	if (ip->i_d.di_nblocks == 0 && xfs_is_reflink_inode(ip)) {
+-		ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
++	if (xfs_is_reflink_inode(ip) && ip->i_cnextents == 0) {
++		if (ip->i_d.di_nblocks == 0)
++			ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
+ 		xfs_inode_clear_cowblocks_tag(ip);
+ 	}
+ 
diff --git a/queue-4.9/xfs-evict-cow-fork-extents-when-performing-finsert-fcollapse.patch b/queue-4.9/xfs-evict-cow-fork-extents-when-performing-finsert-fcollapse.patch
new file mode 100644
index 00000000000..9b6e05f349c
--- /dev/null
+++ b/queue-4.9/xfs-evict-cow-fork-extents-when-performing-finsert-fcollapse.patch
@@ -0,0 +1,46 @@
+From 3af423b03435c81036fa710623d3ae92fbe346a3 Mon Sep 17 00:00:00 2001
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+Date: Mon, 18 Sep 2017 09:41:17 -0700
+Subject: xfs: evict CoW fork extents when performing finsert/fcollapse
+
+From: Darrick J. Wong <darrick.wong@oracle.com>
+
+commit 3af423b03435c81036fa710623d3ae92fbe346a3 upstream.
+
+When we perform an finsert/fcollapse operation, cancel all the CoW
+extents for the affected file offset range so that they don't end up
+pointing to the wrong blocks.
+
+Reported-by: Amir Goldstein <amir73il@gmail.com>
+Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/xfs_bmap_util.c |   14 +++++++++++++-
+ 1 file changed, 13 insertions(+), 1 deletion(-)
+
+--- a/fs/xfs/xfs_bmap_util.c
++++ b/fs/xfs/xfs_bmap_util.c
+@@ -1445,7 +1445,19 @@ xfs_shift_file_space(
+ 		return error;
+ 
+ 	/*
+-	 * The extent shiting code works on extent granularity. So, if
++	 * Clean out anything hanging around in the cow fork now that
++	 * we've flushed all the dirty data out to disk to avoid having
++	 * CoW extents at the wrong offsets.
++	 */
++	if (xfs_is_reflink_inode(ip)) {
++		error = xfs_reflink_cancel_cow_range(ip, offset, NULLFILEOFF,
++				true);
++		if (error)
++			return error;
++	}
++
++	/*
++	 * The extent shifting code works on extent granularity. So, if
+ 	 * stop_fsb is not the starting block of extent, we need to split
+ 	 * the extent at stop_fsb.
+ 	 */
diff --git a/queue-4.9/xfs-handle-error-if-xfs_btree_get_bufs-fails.patch b/queue-4.9/xfs-handle-error-if-xfs_btree_get_bufs-fails.patch
new file mode 100644
index 00000000000..b157b28cdb9
--- /dev/null
+++ b/queue-4.9/xfs-handle-error-if-xfs_btree_get_bufs-fails.patch
@@ -0,0 +1,60 @@
+From 93e8befc17f6d6ea92b0aee3741ceac8bca4590f Mon Sep 17 00:00:00 2001
+From: Eric Sandeen <sandeen@sandeen.net>
+Date: Mon, 9 Oct 2017 21:08:06 -0700
+Subject: xfs: handle error if xfs_btree_get_bufs fails
+
+From: Eric Sandeen <sandeen@sandeen.net>
+
+commit 93e8befc17f6d6ea92b0aee3741ceac8bca4590f upstream.
+
+Jason reported that a corrupted filesystem failed to replay
+the log with a metadata block out of bounds warning:
+
+XFS (dm-2): _xfs_buf_find: Block out of range: block 0x80270fff8, EOFS 0x9c40000
+
+_xfs_buf_find() and xfs_btree_get_bufs() return NULL if
+that happens, and then when xfs_alloc_fix_freelist() calls
+xfs_trans_binval() on that NULL bp, we oops with:
+
+BUG: unable to handle kernel NULL pointer dereference at 00000000000000f8
+
+We don't handle _xfs_buf_find errors very well, every
+caller higher up the stack gets to guess at why it failed.
+But we should at least handle it somehow, so return
+EFSCORRUPTED here.
+
+Reported-by: Jason L Tibbitts III <tibbs@math.uh.edu>
+Signed-off-by: Eric Sandeen <sandeen@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/libxfs/xfs_alloc.c |    8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/fs/xfs/libxfs/xfs_alloc.c
++++ b/fs/xfs/libxfs/xfs_alloc.c
+@@ -1579,6 +1579,10 @@ xfs_alloc_ag_vextent_small(
+ 
+ 				bp = xfs_btree_get_bufs(args->mp, args->tp,
+ 					args->agno, fbno, 0);
++				if (!bp) {
++					error = -EFSCORRUPTED;
++					goto error0;
++				}
+ 				xfs_trans_binval(args->tp, bp);
+ 			}
+ 			args->len = 1;
+@@ -2136,6 +2140,10 @@ xfs_alloc_fix_freelist(
+ 		if (error)
+ 			goto out_agbp_relse;
+ 		bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
++		if (!bp) {
++			error = -EFSCORRUPTED;
++			goto out_agbp_relse;
++		}
+ 		xfs_trans_binval(tp, bp);
+ 	}
+ 
diff --git a/queue-4.9/xfs-handle-racy-aio-in-xfs_reflink_end_cow.patch b/queue-4.9/xfs-handle-racy-aio-in-xfs_reflink_end_cow.patch
new file mode 100644
index 00000000000..17d555b9ab9
--- /dev/null
+++ b/queue-4.9/xfs-handle-racy-aio-in-xfs_reflink_end_cow.patch
@@ -0,0 +1,48 @@
+From e12199f85d0ad1b04ce6c425ad93cd847fe930bb Mon Sep 17 00:00:00 2001
+From: Christoph Hellwig <hch@lst.de>
+Date: Tue, 3 Oct 2017 08:58:33 -0700
+Subject:  xfs: handle racy AIO in xfs_reflink_end_cow
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit e12199f85d0ad1b04ce6c425ad93cd847fe930bb upstream.
+
+If we got two AIO writes into a COW area the second one might not have any
+COW extents left to convert.  Handle that case gracefully instead of
+triggering an assert or accessing beyond the bounds of the extent list.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+
+---
+ fs/xfs/xfs_reflink.c |    9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/fs/xfs/xfs_reflink.c
++++ b/fs/xfs/xfs_reflink.c
+@@ -767,7 +767,13 @@ xfs_reflink_end_cow(
+ 
+ 	/* If there is a hole at end_fsb - 1 go to the previous extent */
+ 	if (eof || got.br_startoff > end_fsb) {
+-		ASSERT(idx > 0);
++		/*
++		 * In case of racing, overlapping AIO writes no COW extents
++		 * might be left by the time I/O completes for the loser of
++		 * the race.  In that case we are done.
++		 */
++		if (idx <= 0)
++			goto out_cancel;
+ 		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, --idx), &got);
+ 	}
+ 
+@@ -841,6 +847,7 @@ next_extent:
+ 
+ out_defer:
+ 	xfs_defer_cancel(&dfops);
++out_cancel:
+ 	xfs_trans_cancel(tp);
+ 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ out:
diff --git a/queue-4.9/xfs-move-more-rt-specific-code-under-config_xfs_rt.patch b/queue-4.9/xfs-move-more-rt-specific-code-under-config_xfs_rt.patch
new file mode 100644
index 00000000000..88cbd71c0ba
--- /dev/null
+++ b/queue-4.9/xfs-move-more-rt-specific-code-under-config_xfs_rt.patch
@@ -0,0 +1,69 @@
+From bb9c2e5433250f5b477035dc478314f8e6dd5e36 Mon Sep 17 00:00:00 2001
+From: Dave Chinner <dchinner@redhat.com>
+Date: Mon, 9 Oct 2017 11:37:22 -0700
+Subject: xfs: move more RT specific code under CONFIG_XFS_RT
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit bb9c2e5433250f5b477035dc478314f8e6dd5e36 upstream.
+
+Various utility functions and interfaces that iterate internal
+devices try to reference the realtime device even when RT support is
+not compiled into the kernel.
+
+Make sure this code is excluded from the CONFIG_XFS_RT=n build,
+and where appropriate stub functions to return fatal errors if
+they ever get called when RT support is not present.
+
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/xfs_bmap_util.c |    2 ++
+ fs/xfs/xfs_bmap_util.h |   13 +++++++++++++
+ 2 files changed, 15 insertions(+)
+
+--- a/fs/xfs/xfs_bmap_util.c
++++ b/fs/xfs/xfs_bmap_util.c
+@@ -84,6 +84,7 @@ xfs_zero_extent(
+ 		GFP_NOFS, true);
+ }
+ 
++#ifdef CONFIG_XFS_RT
+ int
+ xfs_bmap_rtalloc(
+ 	struct xfs_bmalloca	*ap)	/* bmap alloc argument struct */
+@@ -195,6 +196,7 @@ xfs_bmap_rtalloc(
+ 	}
+ 	return 0;
+ }
++#endif /* CONFIG_XFS_RT */
+ 
+ /*
+  * Check if the endoff is outside the last extent. If so the caller will grow
+--- a/fs/xfs/xfs_bmap_util.h
++++ b/fs/xfs/xfs_bmap_util.h
+@@ -28,7 +28,20 @@ struct xfs_mount;
+ struct xfs_trans;
+ struct xfs_bmalloca;
+ 
++#ifdef CONFIG_XFS_RT
+ int	xfs_bmap_rtalloc(struct xfs_bmalloca *ap);
++#else /* !CONFIG_XFS_RT */
++/*
++ * Attempts to allocate RT extents when RT is disabled indicate corruption and
++ * should trigger a shutdown.
++ */
++static inline int
++xfs_bmap_rtalloc(struct xfs_bmalloca *ap)
++{
++	return -EFSCORRUPTED;
++}
++#endif /* CONFIG_XFS_RT */
++
+ int	xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff,
+ 		     int whichfork, int *eof);
+ int	xfs_bmap_punch_delalloc_range(struct xfs_inode *ip,
diff --git a/queue-4.9/xfs-perag-initialization-should-only-touch-m_ag_max_usable-for-ag-0.patch b/queue-4.9/xfs-perag-initialization-should-only-touch-m_ag_max_usable-for-ag-0.patch
new file mode 100644
index 00000000000..e161a52a942
--- /dev/null
+++ b/queue-4.9/xfs-perag-initialization-should-only-touch-m_ag_max_usable-for-ag-0.patch
@@ -0,0 +1,50 @@
+From 9789dd9e1d939232e8ff4c50ef8e75aa6781b3fb Mon Sep 17 00:00:00 2001
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+Date: Mon, 18 Sep 2017 09:42:09 -0700
+Subject: xfs: perag initialization should only touch m_ag_max_usable for AG 0
+
+From: Darrick J. Wong <darrick.wong@oracle.com>
+
+commit 9789dd9e1d939232e8ff4c50ef8e75aa6781b3fb upstream.
+
+We call __xfs_ag_resv_init to make a per-AG reservation for each AG.
+This makes the reservation per-AG, not per-filesystem.  Therefore, it
+is incorrect to adjust m_ag_max_usable for each AG.  Adjust it only
+when we're reserving AG 0's blocks so that we only do it once per fs.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/libxfs/xfs_ag_resv.c |   12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_ag_resv.c
++++ b/fs/xfs/libxfs/xfs_ag_resv.c
+@@ -157,7 +157,8 @@ __xfs_ag_resv_free(
+ 	trace_xfs_ag_resv_free(pag, type, 0);
+ 
+ 	resv = xfs_perag_resv(pag, type);
+-	pag->pag_mount->m_ag_max_usable += resv->ar_asked;
++	if (pag->pag_agno == 0)
++		pag->pag_mount->m_ag_max_usable += resv->ar_asked;
+ 	/*
+ 	 * AGFL blocks are always considered "free", so whatever
+ 	 * was reserved at mount time must be given back at umount.
+@@ -217,7 +218,14 @@ __xfs_ag_resv_init(
+ 		return error;
+ 	}
+ 
+-	mp->m_ag_max_usable -= ask;
++	/*
++	 * Reduce the maximum per-AG allocation length by however much we're
++	 * trying to reserve for an AG.  Since this is a filesystem-wide
++	 * counter, we only make the adjustment for AG 0.  This assumes that
++	 * there aren't any AGs hungrier for per-AG reservation than AG 0.
++	 */
++	if (pag->pag_agno == 0)
++		mp->m_ag_max_usable -= ask;
+ 
+ 	resv = xfs_perag_resv(pag, type);
+ 	resv->ar_asked = ask;
diff --git a/queue-4.9/xfs-reinit-btree-pointer-on-attr-tree-inactivation-walk.patch b/queue-4.9/xfs-reinit-btree-pointer-on-attr-tree-inactivation-walk.patch
new file mode 100644
index 00000000000..ab2abafef3d
--- /dev/null
+++ b/queue-4.9/xfs-reinit-btree-pointer-on-attr-tree-inactivation-walk.patch
@@ -0,0 +1,54 @@
+From f35c5e10c6ed6ba52a8dd8573924a80b6a02f03f Mon Sep 17 00:00:00 2001
+From: Brian Foster <bfoster@redhat.com>
+Date: Mon, 9 Oct 2017 11:38:56 -0700
+Subject: xfs: reinit btree pointer on attr tree inactivation walk
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit f35c5e10c6ed6ba52a8dd8573924a80b6a02f03f upstream.
+
+xfs_attr3_root_inactive() walks the attr fork tree to invalidate the
+associated blocks. xfs_attr3_node_inactive() recursively descends
+from internal blocks to leaf blocks, caching block address values
+along the way to revisit parent blocks, locate the next entry and
+descend down that branch of the tree.
+
+The code that attempts to reread the parent block is unsafe because
+it assumes that the local xfs_da_node_entry pointer remains valid
+after an xfs_trans_brelse() and re-read of the parent buffer. Under
+heavy memory pressure, it is possible that the buffer has been
+reclaimed and reallocated by the time the parent block is reread.
+This means that 'btree' can point to an invalid memory address, lead
+to a random/garbage value for child_fsb and cause the subsequent
+read of the attr fork to go off the rails and return a NULL buffer
+for an attr fork offset that is most likely not allocated.
+
+Note that this problem can be manufactured by setting
+XFS_ATTR_BTREE_REF to 0 to prevent LRU caching of attr buffers,
+creating a file with a multi-level attr fork and removing it to
+trigger inactivation.
+
+To address this problem, reinit the node/btree pointers to the
+parent buffer after it has been re-read. This ensures btree points
+to a valid record and allows the walk to proceed.
+
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/xfs_attr_inactive.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/xfs/xfs_attr_inactive.c
++++ b/fs/xfs/xfs_attr_inactive.c
+@@ -302,6 +302,8 @@ xfs_attr3_node_inactive(
+ 						 &bp, XFS_ATTR_FORK);
+ 			if (error)
+ 				return error;
++			node = bp->b_addr;
++			btree = dp->d_ops->node_tree_p(node);
+ 			child_fsb = be32_to_cpu(btree[i + 1].before);
+ 			xfs_trans_brelse(*trans, bp);
+ 		}
diff --git a/queue-4.9/xfs-report-zeroed-or-not-correctly-in-xfs_zero_range.patch b/queue-4.9/xfs-report-zeroed-or-not-correctly-in-xfs_zero_range.patch
new file mode 100644
index 00000000000..e9774532bb0
--- /dev/null
+++ b/queue-4.9/xfs-report-zeroed-or-not-correctly-in-xfs_zero_range.patch
@@ -0,0 +1,36 @@
+From d20a5e3851969fa685f118a80e4df670255a4e8d Mon Sep 17 00:00:00 2001
+From: Eryu Guan <eguan@redhat.com>
+Date: Mon, 18 Sep 2017 11:39:23 -0700
+Subject: xfs: report zeroed or not correctly in xfs_zero_range()
+
+From: Eryu Guan <eguan@redhat.com>
+
+commit d20a5e3851969fa685f118a80e4df670255a4e8d upstream.
+
+The 'did_zero' param of xfs_zero_range() was not passed to
+iomap_zero_range() correctly. This was introduced by commit
+7bb41db3ea16 ("xfs: handle 64-bit length in xfs_iozero"), and found
+by code inspection.
+
+Signed-off-by: Eryu Guan <eguan@redhat.com>
+Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/xfs_file.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/xfs/xfs_file.c
++++ b/fs/xfs/xfs_file.c
+@@ -92,7 +92,7 @@ xfs_zero_range(
+ 	xfs_off_t		count,
+ 	bool			*did_zero)
+ {
+-	return iomap_zero_range(VFS_I(ip), pos, count, NULL, &xfs_iomap_ops);
++	return iomap_zero_range(VFS_I(ip), pos, count, did_zero, &xfs_iomap_ops);
+ }
+ 
+ int
diff --git a/queue-4.9/xfs-trim-writepage-mapping-to-within-eof.patch b/queue-4.9/xfs-trim-writepage-mapping-to-within-eof.patch
new file mode 100644
index 00000000000..f48e07aa504
--- /dev/null
+++ b/queue-4.9/xfs-trim-writepage-mapping-to-within-eof.patch
@@ -0,0 +1,119 @@
+From 40214d128e07dd21bb07a8ed6a7fe2f911281ab2 Mon Sep 17 00:00:00 2001
+From: Brian Foster <bfoster@redhat.com>
+Date: Fri, 13 Oct 2017 09:47:46 -0700
+Subject: xfs: trim writepage mapping to within eof
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit 40214d128e07dd21bb07a8ed6a7fe2f911281ab2 upstream.
+
+The writeback rework in commit fbcc02561359 ("xfs: Introduce
+writeback context for writepages") introduced a subtle change in
+behavior with regard to the block mapping used across the
+->writepages() sequence. The previous xfs_cluster_write() code would
+only flush pages up to EOF at the time of the writepage, thus
+ensuring that any pages due to file-extending writes would be
+handled on a separate cycle and with a new, updated block mapping.
+
+The updated code establishes a block mapping in xfs_writepage_map()
+that could extend beyond EOF if the file has post-eof preallocation.
+Because we now use the generic writeback infrastructure and pass the
+cached mapping to each writepage call, there is no implicit EOF
+limit in place. If eofblocks trimming occurs during ->writepages(),
+any post-eof portion of the cached mapping becomes invalid. The
+eofblocks code has no means to serialize against writeback because
+there are no pages associated with post-eof blocks. Therefore if an
+eofblocks trim occurs and is followed by a file-extending buffered
+write, not only has the mapping become invalid, but we could end up
+writing a page to disk based on the invalid mapping.
+
+Consider the following sequence of events:
+
+- A buffered write creates a delalloc extent and post-eof
+  speculative preallocation.
+- Writeback starts and on the first writepage cycle, the delalloc
+  extent is converted to real blocks (including the post-eof blocks)
+  and the mapping is cached.
+- The file is closed and xfs_release() trims post-eof blocks. The
+  cached writeback mapping is now invalid.
+- Another buffered write appends to the file with a delalloc extent.
+- The concurrent writeback cycle picks up the just written page
+  because the writeback range end is LLONG_MAX. xfs_writepage_map()
+  attributes it to the (now invalid) cached mapping and writes the
+  data to an incorrect location on disk (and where the file offset is
+  still backed by a delalloc extent).
+
+This problem is reproduced by xfstests test generic/464, which
+triggers racing writes, appends, open/closes and writeback requests.
+
+To address this problem, trim the mapping used during writeback to
+within EOF when the mapping is validated. This ensures the mapping
+is revalidated for any pages encountered beyond EOF as of the time
+the current mapping was cached or last validated.
+
+Reported-by: Eryu Guan <eguan@redhat.com>
+Diagnosed-by: Eryu Guan <eguan@redhat.com>
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/libxfs/xfs_bmap.c |   11 +++++++++++
+ fs/xfs/libxfs/xfs_bmap.h |    1 +
+ fs/xfs/xfs_aops.c        |   13 +++++++++++++
+ 3 files changed, 25 insertions(+)
+
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -4057,6 +4057,17 @@ xfs_trim_extent(
+ 	}
+ }
+ 
++/* trim extent to within eof */
++void
++xfs_trim_extent_eof(
++	struct xfs_bmbt_irec	*irec,
++	struct xfs_inode	*ip)
++
++{
++	xfs_trim_extent(irec, 0, XFS_B_TO_FSB(ip->i_mount,
++					      i_size_read(VFS_I(ip))));
++}
++
+ /*
+  * Trim the returned map to the required bounds
+  */
+--- a/fs/xfs/libxfs/xfs_bmap.h
++++ b/fs/xfs/libxfs/xfs_bmap.h
+@@ -196,6 +196,7 @@ void	xfs_bmap_trace_exlist(struct xfs_in
+ 
+ void	xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno,
+ 		xfs_filblks_t len);
++void	xfs_trim_extent_eof(struct xfs_bmbt_irec *, struct xfs_inode *);
+ int	xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd);
+ void	xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork);
+ void	xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
+--- a/fs/xfs/xfs_aops.c
++++ b/fs/xfs/xfs_aops.c
+@@ -438,6 +438,19 @@ xfs_imap_valid(
+ {
+ 	offset >>= inode->i_blkbits;
+ 
++	/*
++	 * We have to make sure the cached mapping is within EOF to protect
++	 * against eofblocks trimming on file release leaving us with a stale
++	 * mapping. Otherwise, a page for a subsequent file extending buffered
++	 * write could get picked up by this writeback cycle and written to the
++	 * wrong blocks.
++	 *
++	 * Note that what we really want here is a generic mapping invalidation
++	 * mechanism to protect us from arbitrary extent modifying contexts, not
++	 * just eofblocks.
++	 */
++	xfs_trim_extent_eof(imap, XFS_I(inode));
++
+ 	return offset >= imap->br_startoff &&
+ 		offset < imap->br_startoff + imap->br_blockcount;
+ }
diff --git a/queue-4.9/xfs-update-i_size-after-unwritten-conversion-in-dio-completion.patch b/queue-4.9/xfs-update-i_size-after-unwritten-conversion-in-dio-completion.patch
new file mode 100644
index 00000000000..fc7d6e6f2aa
--- /dev/null
+++ b/queue-4.9/xfs-update-i_size-after-unwritten-conversion-in-dio-completion.patch
@@ -0,0 +1,140 @@
+From ee70daaba82d70766d0723b743d9fdeb3b06102a Mon Sep 17 00:00:00 2001
+From: Eryu Guan <eguan@redhat.com>
+Date: Thu, 21 Sep 2017 11:26:18 -0700
+Subject: xfs: update i_size after unwritten conversion in dio completion
+
+From: Eryu Guan <eguan@redhat.com>
+
+commit ee70daaba82d70766d0723b743d9fdeb3b06102a upstream.
+
+Since commit d531d91d6990 ("xfs: always use unwritten extents for
+direct I/O writes"), we start allocating unwritten extents for all
+direct writes to allow appending aio in XFS.
+
+But for dio writes that could extend file size we update the in-core
+inode size first, then convert the unwritten extents to real
+allocations at dio completion time in xfs_dio_write_end_io(). Thus a
+racing direct read could see the new i_size and find the unwritten
+extents first and read zeros instead of actual data, if the direct
+writer also takes a shared iolock.
+
+Fix it by updating the in-core inode size after the unwritten extent
+conversion. To do this, introduce a new boolean argument to
+xfs_iomap_write_unwritten() to tell if we want to update in-core
+i_size or not.
+
+Suggested-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Signed-off-by: Eryu Guan <eguan@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+[hch: backported to the old direct I/O code before Linux 4.10]
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_aops.c  |   25 +++++++++++++++++--------
+ fs/xfs/xfs_iomap.c |    7 +++++--
+ fs/xfs/xfs_iomap.h |    2 +-
+ fs/xfs/xfs_pnfs.c  |    2 +-
+ 4 files changed, 24 insertions(+), 12 deletions(-)
+
+--- a/fs/xfs/xfs_aops.c
++++ b/fs/xfs/xfs_aops.c
+@@ -335,7 +335,8 @@ xfs_end_io(
+ 		error = xfs_reflink_end_cow(ip, offset, size);
+ 		break;
+ 	case XFS_IO_UNWRITTEN:
+-		error = xfs_iomap_write_unwritten(ip, offset, size);
++		/* writeback should never update isize */
++		error = xfs_iomap_write_unwritten(ip, offset, size, false);
+ 		break;
+ 	default:
+ 		ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans);
+@@ -1532,6 +1533,21 @@ xfs_end_io_direct_write(
+ 		return 0;
+ 	}
+ 
++	if (flags & XFS_DIO_FLAG_COW)
++		error = xfs_reflink_end_cow(ip, offset, size);
++
++	/*
++	 * Unwritten conversion updates the in-core isize after extent
++	 * conversion but before updating the on-disk size. Updating isize any
++	 * earlier allows a racing dio read to find unwritten extents before
++	 * they are converted.
++	 */
++	if (flags & XFS_DIO_FLAG_UNWRITTEN) {
++		trace_xfs_end_io_direct_write_unwritten(ip, offset, size);
++
++		return xfs_iomap_write_unwritten(ip, offset, size, true);
++	}
++
+ 	/*
+ 	 * We need to update the in-core inode size here so that we don't end up
+ 	 * with the on-disk inode size being outside the in-core inode size. We
+@@ -1548,13 +1564,6 @@ xfs_end_io_direct_write(
+ 		i_size_write(inode, offset + size);
+ 	spin_unlock(&ip->i_flags_lock);
+ 
+-	if (flags & XFS_DIO_FLAG_COW)
+-		error = xfs_reflink_end_cow(ip, offset, size);
+-	if (flags & XFS_DIO_FLAG_UNWRITTEN) {
+-		trace_xfs_end_io_direct_write_unwritten(ip, offset, size);
+-
+-		error = xfs_iomap_write_unwritten(ip, offset, size);
+-	}
+ 	if (flags & XFS_DIO_FLAG_APPEND) {
+ 		trace_xfs_end_io_direct_write_append(ip, offset, size);
+ 
+--- a/fs/xfs/xfs_iomap.c
++++ b/fs/xfs/xfs_iomap.c
+@@ -836,7 +836,8 @@ int
+ xfs_iomap_write_unwritten(
+ 	xfs_inode_t	*ip,
+ 	xfs_off_t	offset,
+-	xfs_off_t	count)
++	xfs_off_t	count,
++	bool		update_isize)
+ {
+ 	xfs_mount_t	*mp = ip->i_mount;
+ 	xfs_fileoff_t	offset_fsb;
+@@ -847,6 +848,7 @@ xfs_iomap_write_unwritten(
+ 	xfs_trans_t	*tp;
+ 	xfs_bmbt_irec_t imap;
+ 	struct xfs_defer_ops dfops;
++	struct inode	*inode = VFS_I(ip);
+ 	xfs_fsize_t	i_size;
+ 	uint		resblks;
+ 	int		error;
+@@ -906,7 +908,8 @@ xfs_iomap_write_unwritten(
+ 		i_size = XFS_FSB_TO_B(mp, offset_fsb + count_fsb);
+ 		if (i_size > offset + count)
+ 			i_size = offset + count;
+-
++		if (update_isize && i_size > i_size_read(inode))
++			i_size_write(inode, i_size);
+ 		i_size = xfs_new_eof(ip, i_size);
+ 		if (i_size) {
+ 			ip->i_d.di_size = i_size;
+--- a/fs/xfs/xfs_iomap.h
++++ b/fs/xfs/xfs_iomap.h
+@@ -27,7 +27,7 @@ int xfs_iomap_write_direct(struct xfs_in
+ 			struct xfs_bmbt_irec *, int);
+ int xfs_iomap_write_allocate(struct xfs_inode *, int, xfs_off_t,
+ 			struct xfs_bmbt_irec *);
+-int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t);
++int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool);
+ 
+ void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
+ 		struct xfs_bmbt_irec *);
+--- a/fs/xfs/xfs_pnfs.c
++++ b/fs/xfs/xfs_pnfs.c
+@@ -279,7 +279,7 @@ xfs_fs_commit_blocks(
+ 					(end - 1) >> PAGE_SHIFT);
+ 		WARN_ON_ONCE(error);
+ 
+-		error = xfs_iomap_write_unwritten(ip, start, length);
++		error = xfs_iomap_write_unwritten(ip, start, length, false);
+ 		if (error)
+ 			goto out_drop_iolock;
+ 	}