From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 5 Jun 2017 14:08:53 +0000 (+0200)
Subject: 4.9-stable patches
X-Git-Tag: v3.18.56~9
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=b5e5defa077f1e319bb080a0ae2da062353a9e0b;p=thirdparty%2Fkernel%2Fstable-queue.git

4.9-stable patches

added patches:
	xfs-fix-kernel-memory-exposure-problems.patch
	xfs-fix-missed-holes-in-seek_hole-implementation.patch
	xfs-fix-off-by-one-on-max-nr_pages-in-xfs_find_get_desired_pgoff.patch
	xfs-rework-the-inline-directory-verifiers.patch
	xfs-use-b_state-to-fix-buffer-i-o-accounting-release-race.patch
	xfs-use-dedicated-log-worker-wq-to-avoid-deadlock-with-cil-wq.patch
	xfs-verify-inline-directory-data-forks.patch
---

diff --git a/queue-4.9/series b/queue-4.9/series
index 9255228e0a4..02663fc2301 100644
--- a/queue-4.9/series
+++ b/queue-4.9/series
@@ -66,3 +66,10 @@ ksm-prevent-crash-after-write_protect_page-fails.patch
 slub-memcg-cure-the-brainless-abuse-of-sysfs-attributes.patch
 mm-slub.c-trace-free-objects-at-kern_info.patch
 drm-gma500-psb-actually-use-vbt-mode-when-it-is-found.patch
+xfs-fix-missed-holes-in-seek_hole-implementation.patch
+xfs-use-b_state-to-fix-buffer-i-o-accounting-release-race.patch
+xfs-fix-off-by-one-on-max-nr_pages-in-xfs_find_get_desired_pgoff.patch
+xfs-verify-inline-directory-data-forks.patch
+xfs-rework-the-inline-directory-verifiers.patch
+xfs-fix-kernel-memory-exposure-problems.patch
+xfs-use-dedicated-log-worker-wq-to-avoid-deadlock-with-cil-wq.patch
diff --git a/queue-4.9/xfs-fix-kernel-memory-exposure-problems.patch b/queue-4.9/xfs-fix-kernel-memory-exposure-problems.patch
new file mode 100644
index 00000000000..ff07df35e54
--- /dev/null
+++ b/queue-4.9/xfs-fix-kernel-memory-exposure-problems.patch
@@ -0,0 +1,32 @@
+From bf9216f922612d2db7666aae01e65064da2ffb3a Mon Sep 17 00:00:00 2001
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+Date: Mon, 3 Apr 2017 12:22:39 -0700
+Subject: xfs: fix kernel memory exposure problems
+
+From: Darrick J. Wong <darrick.wong@oracle.com>
+
+commit bf9216f922612d2db7666aae01e65064da2ffb3a upstream.
+
+Fix a memory exposure problem in inumbers where we allocate an array of
+structures with holes, fail to zero the holes, then blindly copy the
+kernel memory contents (junk and all) into userspace.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/xfs_itable.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/xfs/xfs_itable.c
++++ b/fs/xfs/xfs_itable.c
+@@ -585,7 +585,7 @@ xfs_inumbers(
+ 		return error;
+ 
+ 	bcount = MIN(left, (int)(PAGE_SIZE / sizeof(*buffer)));
+-	buffer = kmem_alloc(bcount * sizeof(*buffer), KM_SLEEP);
++	buffer = kmem_zalloc(bcount * sizeof(*buffer), KM_SLEEP);
+ 	do {
+ 		struct xfs_inobt_rec_incore	r;
+ 		int				stat;
diff --git a/queue-4.9/xfs-fix-missed-holes-in-seek_hole-implementation.patch b/queue-4.9/xfs-fix-missed-holes-in-seek_hole-implementation.patch
new file mode 100644
index 00000000000..a8bbb397fd5
--- /dev/null
+++ b/queue-4.9/xfs-fix-missed-holes-in-seek_hole-implementation.patch
@@ -0,0 +1,87 @@
+From 5375023ae1266553a7baa0845e82917d8803f48c Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Thu, 18 May 2017 16:36:22 -0700
+Subject: xfs: Fix missed holes in SEEK_HOLE implementation
+
+From: Jan Kara <jack@suse.cz>
+
+commit 5375023ae1266553a7baa0845e82917d8803f48c upstream.
+
+XFS SEEK_HOLE implementation could miss a hole in an unwritten extent as
+can be seen by the following command:
+
+xfs_io -c "falloc 0 256k" -c "pwrite 0 56k" -c "pwrite 128k 8k"
+       -c "seek -h 0" file
+wrote 57344/57344 bytes at offset 0
+56 KiB, 14 ops; 0.0000 sec (49.312 MiB/sec and 12623.9856 ops/sec)
+wrote 8192/8192 bytes at offset 131072
+8 KiB, 2 ops; 0.0000 sec (70.383 MiB/sec and 18018.0180 ops/sec)
+Whence	Result
+HOLE	139264
+
+Here we can see that the hole at offset 56k was just ignored by the
+SEEK_HOLE implementation. The bug is in xfs_find_get_desired_pgoff(),
+which does not properly detect the case when pages are not contiguous.
+
+Fix the problem by properly detecting when found page has larger offset
+than expected.
+
+Fixes: d126d43f631f996daeee5006714fed914be32368
+Signed-off-by: Jan Kara <jack@suse.cz>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/xfs_file.c |   29 +++++++++--------------------
+ 1 file changed, 9 insertions(+), 20 deletions(-)
+
+--- a/fs/xfs/xfs_file.c
++++ b/fs/xfs/xfs_file.c
+@@ -1163,17 +1163,6 @@ xfs_find_get_desired_pgoff(
+ 			break;
+ 		}
+ 
+-		/*
+-		 * At lease we found one page.  If this is the first time we
+-		 * step into the loop, and if the first page index offset is
+-		 * greater than the given search offset, a hole was found.
+-		 */
+-		if (type == HOLE_OFF && lastoff == startoff &&
+-		    lastoff < page_offset(pvec.pages[0])) {
+-			found = true;
+-			break;
+-		}
+-
+ 		for (i = 0; i < nr_pages; i++) {
+ 			struct page	*page = pvec.pages[i];
+ 			loff_t		b_offset;
+@@ -1185,18 +1174,18 @@ xfs_find_get_desired_pgoff(
+ 			 * file mapping. However, page->index will not change
+ 			 * because we have a reference on the page.
+ 			 *
+-			 * Searching done if the page index is out of range.
+-			 * If the current offset is not reaches the end of
+-			 * the specified search range, there should be a hole
+-			 * between them.
++			 * If current page offset is beyond where we've ended,
++			 * we've found a hole.
+ 			 */
+-			if (page->index > end) {
+-				if (type == HOLE_OFF && lastoff < endoff) {
+-					*offset = lastoff;
+-					found = true;
+-				}
++			if (type == HOLE_OFF && lastoff < endoff &&
++			    lastoff < page_offset(pvec.pages[i])) {
++				found = true;
++				*offset = lastoff;
+ 				goto out;
+ 			}
++			/* Searching done if the page index is out of range. */
++			if (page->index > end)
++				goto out;
+ 
+ 			lock_page(page);
+ 			/*
diff --git a/queue-4.9/xfs-fix-off-by-one-on-max-nr_pages-in-xfs_find_get_desired_pgoff.patch b/queue-4.9/xfs-fix-off-by-one-on-max-nr_pages-in-xfs_find_get_desired_pgoff.patch
new file mode 100644
index 00000000000..80e1e58021d
--- /dev/null
+++ b/queue-4.9/xfs-fix-off-by-one-on-max-nr_pages-in-xfs_find_get_desired_pgoff.patch
@@ -0,0 +1,54 @@
+From 8affebe16d79ebefb1d9d6d56a46dc89716f9453 Mon Sep 17 00:00:00 2001
+From: Eryu Guan <eguan@redhat.com>
+Date: Tue, 23 May 2017 08:30:46 -0700
+Subject: xfs: fix off-by-one on max nr_pages in xfs_find_get_desired_pgoff()
+
+From: Eryu Guan <eguan@redhat.com>
+
+commit 8affebe16d79ebefb1d9d6d56a46dc89716f9453 upstream.
+
+xfs_find_get_desired_pgoff() is used to search for offset of hole or
+data in page range [index, end] (both inclusive), and the max number
+of pages to search should be at least one, if end == index.
+Otherwise the only page is missed and no hole or data is found,
+which is not correct.
+
+When block size is smaller than page size, this can be demonstrated
+by preallocating a file with size smaller than page size and writing
+data to the last block. E.g. run this xfs_io command on a 1k block
+size XFS on x86_64 host.
+
+  # xfs_io -fc "falloc 0 3k" -c "pwrite 2k 1k" \
+  	    -c "seek -d 0" /mnt/xfs/testfile
+  wrote 1024/1024 bytes at offset 2048
+  1 KiB, 1 ops; 0.0000 sec (33.675 MiB/sec and 34482.7586 ops/sec)
+  Whence  Result
+  DATA    EOF
+
+Data at offset 2k was missed, and lseek(2) returned ENXIO.
+
+This was uncovered by generic/285 subtests 07 and 08 on a ppc64 host,
+where the page size is 64k, because a recent change to generic/285
+reduced the preallocated file size to less than 64k.
+
+Signed-off-by: Eryu Guan <eguan@redhat.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/xfs_file.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/xfs/xfs_file.c
++++ b/fs/xfs/xfs_file.c
+@@ -1136,7 +1136,7 @@ xfs_find_get_desired_pgoff(
+ 		unsigned	nr_pages;
+ 		unsigned int	i;
+ 
+-		want = min_t(pgoff_t, end - index, PAGEVEC_SIZE);
++		want = min_t(pgoff_t, end - index, PAGEVEC_SIZE - 1) + 1;
+ 		nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
+ 					  want);
+ 		/*
diff --git a/queue-4.9/xfs-rework-the-inline-directory-verifiers.patch b/queue-4.9/xfs-rework-the-inline-directory-verifiers.patch
new file mode 100644
index 00000000000..1369f2c250f
--- /dev/null
+++ b/queue-4.9/xfs-rework-the-inline-directory-verifiers.patch
@@ -0,0 +1,332 @@
+From 78420281a9d74014af7616958806c3aba056319e Mon Sep 17 00:00:00 2001
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+Date: Mon, 3 Apr 2017 12:22:20 -0700
+Subject: xfs: rework the inline directory verifiers
+
+From: Darrick J. Wong <darrick.wong@oracle.com>
+
+commit 78420281a9d74014af7616958806c3aba056319e upstream.
+
+The inline directory verifiers should be called on the inode fork data,
+which means after iformat_local on the read side, and prior to
+ifork_flush on the write side.  This makes the fork verifier more
+consistent with the way buffer verifiers work -- i.e. they will operate
+on the memory buffer that the code will be reading and writing directly.
+
+Furthermore, revise the verifier function to return -EFSCORRUPTED so
+that we don't flood the logs with corruption messages and assert
+notices.  This has been a particular problem with xfs/348, which
+triggers the XFS_WANT_CORRUPTED_RETURN assertions, which halts the
+kernel when CONFIG_XFS_DEBUG=y.  Disk corruption isn't supposed to do
+that, at least not in a verifier.
+
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/libxfs/xfs_dir2_priv.h  |    3 -
+ fs/xfs/libxfs/xfs_dir2_sf.c    |   63 ++++++++++++++++++++++++++---------------
+ fs/xfs/libxfs/xfs_inode_fork.c |   35 ++++++++--------------
+ fs/xfs/libxfs/xfs_inode_fork.h |    2 -
+ fs/xfs/xfs_inode.c             |   19 ++++++------
+ 5 files changed, 66 insertions(+), 56 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_dir2_priv.h
++++ b/fs/xfs/libxfs/xfs_dir2_priv.h
+@@ -126,8 +126,7 @@ extern int xfs_dir2_sf_create(struct xfs
+ extern int xfs_dir2_sf_lookup(struct xfs_da_args *args);
+ extern int xfs_dir2_sf_removename(struct xfs_da_args *args);
+ extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
+-extern int xfs_dir2_sf_verify(struct xfs_mount *mp, struct xfs_dir2_sf_hdr *sfp,
+-		int size);
++extern int xfs_dir2_sf_verify(struct xfs_inode *ip);
+ 
+ /* xfs_dir2_readdir.c */
+ extern int xfs_readdir(struct xfs_inode *dp, struct dir_context *ctx,
+--- a/fs/xfs/libxfs/xfs_dir2_sf.c
++++ b/fs/xfs/libxfs/xfs_dir2_sf.c
+@@ -632,36 +632,49 @@ xfs_dir2_sf_check(
+ /* Verify the consistency of an inline directory. */
+ int
+ xfs_dir2_sf_verify(
+-	struct xfs_mount		*mp,
+-	struct xfs_dir2_sf_hdr		*sfp,
+-	int				size)
++	struct xfs_inode		*ip)
+ {
++	struct xfs_mount		*mp = ip->i_mount;
++	struct xfs_dir2_sf_hdr		*sfp;
+ 	struct xfs_dir2_sf_entry	*sfep;
+ 	struct xfs_dir2_sf_entry	*next_sfep;
+ 	char				*endp;
+ 	const struct xfs_dir_ops	*dops;
++	struct xfs_ifork		*ifp;
+ 	xfs_ino_t			ino;
+ 	int				i;
+ 	int				i8count;
+ 	int				offset;
++	int				size;
++	int				error;
+ 	__uint8_t			filetype;
+ 
++	ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_LOCAL);
++	/*
++	 * xfs_iread calls us before xfs_setup_inode sets up ip->d_ops,
++	 * so we can only trust the mountpoint to have the right pointer.
++	 */
+ 	dops = xfs_dir_get_ops(mp, NULL);
+ 
++	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
++	sfp = (struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data;
++	size = ifp->if_bytes;
++
+ 	/*
+ 	 * Give up if the directory is way too short.
+ 	 */
+-	XFS_WANT_CORRUPTED_RETURN(mp, size >
+-			offsetof(struct xfs_dir2_sf_hdr, parent));
+-	XFS_WANT_CORRUPTED_RETURN(mp, size >=
+-			xfs_dir2_sf_hdr_size(sfp->i8count));
++	if (size <= offsetof(struct xfs_dir2_sf_hdr, parent) ||
++	    size < xfs_dir2_sf_hdr_size(sfp->i8count))
++		return -EFSCORRUPTED;
+ 
+ 	endp = (char *)sfp + size;
+ 
+ 	/* Check .. entry */
+ 	ino = dops->sf_get_parent_ino(sfp);
+ 	i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
+-	XFS_WANT_CORRUPTED_RETURN(mp, !xfs_dir_ino_validate(mp, ino));
++	error = xfs_dir_ino_validate(mp, ino);
++	if (error)
++		return error;
+ 	offset = dops->data_first_offset;
+ 
+ 	/* Check all reported entries */
+@@ -672,12 +685,12 @@ xfs_dir2_sf_verify(
+ 		 * Check the fixed-offset parts of the structure are
+ 		 * within the data buffer.
+ 		 */
+-		XFS_WANT_CORRUPTED_RETURN(mp,
+-				((char *)sfep + sizeof(*sfep)) < endp);
++		if (((char *)sfep + sizeof(*sfep)) >= endp)
++			return -EFSCORRUPTED;
+ 
+ 		/* Don't allow names with known bad length. */
+-		XFS_WANT_CORRUPTED_RETURN(mp, sfep->namelen > 0);
+-		XFS_WANT_CORRUPTED_RETURN(mp, sfep->namelen < MAXNAMELEN);
++		if (sfep->namelen == 0)
++			return -EFSCORRUPTED;
+ 
+ 		/*
+ 		 * Check that the variable-length part of the structure is
+@@ -685,33 +698,39 @@ xfs_dir2_sf_verify(
+ 		 * name component, so nextentry is an acceptable test.
+ 		 */
+ 		next_sfep = dops->sf_nextentry(sfp, sfep);
+-		XFS_WANT_CORRUPTED_RETURN(mp, endp >= (char *)next_sfep);
++		if (endp < (char *)next_sfep)
++			return -EFSCORRUPTED;
+ 
+ 		/* Check that the offsets always increase. */
+-		XFS_WANT_CORRUPTED_RETURN(mp,
+-				xfs_dir2_sf_get_offset(sfep) >= offset);
++		if (xfs_dir2_sf_get_offset(sfep) < offset)
++			return -EFSCORRUPTED;
+ 
+ 		/* Check the inode number. */
+ 		ino = dops->sf_get_ino(sfp, sfep);
+ 		i8count += ino > XFS_DIR2_MAX_SHORT_INUM;
+-		XFS_WANT_CORRUPTED_RETURN(mp, !xfs_dir_ino_validate(mp, ino));
++		error = xfs_dir_ino_validate(mp, ino);
++		if (error)
++			return error;
+ 
+ 		/* Check the file type. */
+ 		filetype = dops->sf_get_ftype(sfep);
+-		XFS_WANT_CORRUPTED_RETURN(mp, filetype < XFS_DIR3_FT_MAX);
++		if (filetype >= XFS_DIR3_FT_MAX)
++			return -EFSCORRUPTED;
+ 
+ 		offset = xfs_dir2_sf_get_offset(sfep) +
+ 				dops->data_entsize(sfep->namelen);
+ 
+ 		sfep = next_sfep;
+ 	}
+-	XFS_WANT_CORRUPTED_RETURN(mp, i8count == sfp->i8count);
+-	XFS_WANT_CORRUPTED_RETURN(mp, (void *)sfep == (void *)endp);
++	if (i8count != sfp->i8count)
++		return -EFSCORRUPTED;
++	if ((void *)sfep != (void *)endp)
++		return -EFSCORRUPTED;
+ 
+ 	/* Make sure this whole thing ought to be in local format. */
+-	XFS_WANT_CORRUPTED_RETURN(mp, offset +
+-	       (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
+-	       (uint)sizeof(xfs_dir2_block_tail_t) <= mp->m_dir_geo->blksize);
++	if (offset + (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
++	    (uint)sizeof(xfs_dir2_block_tail_t) > mp->m_dir_geo->blksize)
++		return -EFSCORRUPTED;
+ 
+ 	return 0;
+ }
+--- a/fs/xfs/libxfs/xfs_inode_fork.c
++++ b/fs/xfs/libxfs/xfs_inode_fork.c
+@@ -212,6 +212,16 @@ xfs_iformat_fork(
+ 	if (error)
+ 		return error;
+ 
++	/* Check inline dir contents. */
++	if (S_ISDIR(VFS_I(ip)->i_mode) &&
++	    dip->di_format == XFS_DINODE_FMT_LOCAL) {
++		error = xfs_dir2_sf_verify(ip);
++		if (error) {
++			xfs_idestroy_fork(ip, XFS_DATA_FORK);
++			return error;
++		}
++	}
++
+ 	if (xfs_is_reflink_inode(ip)) {
+ 		ASSERT(ip->i_cowfp == NULL);
+ 		xfs_ifork_init_cow(ip);
+@@ -322,8 +332,6 @@ xfs_iformat_local(
+ 	int		whichfork,
+ 	int		size)
+ {
+-	int		error;
+-
+ 	/*
+ 	 * If the size is unreasonable, then something
+ 	 * is wrong and we just bail out rather than crash in
+@@ -339,14 +347,6 @@ xfs_iformat_local(
+ 		return -EFSCORRUPTED;
+ 	}
+ 
+-	if (S_ISDIR(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK) {
+-		error = xfs_dir2_sf_verify(ip->i_mount,
+-				(struct xfs_dir2_sf_hdr *)XFS_DFORK_DPTR(dip),
+-				size);
+-		if (error)
+-			return error;
+-	}
+-
+ 	xfs_init_local_fork(ip, whichfork, XFS_DFORK_PTR(dip, whichfork), size);
+ 	return 0;
+ }
+@@ -867,7 +867,7 @@ xfs_iextents_copy(
+  * In these cases, the format always takes precedence, because the
+  * format indicates the current state of the fork.
+  */
+-int
++void
+ xfs_iflush_fork(
+ 	xfs_inode_t		*ip,
+ 	xfs_dinode_t		*dip,
+@@ -877,7 +877,6 @@ xfs_iflush_fork(
+ 	char			*cp;
+ 	xfs_ifork_t		*ifp;
+ 	xfs_mount_t		*mp;
+-	int			error;
+ 	static const short	brootflag[2] =
+ 		{ XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
+ 	static const short	dataflag[2] =
+@@ -886,7 +885,7 @@ xfs_iflush_fork(
+ 		{ XFS_ILOG_DEXT, XFS_ILOG_AEXT };
+ 
+ 	if (!iip)
+-		return 0;
++		return;
+ 	ifp = XFS_IFORK_PTR(ip, whichfork);
+ 	/*
+ 	 * This can happen if we gave up in iformat in an error path,
+@@ -894,19 +893,12 @@ xfs_iflush_fork(
+ 	 */
+ 	if (!ifp) {
+ 		ASSERT(whichfork == XFS_ATTR_FORK);
+-		return 0;
++		return;
+ 	}
+ 	cp = XFS_DFORK_PTR(dip, whichfork);
+ 	mp = ip->i_mount;
+ 	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
+ 	case XFS_DINODE_FMT_LOCAL:
+-		if (S_ISDIR(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK) {
+-			error = xfs_dir2_sf_verify(mp,
+-					(struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data,
+-					ifp->if_bytes);
+-			if (error)
+-				return error;
+-		}
+ 		if ((iip->ili_fields & dataflag[whichfork]) &&
+ 		    (ifp->if_bytes > 0)) {
+ 			ASSERT(ifp->if_u1.if_data != NULL);
+@@ -959,7 +951,6 @@ xfs_iflush_fork(
+ 		ASSERT(0);
+ 		break;
+ 	}
+-	return 0;
+ }
+ 
+ /*
+--- a/fs/xfs/libxfs/xfs_inode_fork.h
++++ b/fs/xfs/libxfs/xfs_inode_fork.h
+@@ -140,7 +140,7 @@ typedef struct xfs_ifork {
+ struct xfs_ifork *xfs_iext_state_to_fork(struct xfs_inode *ip, int state);
+ 
+ int		xfs_iformat_fork(struct xfs_inode *, struct xfs_dinode *);
+-int		xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *,
++void		xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *,
+ 				struct xfs_inode_log_item *, int);
+ void		xfs_idestroy_fork(struct xfs_inode *, int);
+ void		xfs_idata_realloc(struct xfs_inode *, int, int);
+--- a/fs/xfs/xfs_inode.c
++++ b/fs/xfs/xfs_inode.c
+@@ -50,6 +50,7 @@
+ #include "xfs_log.h"
+ #include "xfs_bmap_btree.h"
+ #include "xfs_reflink.h"
++#include "xfs_dir2_priv.h"
+ 
+ kmem_zone_t *xfs_inode_zone;
+ 
+@@ -3491,7 +3492,6 @@ xfs_iflush_int(
+ 	struct xfs_inode_log_item *iip = ip->i_itemp;
+ 	struct xfs_dinode	*dip;
+ 	struct xfs_mount	*mp = ip->i_mount;
+-	int			error;
+ 
+ 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
+ 	ASSERT(xfs_isiflocked(ip));
+@@ -3563,6 +3563,12 @@ xfs_iflush_int(
+ 	if (ip->i_d.di_version < 3)
+ 		ip->i_d.di_flushiter++;
+ 
++	/* Check the inline directory data. */
++	if (S_ISDIR(VFS_I(ip)->i_mode) &&
++	    ip->i_d.di_format == XFS_DINODE_FMT_LOCAL &&
++	    xfs_dir2_sf_verify(ip))
++		goto corrupt_out;
++
+ 	/*
+ 	 * Copy the dirty parts of the inode into the on-disk inode.  We always
+ 	 * copy out the core of the inode, because if the inode is dirty at all
+@@ -3574,14 +3580,9 @@ xfs_iflush_int(
+ 	if (ip->i_d.di_flushiter == DI_MAX_FLUSH)
+ 		ip->i_d.di_flushiter = 0;
+ 
+-	error = xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK);
+-	if (error)
+-		return error;
+-	if (XFS_IFORK_Q(ip)) {
+-		error = xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK);
+-		if (error)
+-			return error;
+-	}
++	xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK);
++	if (XFS_IFORK_Q(ip))
++		xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK);
+ 	xfs_inobp_check(mp, bp);
+ 
+ 	/*
diff --git a/queue-4.9/xfs-use-b_state-to-fix-buffer-i-o-accounting-release-race.patch b/queue-4.9/xfs-use-b_state-to-fix-buffer-i-o-accounting-release-race.patch
new file mode 100644
index 00000000000..074033ebde5
--- /dev/null
+++ b/queue-4.9/xfs-use-b_state-to-fix-buffer-i-o-accounting-release-race.patch
@@ -0,0 +1,155 @@
+From 63db7c815bc0997c29e484d2409684fdd9fcd93b Mon Sep 17 00:00:00 2001
+From: Brian Foster <bfoster@redhat.com>
+Date: Wed, 31 May 2017 08:22:52 -0700
+Subject: xfs: use ->b_state to fix buffer I/O accounting release race
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit 63db7c815bc0997c29e484d2409684fdd9fcd93b upstream.
+
+We've had user reports of unmount hangs in xfs_wait_buftarg() that
+analysis shows is due to btp->bt_io_count == -1. bt_io_count
+represents the count of in-flight asynchronous buffers and thus
+should always be >= 0. xfs_wait_buftarg() waits for this value to
+stabilize to zero in order to ensure that all untracked (with
+respect to the lru) buffers have completed I/O processing before
+unmount proceeds to tear down in-core data structures.
+
+The value of -1 implies an I/O accounting decrement race. Indeed,
+the fact that xfs_buf_ioacct_dec() is called from xfs_buf_rele()
+(where the buffer lock is no longer held) means that bp->b_flags can
+be updated from an unsafe context. While a user-level reproducer is
+currently not available, some intrusive hacks to run racing buffer
+lookups/ioacct/releases from multiple threads were used to
+successfully manufacture this problem.
+
+Existing callers do not expect to acquire the buffer lock from
+xfs_buf_rele(). Therefore, we can not safely update ->b_flags from
+this context. It turns out that we already have separate buffer
+state bits and associated serialization for dealing with buffer LRU
+state in the form of ->b_state and ->b_lock. Therefore, replace the
+_XBF_IN_FLIGHT flag with a ->b_state variant, update the I/O
+accounting wrappers appropriately and make sure they are used with
+the correct locking. This ensures that buffer in-flight state can be
+modified at buffer release time without racing with modifications
+from a buffer lock holder.
+
+Fixes: 9c7504aa72b6 ("xfs: track and serialize in-flight async buffers against unmount")
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Nikolay Borisov <nborisov@suse.com>
+Tested-by: Libor Pechacek <lpechacek@suse.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/xfs_buf.c |   38 ++++++++++++++++++++++++++------------
+ fs/xfs/xfs_buf.h |    5 ++---
+ 2 files changed, 28 insertions(+), 15 deletions(-)
+
+--- a/fs/xfs/xfs_buf.c
++++ b/fs/xfs/xfs_buf.c
+@@ -96,12 +96,16 @@ static inline void
+ xfs_buf_ioacct_inc(
+ 	struct xfs_buf	*bp)
+ {
+-	if (bp->b_flags & (XBF_NO_IOACCT|_XBF_IN_FLIGHT))
++	if (bp->b_flags & XBF_NO_IOACCT)
+ 		return;
+ 
+ 	ASSERT(bp->b_flags & XBF_ASYNC);
+-	bp->b_flags |= _XBF_IN_FLIGHT;
+-	percpu_counter_inc(&bp->b_target->bt_io_count);
++	spin_lock(&bp->b_lock);
++	if (!(bp->b_state & XFS_BSTATE_IN_FLIGHT)) {
++		bp->b_state |= XFS_BSTATE_IN_FLIGHT;
++		percpu_counter_inc(&bp->b_target->bt_io_count);
++	}
++	spin_unlock(&bp->b_lock);
+ }
+ 
+ /*
+@@ -109,14 +113,24 @@ xfs_buf_ioacct_inc(
+  * freed and unaccount from the buftarg.
+  */
+ static inline void
+-xfs_buf_ioacct_dec(
++__xfs_buf_ioacct_dec(
+ 	struct xfs_buf	*bp)
+ {
+-	if (!(bp->b_flags & _XBF_IN_FLIGHT))
+-		return;
++	ASSERT(spin_is_locked(&bp->b_lock));
+ 
+-	bp->b_flags &= ~_XBF_IN_FLIGHT;
+-	percpu_counter_dec(&bp->b_target->bt_io_count);
++	if (bp->b_state & XFS_BSTATE_IN_FLIGHT) {
++		bp->b_state &= ~XFS_BSTATE_IN_FLIGHT;
++		percpu_counter_dec(&bp->b_target->bt_io_count);
++	}
++}
++
++static inline void
++xfs_buf_ioacct_dec(
++	struct xfs_buf	*bp)
++{
++	spin_lock(&bp->b_lock);
++	__xfs_buf_ioacct_dec(bp);
++	spin_unlock(&bp->b_lock);
+ }
+ 
+ /*
+@@ -148,9 +162,9 @@ xfs_buf_stale(
+ 	 * unaccounted (released to LRU) before that occurs. Drop in-flight
+ 	 * status now to preserve accounting consistency.
+ 	 */
+-	xfs_buf_ioacct_dec(bp);
+-
+ 	spin_lock(&bp->b_lock);
++	__xfs_buf_ioacct_dec(bp);
++
+ 	atomic_set(&bp->b_lru_ref, 0);
+ 	if (!(bp->b_state & XFS_BSTATE_DISPOSE) &&
+ 	    (list_lru_del(&bp->b_target->bt_lru, &bp->b_lru)))
+@@ -953,12 +967,12 @@ xfs_buf_rele(
+ 		 * ensures the decrement occurs only once per-buf.
+ 		 */
+ 		if ((atomic_read(&bp->b_hold) == 1) && !list_empty(&bp->b_lru))
+-			xfs_buf_ioacct_dec(bp);
++			__xfs_buf_ioacct_dec(bp);
+ 		goto out_unlock;
+ 	}
+ 
+ 	/* the last reference has been dropped ... */
+-	xfs_buf_ioacct_dec(bp);
++	__xfs_buf_ioacct_dec(bp);
+ 	if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) {
+ 		/*
+ 		 * If the buffer is added to the LRU take a new reference to the
+--- a/fs/xfs/xfs_buf.h
++++ b/fs/xfs/xfs_buf.h
+@@ -63,7 +63,6 @@ typedef enum {
+ #define _XBF_KMEM	 (1 << 21)/* backed by heap memory */
+ #define _XBF_DELWRI_Q	 (1 << 22)/* buffer on a delwri queue */
+ #define _XBF_COMPOUND	 (1 << 23)/* compound buffer */
+-#define _XBF_IN_FLIGHT	 (1 << 25) /* I/O in flight, for accounting purposes */
+ 
+ typedef unsigned int xfs_buf_flags_t;
+ 
+@@ -83,14 +82,14 @@ typedef unsigned int xfs_buf_flags_t;
+ 	{ _XBF_PAGES,		"PAGES" }, \
+ 	{ _XBF_KMEM,		"KMEM" }, \
+ 	{ _XBF_DELWRI_Q,	"DELWRI_Q" }, \
+-	{ _XBF_COMPOUND,	"COMPOUND" }, \
+-	{ _XBF_IN_FLIGHT,	"IN_FLIGHT" }
++	{ _XBF_COMPOUND,	"COMPOUND" }
+ 
+ 
+ /*
+  * Internal state flags.
+  */
+ #define XFS_BSTATE_DISPOSE	 (1 << 0)	/* buffer being discarded */
++#define XFS_BSTATE_IN_FLIGHT	 (1 << 1)	/* I/O in flight */
+ 
+ /*
+  * The xfs_buftarg contains 2 notions of "sector size" -
diff --git a/queue-4.9/xfs-use-dedicated-log-worker-wq-to-avoid-deadlock-with-cil-wq.patch b/queue-4.9/xfs-use-dedicated-log-worker-wq-to-avoid-deadlock-with-cil-wq.patch
new file mode 100644
index 00000000000..ff6af5f6756
--- /dev/null
+++ b/queue-4.9/xfs-use-dedicated-log-worker-wq-to-avoid-deadlock-with-cil-wq.patch
@@ -0,0 +1,102 @@
+From 696a562072e3c14bcd13ae5acc19cdf27679e865 Mon Sep 17 00:00:00 2001
+From: Brian Foster <bfoster@redhat.com>
+Date: Tue, 28 Mar 2017 14:51:44 -0700
+Subject: xfs: use dedicated log worker wq to avoid deadlock with cil wq
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit 696a562072e3c14bcd13ae5acc19cdf27679e865 upstream.
+
+The log covering background task used to be part of the xfssyncd
+workqueue. That workqueue was removed as of commit 5889608df ("xfs:
+syncd workqueue is no more") and the associated work item scheduled
+to the xfs-log wq. The latter is used for log buffer I/O completion.
+
+Since xfs_log_worker() can invoke a log flush, a deadlock is
+possible between the xfs-log and xfs-cil workqueues. Consider the
+following codepath from xfs_log_worker():
+
+xfs_log_worker()
+  xfs_log_force()
+    _xfs_log_force()
+      xlog_cil_force()
+        xlog_cil_force_lsn()
+          xlog_cil_push_now()
+            flush_work()
+
+The above is in xfs-log wq context and blocked waiting on the
+completion of an xfs-cil work item. Concurrently, the cil push in
+progress can end up blocked here:
+
+xlog_cil_push_work()
+  xlog_cil_push()
+    xlog_write()
+      xlog_state_get_iclog_space()
+        xlog_wait(&log->l_flush_wait, ...)
+
+The above is in xfs-cil context waiting on log buffer I/O
+completion, which executes in xfs-log wq context. In this scenario
+both workqueues are deadlocked waiting on each other.
+
+Add a new workqueue specifically for the high level log covering and
+ail pushing worker, as was the case prior to commit 5889608df.
+
+Diagnosed-by: David Jeffery <djeffery@redhat.com>
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/xfs_log.c   |    2 +-
+ fs/xfs/xfs_mount.h |    1 +
+ fs/xfs/xfs_super.c |    8 ++++++++
+ 3 files changed, 10 insertions(+), 1 deletion(-)
+
+--- a/fs/xfs/xfs_log.c
++++ b/fs/xfs/xfs_log.c
+@@ -1293,7 +1293,7 @@ void
+ xfs_log_work_queue(
+ 	struct xfs_mount        *mp)
+ {
+-	queue_delayed_work(mp->m_log_workqueue, &mp->m_log->l_work,
++	queue_delayed_work(mp->m_sync_workqueue, &mp->m_log->l_work,
+ 				msecs_to_jiffies(xfs_syncd_centisecs * 10));
+ }
+ 
+--- a/fs/xfs/xfs_mount.h
++++ b/fs/xfs/xfs_mount.h
+@@ -183,6 +183,7 @@ typedef struct xfs_mount {
+ 	struct workqueue_struct	*m_reclaim_workqueue;
+ 	struct workqueue_struct	*m_log_workqueue;
+ 	struct workqueue_struct *m_eofblocks_workqueue;
++	struct workqueue_struct	*m_sync_workqueue;
+ 
+ 	/*
+ 	 * Generation of the filesysyem layout.  This is incremented by each
+--- a/fs/xfs/xfs_super.c
++++ b/fs/xfs/xfs_super.c
+@@ -872,8 +872,15 @@ xfs_init_mount_workqueues(
+ 	if (!mp->m_eofblocks_workqueue)
+ 		goto out_destroy_log;
+ 
++	mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s", WQ_FREEZABLE, 0,
++					       mp->m_fsname);
++	if (!mp->m_sync_workqueue)
++		goto out_destroy_eofb;
++
+ 	return 0;
+ 
++out_destroy_eofb:
++	destroy_workqueue(mp->m_eofblocks_workqueue);
+ out_destroy_log:
+ 	destroy_workqueue(mp->m_log_workqueue);
+ out_destroy_reclaim:
+@@ -894,6 +901,7 @@ STATIC void
+ xfs_destroy_mount_workqueues(
+ 	struct xfs_mount	*mp)
+ {
++	destroy_workqueue(mp->m_sync_workqueue);
+ 	destroy_workqueue(mp->m_eofblocks_workqueue);
+ 	destroy_workqueue(mp->m_log_workqueue);
+ 	destroy_workqueue(mp->m_reclaim_workqueue);
diff --git a/queue-4.9/xfs-verify-inline-directory-data-forks.patch b/queue-4.9/xfs-verify-inline-directory-data-forks.patch
new file mode 100644
index 00000000000..dafbe3ad8a3
--- /dev/null
+++ b/queue-4.9/xfs-verify-inline-directory-data-forks.patch
@@ -0,0 +1,290 @@
+From 630a04e79dd41ff746b545d4fc052e0abb836120 Mon Sep 17 00:00:00 2001
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+Date: Wed, 15 Mar 2017 00:24:25 -0700
+Subject: xfs: verify inline directory data forks
+
+From: Darrick J. Wong <darrick.wong@oracle.com>
+
+commit 630a04e79dd41ff746b545d4fc052e0abb836120 upstream.
+
+When we're reading or writing the data fork of an inline directory,
+check the contents to make sure we're not overflowing buffers or eating
+garbage data.  xfs/348 corrupts an inline symlink into an inline
+directory, triggering a buffer overflow bug.
+
+v2: add more checks consistent with _dir2_sf_check and make the verifier
+usable from anywhere.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/libxfs/xfs_dir2_priv.h  |    2 
+ fs/xfs/libxfs/xfs_dir2_sf.c    |   87 +++++++++++++++++++++++++++++++++++++++++
+ fs/xfs/libxfs/xfs_inode_fork.c |   26 ++++++++++--
+ fs/xfs/libxfs/xfs_inode_fork.h |    2 
+ fs/xfs/xfs_dir2_readdir.c      |   11 -----
+ fs/xfs/xfs_inode.c             |   12 ++++-
+ 6 files changed, 122 insertions(+), 18 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_dir2_priv.h
++++ b/fs/xfs/libxfs/xfs_dir2_priv.h
+@@ -126,6 +126,8 @@ extern int xfs_dir2_sf_create(struct xfs
+ extern int xfs_dir2_sf_lookup(struct xfs_da_args *args);
+ extern int xfs_dir2_sf_removename(struct xfs_da_args *args);
+ extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
++extern int xfs_dir2_sf_verify(struct xfs_mount *mp, struct xfs_dir2_sf_hdr *sfp,
++		int size);
+ 
+ /* xfs_dir2_readdir.c */
+ extern int xfs_readdir(struct xfs_inode *dp, struct dir_context *ctx,
+--- a/fs/xfs/libxfs/xfs_dir2_sf.c
++++ b/fs/xfs/libxfs/xfs_dir2_sf.c
+@@ -629,6 +629,93 @@ xfs_dir2_sf_check(
+ }
+ #endif	/* DEBUG */
+ 
++/* Verify the consistency of an inline directory. */
++int
++xfs_dir2_sf_verify(
++	struct xfs_mount		*mp,
++	struct xfs_dir2_sf_hdr		*sfp,
++	int				size)
++{
++	struct xfs_dir2_sf_entry	*sfep;
++	struct xfs_dir2_sf_entry	*next_sfep;
++	char				*endp;
++	const struct xfs_dir_ops	*dops;
++	xfs_ino_t			ino;
++	int				i;
++	int				i8count;
++	int				offset;
++	__uint8_t			filetype;
++
++	dops = xfs_dir_get_ops(mp, NULL);
++
++	/*
++	 * Give up if the directory is way too short.
++	 */
++	XFS_WANT_CORRUPTED_RETURN(mp, size >
++			offsetof(struct xfs_dir2_sf_hdr, parent));
++	XFS_WANT_CORRUPTED_RETURN(mp, size >=
++			xfs_dir2_sf_hdr_size(sfp->i8count));
++
++	endp = (char *)sfp + size;
++
++	/* Check .. entry */
++	ino = dops->sf_get_parent_ino(sfp);
++	i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
++	XFS_WANT_CORRUPTED_RETURN(mp, !xfs_dir_ino_validate(mp, ino));
++	offset = dops->data_first_offset;
++
++	/* Check all reported entries */
++	sfep = xfs_dir2_sf_firstentry(sfp);
++	for (i = 0; i < sfp->count; i++) {
++		/*
++		 * struct xfs_dir2_sf_entry has a variable length.
++		 * Check the fixed-offset parts of the structure are
++		 * within the data buffer.
++		 */
++		XFS_WANT_CORRUPTED_RETURN(mp,
++				((char *)sfep + sizeof(*sfep)) < endp);
++
++		/* Don't allow names with known bad length. */
++		XFS_WANT_CORRUPTED_RETURN(mp, sfep->namelen > 0);
++		XFS_WANT_CORRUPTED_RETURN(mp, sfep->namelen < MAXNAMELEN);
++
++		/*
++		 * Check that the variable-length part of the structure is
++		 * within the data buffer.  The next entry starts after the
++		 * name component, so nextentry is an acceptable test.
++		 */
++		next_sfep = dops->sf_nextentry(sfp, sfep);
++		XFS_WANT_CORRUPTED_RETURN(mp, endp >= (char *)next_sfep);
++
++		/* Check that the offsets always increase. */
++		XFS_WANT_CORRUPTED_RETURN(mp,
++				xfs_dir2_sf_get_offset(sfep) >= offset);
++
++		/* Check the inode number. */
++		ino = dops->sf_get_ino(sfp, sfep);
++		i8count += ino > XFS_DIR2_MAX_SHORT_INUM;
++		XFS_WANT_CORRUPTED_RETURN(mp, !xfs_dir_ino_validate(mp, ino));
++
++		/* Check the file type. */
++		filetype = dops->sf_get_ftype(sfep);
++		XFS_WANT_CORRUPTED_RETURN(mp, filetype < XFS_DIR3_FT_MAX);
++
++		offset = xfs_dir2_sf_get_offset(sfep) +
++				dops->data_entsize(sfep->namelen);
++
++		sfep = next_sfep;
++	}
++	XFS_WANT_CORRUPTED_RETURN(mp, i8count == sfp->i8count);
++	XFS_WANT_CORRUPTED_RETURN(mp, (void *)sfep == (void *)endp);
++
++	/* Make sure this whole thing ought to be in local format. */
++	XFS_WANT_CORRUPTED_RETURN(mp, offset +
++	       (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
++	       (uint)sizeof(xfs_dir2_block_tail_t) <= mp->m_dir_geo->blksize);
++
++	return 0;
++}
++
+ /*
+  * Create a new (shortform) directory.
+  */
+--- a/fs/xfs/libxfs/xfs_inode_fork.c
++++ b/fs/xfs/libxfs/xfs_inode_fork.c
+@@ -33,6 +33,8 @@
+ #include "xfs_trace.h"
+ #include "xfs_attr_sf.h"
+ #include "xfs_da_format.h"
++#include "xfs_da_btree.h"
++#include "xfs_dir2_priv.h"
+ 
+ kmem_zone_t *xfs_ifork_zone;
+ 
+@@ -320,6 +322,7 @@ xfs_iformat_local(
+ 	int		whichfork,
+ 	int		size)
+ {
++	int		error;
+ 
+ 	/*
+ 	 * If the size is unreasonable, then something
+@@ -336,6 +339,14 @@ xfs_iformat_local(
+ 		return -EFSCORRUPTED;
+ 	}
+ 
++	if (S_ISDIR(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK) {
++		error = xfs_dir2_sf_verify(ip->i_mount,
++				(struct xfs_dir2_sf_hdr *)XFS_DFORK_DPTR(dip),
++				size);
++		if (error)
++			return error;
++	}
++
+ 	xfs_init_local_fork(ip, whichfork, XFS_DFORK_PTR(dip, whichfork), size);
+ 	return 0;
+ }
+@@ -856,7 +867,7 @@ xfs_iextents_copy(
+  * In these cases, the format always takes precedence, because the
+  * format indicates the current state of the fork.
+  */
+-void
++int
+ xfs_iflush_fork(
+ 	xfs_inode_t		*ip,
+ 	xfs_dinode_t		*dip,
+@@ -866,6 +877,7 @@ xfs_iflush_fork(
+ 	char			*cp;
+ 	xfs_ifork_t		*ifp;
+ 	xfs_mount_t		*mp;
++	int			error;
+ 	static const short	brootflag[2] =
+ 		{ XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
+ 	static const short	dataflag[2] =
+@@ -874,7 +886,7 @@ xfs_iflush_fork(
+ 		{ XFS_ILOG_DEXT, XFS_ILOG_AEXT };
+ 
+ 	if (!iip)
+-		return;
++		return 0;
+ 	ifp = XFS_IFORK_PTR(ip, whichfork);
+ 	/*
+ 	 * This can happen if we gave up in iformat in an error path,
+@@ -882,12 +894,19 @@ xfs_iflush_fork(
+ 	 */
+ 	if (!ifp) {
+ 		ASSERT(whichfork == XFS_ATTR_FORK);
+-		return;
++		return 0;
+ 	}
+ 	cp = XFS_DFORK_PTR(dip, whichfork);
+ 	mp = ip->i_mount;
+ 	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
+ 	case XFS_DINODE_FMT_LOCAL:
++		if (S_ISDIR(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK) {
++			error = xfs_dir2_sf_verify(mp,
++					(struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data,
++					ifp->if_bytes);
++			if (error)
++				return error;
++		}
+ 		if ((iip->ili_fields & dataflag[whichfork]) &&
+ 		    (ifp->if_bytes > 0)) {
+ 			ASSERT(ifp->if_u1.if_data != NULL);
+@@ -940,6 +959,7 @@ xfs_iflush_fork(
+ 		ASSERT(0);
+ 		break;
+ 	}
++	return 0;
+ }
+ 
+ /*
+--- a/fs/xfs/libxfs/xfs_inode_fork.h
++++ b/fs/xfs/libxfs/xfs_inode_fork.h
+@@ -140,7 +140,7 @@ typedef struct xfs_ifork {
+ struct xfs_ifork *xfs_iext_state_to_fork(struct xfs_inode *ip, int state);
+ 
+ int		xfs_iformat_fork(struct xfs_inode *, struct xfs_dinode *);
+-void		xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *,
++int		xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *,
+ 				struct xfs_inode_log_item *, int);
+ void		xfs_idestroy_fork(struct xfs_inode *, int);
+ void		xfs_idata_realloc(struct xfs_inode *, int, int);
+--- a/fs/xfs/xfs_dir2_readdir.c
++++ b/fs/xfs/xfs_dir2_readdir.c
+@@ -71,22 +71,11 @@ xfs_dir2_sf_getdents(
+ 	struct xfs_da_geometry	*geo = args->geo;
+ 
+ 	ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
+-	/*
+-	 * Give up if the directory is way too short.
+-	 */
+-	if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
+-		ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
+-		return -EIO;
+-	}
+-
+ 	ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
+ 	ASSERT(dp->i_df.if_u1.if_data != NULL);
+ 
+ 	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+ 
+-	if (dp->i_d.di_size < xfs_dir2_sf_hdr_size(sfp->i8count))
+-		return -EFSCORRUPTED;
+-
+ 	/*
+ 	 * If the block number in the offset is out of range, we're done.
+ 	 */
+--- a/fs/xfs/xfs_inode.c
++++ b/fs/xfs/xfs_inode.c
+@@ -3491,6 +3491,7 @@ xfs_iflush_int(
+ 	struct xfs_inode_log_item *iip = ip->i_itemp;
+ 	struct xfs_dinode	*dip;
+ 	struct xfs_mount	*mp = ip->i_mount;
++	int			error;
+ 
+ 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
+ 	ASSERT(xfs_isiflocked(ip));
+@@ -3573,9 +3574,14 @@ xfs_iflush_int(
+ 	if (ip->i_d.di_flushiter == DI_MAX_FLUSH)
+ 		ip->i_d.di_flushiter = 0;
+ 
+-	xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK);
+-	if (XFS_IFORK_Q(ip))
+-		xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK);
++	error = xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK);
++	if (error)
++		return error;
++	if (XFS_IFORK_Q(ip)) {
++		error = xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK);
++		if (error)
++			return error;
++	}
+ 	xfs_inobp_check(mp, bp);
+ 
+ 	/*