net-specify-the-owning-module-for-lwtunnel-ops.patch
lwtunnel-fix-oops-on-state-free-after-encap-module-unload.patch
net-dsa-bring-back-device-detaching-in-dsa_slave_suspend.patch
+xfs-bump-up-reserved-blocks-in-xfs_alloc_set_aside.patch
+xfs-fix-bogus-minleft-manipulations.patch
+xfs-adjust-allocation-length-in-xfs_alloc_space_available.patch
+xfs-don-t-rely-on-total-in-xfs_alloc_space_available.patch
+xfs-don-t-print-warnings-when-xfs_log_force-fails.patch
+xfs-make-the-assert-condition-likely.patch
+xfs-sanity-check-directory-inode-di_size.patch
+xfs-add-missing-include-dependencies-to-xfs_dir2.h.patch
+xfs-replace-xfs_mode_to_ftype-table-with-switch-statement.patch
+xfs-sanity-check-inode-mode-when-creating-new-dentry.patch
+xfs-sanity-check-inode-di_mode.patch
+xfs-don-t-wrap-id-in-xfs_dq_get_next_id.patch
+xfs-fix-xfs_mode_to_ftype-prototype.patch
+xfs-fix-cow-writeback-race.patch
+xfs-verify-dirblocklog-correctly.patch
+xfs-remove-racy-hasattr-check-from-attr-ops.patch
+xfs-extsize-hints-are-not-unlikely-in-xfs_bmap_btalloc.patch
+xfs-clear-_xbf_pages-from-buffers-when-readahead-page.patch
+xfs-fix-bmv_count-confusion-w-shared-extents.patch
--- /dev/null
+From hch@lst.de Thu Feb 2 11:14:21 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Thu, 2 Feb 2017 08:56:00 +0100
+Subject: xfs: add missing include dependencies to xfs_dir2.h
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, Amir Goldstein <amir73il@gmail.com>, "Darrick J. Wong" <darrick.wong@oracle.com>
+Message-ID: <1486022171-8076-9-git-send-email-hch@lst.de>
+
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+commit b597dd5373a1ccc08218665dc8417433b1c09550 upstream.
+
+xfs_dir2.h dereferences some data types in inline functions
+and fails to include those type definitions, e.g.:
+xfs_dir2_data_aoff_t, struct xfs_da_geometry.
+
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_dir2.h | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/xfs/libxfs/xfs_dir2.h
++++ b/fs/xfs/libxfs/xfs_dir2.h
+@@ -18,6 +18,9 @@
+ #ifndef __XFS_DIR2_H__
+ #define __XFS_DIR2_H__
+
++#include "xfs_da_format.h"
++#include "xfs_da_btree.h"
++
+ struct xfs_defer_ops;
+ struct xfs_da_args;
+ struct xfs_inode;
--- /dev/null
+From hch@lst.de Thu Feb 2 11:12:58 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Thu, 2 Feb 2017 08:55:55 +0100
+Subject: xfs: adjust allocation length in xfs_alloc_space_available
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, "Darrick J. Wong" <darrick.wong@oracle.com>
+Message-ID: <1486022171-8076-4-git-send-email-hch@lst.de>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 54fee133ad59c87ab01dd84ab3e9397134b32acb upstream.
+
+We must decide in xfs_alloc_fix_freelist whether an allocation
+from a given AG is possible or not based on the available
+space, and should not fail the allocation past that point on a
+healthy file system.
+
+But currently we have two additional places that second-guess
+xfs_alloc_fix_freelist: xfs_alloc_ag_vextent tries to adjust the
+maxlen parameter to remove the reservation before doing the
+allocation (but ignores the various minimum freespace requirements),
+and xfs_alloc_fix_minleft tries to fix up the allocated length
+after we've found an extent, but ignores the reservations and also
+doesn't take the AGFL into account (and thus fails allocations
+for not matching minlen in some cases).
+
+Remove all these later fixups and just correct the maxlen argument
+inside xfs_alloc_fix_freelist once we have the AGF buffer locked.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_alloc.c | 81 +++++++++-------------------------------------
+ fs/xfs/libxfs/xfs_alloc.h | 2 -
+ 2 files changed, 18 insertions(+), 65 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_alloc.c
++++ b/fs/xfs/libxfs/xfs_alloc.c
+@@ -362,36 +362,12 @@ xfs_alloc_fix_len(
+ return;
+ ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
+ ASSERT(rlen % args->prod == args->mod);
++ ASSERT(args->pag->pagf_freeblks + args->pag->pagf_flcount >=
++ rlen + args->minleft);
+ args->len = rlen;
+ }
+
+ /*
+- * Fix up length if there is too little space left in the a.g.
+- * Return 1 if ok, 0 if too little, should give up.
+- */
+-STATIC int
+-xfs_alloc_fix_minleft(
+- xfs_alloc_arg_t *args) /* allocation argument structure */
+-{
+- xfs_agf_t *agf; /* a.g. freelist header */
+- int diff; /* free space difference */
+-
+- if (args->minleft == 0)
+- return 1;
+- agf = XFS_BUF_TO_AGF(args->agbp);
+- diff = be32_to_cpu(agf->agf_freeblks)
+- - args->len - args->minleft;
+- if (diff >= 0)
+- return 1;
+- args->len += diff; /* shrink the allocated space */
+- /* casts to (int) catch length underflows */
+- if ((int)args->len >= (int)args->minlen)
+- return 1;
+- args->agbno = NULLAGBLOCK;
+- return 0;
+-}
+-
+-/*
+ * Update the two btrees, logically removing from freespace the extent
+ * starting at rbno, rlen blocks. The extent is contained within the
+ * actual (current) free extent fbno for flen blocks.
+@@ -686,8 +662,6 @@ xfs_alloc_ag_vextent(
+ xfs_alloc_arg_t *args) /* argument structure for allocation */
+ {
+ int error=0;
+- xfs_extlen_t reservation;
+- xfs_extlen_t oldmax;
+
+ ASSERT(args->minlen > 0);
+ ASSERT(args->maxlen > 0);
+@@ -696,20 +670,6 @@ xfs_alloc_ag_vextent(
+ ASSERT(args->alignment > 0);
+
+ /*
+- * Clamp maxlen to the amount of free space minus any reservations
+- * that have been made.
+- */
+- oldmax = args->maxlen;
+- reservation = xfs_ag_resv_needed(args->pag, args->resv);
+- if (args->maxlen > args->pag->pagf_freeblks - reservation)
+- args->maxlen = args->pag->pagf_freeblks - reservation;
+- if (args->maxlen == 0) {
+- args->agbno = NULLAGBLOCK;
+- args->maxlen = oldmax;
+- return 0;
+- }
+-
+- /*
+ * Branch to correct routine based on the type.
+ */
+ args->wasfromfl = 0;
+@@ -728,8 +688,6 @@ xfs_alloc_ag_vextent(
+ /* NOTREACHED */
+ }
+
+- args->maxlen = oldmax;
+-
+ if (error || args->agbno == NULLAGBLOCK)
+ return error;
+
+@@ -838,9 +796,6 @@ xfs_alloc_ag_vextent_exact(
+ args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
+ - args->agbno;
+ xfs_alloc_fix_len(args);
+- if (!xfs_alloc_fix_minleft(args))
+- goto not_found;
+-
+ ASSERT(args->agbno + args->len <= tend);
+
+ /*
+@@ -1146,12 +1101,7 @@ restart:
+ XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
+ ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
+ args->len = blen;
+- if (!xfs_alloc_fix_minleft(args)) {
+- xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+- trace_xfs_alloc_near_nominleft(args);
+- return 0;
+- }
+- blen = args->len;
++
+ /*
+ * We are allocating starting at bnew for blen blocks.
+ */
+@@ -1343,12 +1293,6 @@ restart:
+ */
+ args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
+ xfs_alloc_fix_len(args);
+- if (!xfs_alloc_fix_minleft(args)) {
+- trace_xfs_alloc_near_nominleft(args);
+- xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
+- xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+- return 0;
+- }
+ rlen = args->len;
+ (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
+ args->datatype, ltbnoa, ltlena, &ltnew);
+@@ -1550,8 +1494,6 @@ restart:
+ }
+ xfs_alloc_fix_len(args);
+
+- if (!xfs_alloc_fix_minleft(args))
+- goto out_nominleft;
+ rlen = args->len;
+ XFS_WANT_CORRUPTED_GOTO(args->mp, rlen <= flen, error0);
+ /*
+@@ -2070,10 +2012,20 @@ xfs_alloc_space_available(
+
+ /* do we have enough free space remaining for the allocation? */
+ available = (int)(pag->pagf_freeblks + pag->pagf_flcount -
+- reservation - min_free - args->total);
+- if (available < (int)args->minleft || available <= 0)
++ reservation - min_free - args->minleft);
++ if (available < (int)args->total)
+ return false;
+
++ /*
++ * Clamp maxlen to the amount of free space available for the actual
++ * extent allocation.
++ */
++ if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) {
++ args->maxlen = available;
++ ASSERT(args->maxlen > 0);
++ ASSERT(args->maxlen >= args->minlen);
++ }
++
+ return true;
+ }
+
+@@ -2119,7 +2071,8 @@ xfs_alloc_fix_freelist(
+ }
+
+ need = xfs_alloc_min_freelist(mp, pag);
+- if (!xfs_alloc_space_available(args, need, flags))
++ if (!xfs_alloc_space_available(args, need, flags |
++ XFS_ALLOC_FLAG_CHECK))
+ goto out_agbp_relse;
+
+ /*
+--- a/fs/xfs/libxfs/xfs_alloc.h
++++ b/fs/xfs/libxfs/xfs_alloc.h
+@@ -56,7 +56,7 @@ typedef unsigned int xfs_alloctype_t;
+ #define XFS_ALLOC_FLAG_FREEING 0x00000002 /* indicate caller is freeing extents*/
+ #define XFS_ALLOC_FLAG_NORMAP 0x00000004 /* don't modify the rmapbt */
+ #define XFS_ALLOC_FLAG_NOSHRINK 0x00000008 /* don't shrink the freelist */
+-
++#define XFS_ALLOC_FLAG_CHECK 0x00000010 /* test only, don't modify args */
+
+ /*
+ * Argument structure for xfs_alloc routines.
--- /dev/null
+From hch@lst.de Thu Feb 2 11:12:10 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Thu, 2 Feb 2017 08:55:53 +0100
+Subject: xfs: bump up reserved blocks in xfs_alloc_set_aside
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, "Darrick J. Wong" <darrick.wong@oracle.com>
+Message-ID: <1486022171-8076-2-git-send-email-hch@lst.de>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 5149fd327f16e393c1d04fa5325ab072c32472bf upstream.
+
+Setting aside 4 blocks globally for bmbt splits isn't all that useful,
+as different threads can allocate space in parallel. Bump it to 4
+blocks per AG to allow each thread that is currently doing an
+allocation to dip into it separately. Without that we may not have
+enough reserved blocks if there are enough parallel transactions
+in an almost out of space file system that all run into bmap btree
+splits.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_alloc.c | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_alloc.c
++++ b/fs/xfs/libxfs/xfs_alloc.c
+@@ -95,10 +95,7 @@ unsigned int
+ xfs_alloc_set_aside(
+ struct xfs_mount *mp)
+ {
+- unsigned int blocks;
+-
+- blocks = 4 + (mp->m_sb.sb_agcount * XFS_ALLOC_AGFL_RESERVE);
+- return blocks;
++ return mp->m_sb.sb_agcount * (XFS_ALLOC_AGFL_RESERVE + 4);
+ }
+
+ /*
--- /dev/null
+From hch@lst.de Thu Feb 2 11:16:46 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Thu, 2 Feb 2017 08:56:10 +0100
+Subject: xfs: clear _XBF_PAGES from buffers when readahead page
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, "Darrick J. Wong" <darrick.wong@oracle.com>
+Message-ID: <1486022171-8076-19-git-send-email-hch@lst.de>
+
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit 2aa6ba7b5ad3189cc27f14540aa2f57f0ed8df4b upstream.
+
+If we try to allocate memory pages to back an xfs_buf that we're trying
+to read, it's possible that we'll be so short on memory that the page
+allocation fails. For a blocking read we'll just wait, but for
+readahead we simply dump all the pages we've collected so far.
+
+Unfortunately, after dumping the pages we neglect to clear the
+_XBF_PAGES state, which means that the subsequent call to xfs_buf_free
+thinks that b_pages still points to pages we own. It then double-frees
+the b_pages pages.
+
+This results in screaming about negative page refcounts from the memory
+manager, which xfs oughtn't be triggering. To reproduce this case,
+mount a filesystem where the size of the inodes far outweighs the
+available memory (a ~500M inode filesystem on a VM with 300MB memory
+did the trick here) and run bulkstat in parallel with other memory
+eating processes to put a huge load on the system. The "check summary"
+phase of xfs_scrub also works for this purpose.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Eric Sandeen <sandeen@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_buf.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/xfs/xfs_buf.c
++++ b/fs/xfs/xfs_buf.c
+@@ -423,6 +423,7 @@ retry:
+ out_free_pages:
+ for (i = 0; i < bp->b_page_count; i++)
+ __free_page(bp->b_pages[i]);
++ bp->b_flags &= ~_XBF_PAGES;
+ return error;
+ }
+
--- /dev/null
+From hch@lst.de Thu Feb 2 11:13:25 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Thu, 2 Feb 2017 08:55:57 +0100
+Subject: xfs: don't print warnings when xfs_log_force fails
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, "Darrick J. Wong" <darrick.wong@oracle.com>
+Message-ID: <1486022171-8076-6-git-send-email-hch@lst.de>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 84a4620cfe97c9d57e39b2369bfb77faff55063d upstream.
+
+There are only two reasons for xfs_log_force / xfs_log_force_lsn to fail:
+one is an I/O error, for which xlog_bdstrat already logs a warning, and
+the second is an already shutdown log due to a previous I/O error. In
+the latter case we'll already have a previous indication for the actual
+error, but the large stream of misleading warnings from xfs_log_force
+will probably scroll it out of the message buffer.
+
+Simply removing the warnings thus makes the XFS log reporting significantly
+better.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_log.c | 12 ++----------
+ 1 file changed, 2 insertions(+), 10 deletions(-)
+
+--- a/fs/xfs/xfs_log.c
++++ b/fs/xfs/xfs_log.c
+@@ -3324,12 +3324,8 @@ xfs_log_force(
+ xfs_mount_t *mp,
+ uint flags)
+ {
+- int error;
+-
+ trace_xfs_log_force(mp, 0, _RET_IP_);
+- error = _xfs_log_force(mp, flags, NULL);
+- if (error)
+- xfs_warn(mp, "%s: error %d returned.", __func__, error);
++ _xfs_log_force(mp, flags, NULL);
+ }
+
+ /*
+@@ -3473,12 +3469,8 @@ xfs_log_force_lsn(
+ xfs_lsn_t lsn,
+ uint flags)
+ {
+- int error;
+-
+ trace_xfs_log_force(mp, lsn, _RET_IP_);
+- error = _xfs_log_force_lsn(mp, lsn, flags, NULL);
+- if (error)
+- xfs_warn(mp, "%s: error %d returned.", __func__, error);
++ _xfs_log_force_lsn(mp, lsn, flags, NULL);
+ }
+
+ /*
--- /dev/null
+From hch@lst.de Thu Feb 2 11:13:10 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Thu, 2 Feb 2017 08:55:56 +0100
+Subject: xfs: don't rely on ->total in xfs_alloc_space_available
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, "Darrick J. Wong" <darrick.wong@oracle.com>
+Message-ID: <1486022171-8076-5-git-send-email-hch@lst.de>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 12ef830198b0d71668eb9b59f9ba69d32951a48a upstream.
+
+->total is a bit of an odd parameter passed down to the low-level
+allocator all the way from the high-level callers. It's supposed to
+contain the maximum number of blocks to be allocated for the whole
+transaction [1].
+
+But in xfs_iomap_write_allocate we only convert existing delayed
+allocations and thus only have a minimal block reservation for the
+current transaction, so xfs_alloc_space_available can't use it for
+the allocation decisions. Use the maximum of args->total and the
+calculated block requirement to make a decision. We probably should
+get rid of args->total eventually and instead apply ->minleft more
+broadly, but that will require some extensive changes all over.
+
+[1] which creates lots of confusion as most callers don't decrement it
+once doing a first allocation. But that's for a separate series.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_alloc.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_alloc.c
++++ b/fs/xfs/libxfs/xfs_alloc.c
+@@ -1995,7 +1995,7 @@ xfs_alloc_space_available(
+ int flags)
+ {
+ struct xfs_perag *pag = args->pag;
+- xfs_extlen_t longest;
++ xfs_extlen_t alloc_len, longest;
+ xfs_extlen_t reservation; /* blocks that are still reserved */
+ int available;
+
+@@ -2005,15 +2005,16 @@ xfs_alloc_space_available(
+ reservation = xfs_ag_resv_needed(pag, args->resv);
+
+ /* do we have enough contiguous free space for the allocation? */
++ alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop;
+ longest = xfs_alloc_longest_free_extent(args->mp, pag, min_free,
+ reservation);
+- if ((args->minlen + args->alignment + args->minalignslop - 1) > longest)
++ if (longest < alloc_len)
+ return false;
+
+ /* do we have enough free space remaining for the allocation? */
+ available = (int)(pag->pagf_freeblks + pag->pagf_flcount -
+ reservation - min_free - args->minleft);
+- if (available < (int)args->total)
++ if (available < (int)max(args->total, alloc_len))
+ return false;
+
+ /*
--- /dev/null
+From hch@lst.de Thu Feb 2 11:15:05 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Thu, 2 Feb 2017 08:56:04 +0100
+Subject: xfs: don't wrap ID in xfs_dq_get_next_id
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, Eric Sandeen <sandeen@redhat.com>, "Darrick J. Wong" <darrick.wong@oracle.com>
+Message-ID: <1486022171-8076-13-git-send-email-hch@lst.de>
+
+
+From: Eric Sandeen <sandeen@redhat.com>
+
+commit 657bdfb7f5e68ca5e2ed009ab473c429b0d6af85 upstream.
+
+The GETNEXTQOTA ioctl takes whatever ID is sent in,
+and looks for the next active quota for a user
+equal or higher to that ID.
+
+But if we are at the maximum ID and then ask for the "next"
+one, we may wrap back to zero. In this case, userspace
+may loop forever, because it will start querying again
+at zero.
+
+We'll fix this in userspace as well, but for the kernel,
+return -ENOENT if we ask for the next quota ID
+past UINT_MAX so the caller knows to stop.
+
+Signed-off-by: Eric Sandeen <sandeen@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_dquot.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/fs/xfs/xfs_dquot.c
++++ b/fs/xfs/xfs_dquot.c
+@@ -710,6 +710,10 @@ xfs_dq_get_next_id(
+ /* Simple advance */
+ next_id = *id + 1;
+
++ /* If we'd wrap past the max ID, stop */
++ if (next_id < *id)
++ return -ENOENT;
++
+ /* If new ID is within the current chunk, advancing it sufficed */
+ if (next_id % mp->m_quotainfo->qi_dqperchunk) {
+ *id = next_id;
--- /dev/null
+From hch@lst.de Thu Feb 2 11:15:49 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Thu, 2 Feb 2017 08:56:09 +0100
+Subject: xfs: extsize hints are not unlikely in xfs_bmap_btalloc
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, "Darrick J. Wong" <darrick.wong@oracle.com>
+Message-ID: <1486022171-8076-18-git-send-email-hch@lst.de>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 493611ebd62673f39e2f52c2561182c558a21cb6 upstream.
+
+With COW files they are the hotpath, just like for files with the
+extent size hint attribute. We really shouldn't micro-manage anything
+but failure cases with unlikely.
+
+Additionally Arnd Bergmann recently reported that one of these two
+unlikely annotations causes link failures together with an upcoming
+kernel instrumentation patch, so let's get rid of it ASAP.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reported-by: Arnd Bergmann <arnd@arndb.de>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_bmap.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -3720,7 +3720,7 @@ xfs_bmap_btalloc(
+ align = xfs_get_cowextsz_hint(ap->ip);
+ else if (xfs_alloc_is_userdata(ap->datatype))
+ align = xfs_get_extsz_hint(ap->ip);
+- if (unlikely(align)) {
++ if (align) {
+ error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
+ align, 0, ap->eof, 0, ap->conv,
+ &ap->offset, &ap->length);
+@@ -3792,7 +3792,7 @@ xfs_bmap_btalloc(
+ args.minlen = ap->minlen;
+ }
+ /* apply extent size hints if obtained earlier */
+- if (unlikely(align)) {
++ if (align) {
+ args.prod = align;
+ if ((args.mod = (xfs_extlen_t)do_mod(ap->offset, args.prod)))
+ args.mod = (xfs_extlen_t)(args.prod - args.mod);
--- /dev/null
+From hch@lst.de Thu Feb 2 11:17:02 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Thu, 2 Feb 2017 08:56:11 +0100
+Subject: xfs: fix bmv_count confusion w/ shared extents
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, "Darrick J. Wong" <darrick.wong@oracle.com>
+Message-ID: <1486022171-8076-20-git-send-email-hch@lst.de>
+
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit c364b6d0b6cda1cd5d9ab689489adda3e82529aa upstream.
+
+In a bmapx call, bmv_count is the total size of the array, including the
+zeroth element that userspace uses to supply the search key. The output
+array starts at offset 1 so that we can set up the user for the next
+invocation. Since we now can split an extent into multiple bmap records
+due to shared/unshared status, we have to be careful that we don't
+overflow the output array.
+
+In the original patch f86f403794b ("xfs: teach get_bmapx about shared
+extents and the CoW fork") I used cur_ext (the output index) to check
+for overflows, albeit with an off-by-one error. Since nexleft no longer
+describes the number of unfilled slots in the output, we can rip all
+that out and use cur_ext for the overflow check directly.
+
+Failure to do this causes heap corruption in bmapx callers such as
+xfs_io and xfs_scrub. xfs/328 can reproduce this problem.
+
+Reviewed-by: Eric Sandeen <sandeen@redhat.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_bmap_util.c | 28 ++++++++++++++++++----------
+ 1 file changed, 18 insertions(+), 10 deletions(-)
+
+--- a/fs/xfs/xfs_bmap_util.c
++++ b/fs/xfs/xfs_bmap_util.c
+@@ -528,7 +528,6 @@ xfs_getbmap(
+ xfs_bmbt_irec_t *map; /* buffer for user's data */
+ xfs_mount_t *mp; /* file system mount point */
+ int nex; /* # of user extents can do */
+- int nexleft; /* # of user extents left */
+ int subnex; /* # of bmapi's can do */
+ int nmap; /* number of map entries */
+ struct getbmapx *out; /* output structure */
+@@ -686,10 +685,8 @@ xfs_getbmap(
+ goto out_free_map;
+ }
+
+- nexleft = nex;
+-
+ do {
+- nmap = (nexleft > subnex) ? subnex : nexleft;
++ nmap = (nex> subnex) ? subnex : nex;
+ error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset),
+ XFS_BB_TO_FSB(mp, bmv->bmv_length),
+ map, &nmap, bmapi_flags);
+@@ -697,8 +694,8 @@ xfs_getbmap(
+ goto out_free_map;
+ ASSERT(nmap <= subnex);
+
+- for (i = 0; i < nmap && nexleft && bmv->bmv_length &&
+- cur_ext < bmv->bmv_count; i++) {
++ for (i = 0; i < nmap && bmv->bmv_length &&
++ cur_ext < bmv->bmv_count - 1; i++) {
+ out[cur_ext].bmv_oflags = 0;
+ if (map[i].br_state == XFS_EXT_UNWRITTEN)
+ out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC;
+@@ -760,16 +757,27 @@ xfs_getbmap(
+ continue;
+ }
+
++ /*
++ * In order to report shared extents accurately,
++ * we report each distinct shared/unshared part
++ * of a single bmbt record using multiple bmap
++ * extents. To make that happen, we iterate the
++ * same map array item multiple times, each
++ * time trimming out the subextent that we just
++ * reported.
++ *
++ * Because of this, we must check the out array
++ * index (cur_ext) directly against bmv_count-1
++ * to avoid overflows.
++ */
+ if (inject_map.br_startblock != NULLFSBLOCK) {
+ map[i] = inject_map;
+ i--;
+- } else
+- nexleft--;
++ }
+ bmv->bmv_entries++;
+ cur_ext++;
+ }
+- } while (nmap && nexleft && bmv->bmv_length &&
+- cur_ext < bmv->bmv_count);
++ } while (nmap && bmv->bmv_length && cur_ext < bmv->bmv_count - 1);
+
+ out_free_map:
+ kmem_free(map);
--- /dev/null
+From hch@lst.de Thu Feb 2 11:12:20 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Thu, 2 Feb 2017 08:55:54 +0100
+Subject: xfs: fix bogus minleft manipulations
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, "Darrick J. Wong" <darrick.wong@oracle.com>
+Message-ID: <1486022171-8076-3-git-send-email-hch@lst.de>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 255c516278175a6dc7037d1406307f35237d8688 upstream.
+
+We can't just set minleft to 0 when we're low on space - that's exactly
+what we need minleft for: to protect space in the AG for btree block
+allocations when we are low on free space.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_alloc.c | 24 +++++++-----------------
+ fs/xfs/libxfs/xfs_bmap.c | 3 ---
+ fs/xfs/libxfs/xfs_bmap_btree.c | 3 +--
+ 3 files changed, 8 insertions(+), 22 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_alloc.c
++++ b/fs/xfs/libxfs/xfs_alloc.c
+@@ -2635,12 +2635,10 @@ xfs_alloc_vextent(
+ xfs_agblock_t agsize; /* allocation group size */
+ int error;
+ int flags; /* XFS_ALLOC_FLAG_... locking flags */
+- xfs_extlen_t minleft;/* minimum left value, temp copy */
+ xfs_mount_t *mp; /* mount structure pointer */
+ xfs_agnumber_t sagno; /* starting allocation group number */
+ xfs_alloctype_t type; /* input allocation type */
+ int bump_rotor = 0;
+- int no_min = 0;
+ xfs_agnumber_t rotorstep = xfs_rotorstep; /* inode32 agf stepper */
+
+ mp = args->mp;
+@@ -2669,7 +2667,6 @@ xfs_alloc_vextent(
+ trace_xfs_alloc_vextent_badargs(args);
+ return 0;
+ }
+- minleft = args->minleft;
+
+ switch (type) {
+ case XFS_ALLOCTYPE_THIS_AG:
+@@ -2680,9 +2677,7 @@ xfs_alloc_vextent(
+ */
+ args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
+ args->pag = xfs_perag_get(mp, args->agno);
+- args->minleft = 0;
+ error = xfs_alloc_fix_freelist(args, 0);
+- args->minleft = minleft;
+ if (error) {
+ trace_xfs_alloc_vextent_nofix(args);
+ goto error0;
+@@ -2747,9 +2742,7 @@ xfs_alloc_vextent(
+ */
+ for (;;) {
+ args->pag = xfs_perag_get(mp, args->agno);
+- if (no_min) args->minleft = 0;
+ error = xfs_alloc_fix_freelist(args, flags);
+- args->minleft = minleft;
+ if (error) {
+ trace_xfs_alloc_vextent_nofix(args);
+ goto error0;
+@@ -2789,20 +2782,17 @@ xfs_alloc_vextent(
+ * or switch to non-trylock mode.
+ */
+ if (args->agno == sagno) {
+- if (no_min == 1) {
++ if (flags == 0) {
+ args->agbno = NULLAGBLOCK;
+ trace_xfs_alloc_vextent_allfailed(args);
+ break;
+ }
+- if (flags == 0) {
+- no_min = 1;
+- } else {
+- flags = 0;
+- if (type == XFS_ALLOCTYPE_START_BNO) {
+- args->agbno = XFS_FSB_TO_AGBNO(mp,
+- args->fsbno);
+- args->type = XFS_ALLOCTYPE_NEAR_BNO;
+- }
++
++ flags = 0;
++ if (type == XFS_ALLOCTYPE_START_BNO) {
++ args->agbno = XFS_FSB_TO_AGBNO(mp,
++ args->fsbno);
++ args->type = XFS_ALLOCTYPE_NEAR_BNO;
+ }
+ }
+ xfs_perag_put(args->pag);
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -3903,7 +3903,6 @@ xfs_bmap_btalloc(
+ args.fsbno = 0;
+ args.type = XFS_ALLOCTYPE_FIRST_AG;
+ args.total = ap->minlen;
+- args.minleft = 0;
+ if ((error = xfs_alloc_vextent(&args)))
+ return error;
+ ap->dfops->dop_low = true;
+@@ -4437,8 +4436,6 @@ xfs_bmapi_allocate(
+ if (error)
+ return error;
+
+- if (bma->dfops->dop_low)
+- bma->minleft = 0;
+ if (bma->cur)
+ bma->cur->bc_private.b.firstblock = *bma->firstblock;
+ if (bma->blkno == NULLFSBLOCK)
+--- a/fs/xfs/libxfs/xfs_bmap_btree.c
++++ b/fs/xfs/libxfs/xfs_bmap_btree.c
+@@ -502,12 +502,11 @@ try_another_ag:
+ if (args.fsbno == NULLFSBLOCK && args.minleft) {
+ /*
+ * Could not find an AG with enough free space to satisfy
+- * a full btree split. Try again without minleft and if
++ * a full btree split. Try again and if
+ * successful activate the lowspace algorithm.
+ */
+ args.fsbno = 0;
+ args.type = XFS_ALLOCTYPE_FIRST_AG;
+- args.minleft = 0;
+ error = xfs_alloc_vextent(&args);
+ if (error)
+ goto error0;
--- /dev/null
+From hch@lst.de Thu Feb 2 11:15:22 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Thu, 2 Feb 2017 08:56:06 +0100
+Subject: xfs: fix COW writeback race
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, "Darrick J. Wong" <darrick.wong@oracle.com>
+Message-ID: <1486022171-8076-15-git-send-email-hch@lst.de>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit d2b3964a0780d2d2994eba57f950d6c9fe489ed8 upstream.
+
+Due to the way xfs_iomap_write_allocate tries to convert the whole
+found extents from delalloc to real space we can run into a race
+condition with multiple threads doing writes to this same extent.
+For the non-COW case that is harmless as the only thing that can happen
+is that we call xfs_bmapi_write on an extent that has already been
+converted to a real allocation. For COW writes where we move the extent
+from the COW to the data fork after I/O completion the race is, however,
+not quite as harmless. In the worst case we are now calling
+xfs_bmapi_write on a region that contains a hole in the COW fork, which
+will trip up an assert in debug builds or lead to file system corruption
+in non-debug builds. This seems to be reproducible with workloads of
+small O_DSYNC writes, although so far I've not managed to come up with
+an isolated reproducer.
+
+The fix for the issue is relatively simple: tell xfs_bmapi_write
+that we are only asked to convert delayed allocations and skip holes
+in that case.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_bmap.c | 44 ++++++++++++++++++++++++++++++++------------
+ fs/xfs/libxfs/xfs_bmap.h | 6 +++++-
+ fs/xfs/xfs_iomap.c | 2 +-
+ 3 files changed, 38 insertions(+), 14 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -4607,8 +4607,6 @@ xfs_bmapi_write(
+ int n; /* current extent index */
+ xfs_fileoff_t obno; /* old block number (offset) */
+ int whichfork; /* data or attr fork */
+- char inhole; /* current location is hole in file */
+- char wasdelay; /* old extent was delayed */
+
+ #ifdef DEBUG
+ xfs_fileoff_t orig_bno; /* original block number value */
+@@ -4694,22 +4692,44 @@ xfs_bmapi_write(
+ bma.firstblock = firstblock;
+
+ while (bno < end && n < *nmap) {
+- inhole = eof || bma.got.br_startoff > bno;
+- wasdelay = !inhole && isnullstartblock(bma.got.br_startblock);
++ bool need_alloc = false, wasdelay = false;
+
+- /*
+- * Make sure we only reflink into a hole.
+- */
+- if (flags & XFS_BMAPI_REMAP)
+- ASSERT(inhole);
+- if (flags & XFS_BMAPI_COWFORK)
+- ASSERT(!inhole);
++ /* in hole or beyond EOF? */
++ if (eof || bma.got.br_startoff > bno) {
++ if (flags & XFS_BMAPI_DELALLOC) {
++ /*
++ * For the COW fork we can reasonably get a
++ * request for converting an extent that races
++ * with other threads already having converted
++ * part of it, as there converting COW to
++ * regular blocks is not protected using the
++ * IOLOCK.
++ */
++ ASSERT(flags & XFS_BMAPI_COWFORK);
++ if (!(flags & XFS_BMAPI_COWFORK)) {
++ error = -EIO;
++ goto error0;
++ }
++
++ if (eof || bno >= end)
++ break;
++ } else {
++ need_alloc = true;
++ }
++ } else {
++ /*
++ * Make sure we only reflink into a hole.
++ */
++ ASSERT(!(flags & XFS_BMAPI_REMAP));
++ if (isnullstartblock(bma.got.br_startblock))
++ wasdelay = true;
++ }
+
+ /*
+ * First, deal with the hole before the allocated space
+ * that we found, if any.
+ */
+- if (inhole || wasdelay) {
++ if (need_alloc || wasdelay) {
+ bma.eof = eof;
+ bma.conv = !!(flags & XFS_BMAPI_CONVERT);
+ bma.wasdel = wasdelay;
+--- a/fs/xfs/libxfs/xfs_bmap.h
++++ b/fs/xfs/libxfs/xfs_bmap.h
+@@ -110,6 +110,9 @@ struct xfs_extent_free_item
+ /* Map something in the CoW fork. */
+ #define XFS_BMAPI_COWFORK 0x200
+
++/* Only convert delalloc space, don't allocate entirely new extents */
++#define XFS_BMAPI_DELALLOC 0x400
++
+ #define XFS_BMAPI_FLAGS \
+ { XFS_BMAPI_ENTIRE, "ENTIRE" }, \
+ { XFS_BMAPI_METADATA, "METADATA" }, \
+@@ -120,7 +123,8 @@ struct xfs_extent_free_item
+ { XFS_BMAPI_CONVERT, "CONVERT" }, \
+ { XFS_BMAPI_ZERO, "ZERO" }, \
+ { XFS_BMAPI_REMAP, "REMAP" }, \
+- { XFS_BMAPI_COWFORK, "COWFORK" }
++ { XFS_BMAPI_COWFORK, "COWFORK" }, \
++ { XFS_BMAPI_DELALLOC, "DELALLOC" }
+
+
+ static inline int xfs_bmapi_aflag(int w)
+--- a/fs/xfs/xfs_iomap.c
++++ b/fs/xfs/xfs_iomap.c
+@@ -681,7 +681,7 @@ xfs_iomap_write_allocate(
+ xfs_trans_t *tp;
+ int nimaps;
+ int error = 0;
+- int flags = 0;
++ int flags = XFS_BMAPI_DELALLOC;
+ int nres;
+
+ if (whichfork == XFS_COW_FORK)
--- /dev/null
+From hch@lst.de Thu Feb 2 11:15:13 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Thu, 2 Feb 2017 08:56:05 +0100
+Subject: xfs: fix xfs_mode_to_ftype() prototype
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, Arnd Bergmann <arnd@arndb.de>, "Darrick J. Wong" <darrick.wong@oracle.com>
+Message-ID: <1486022171-8076-14-git-send-email-hch@lst.de>
+
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+commit fd29f7af75b7adf250beccffa63746c6a88e2b74 upstream.
+
+A harmless warning just got introduced:
+
+fs/xfs/libxfs/xfs_dir2.h:40:8: error: type qualifiers ignored on function return type [-Werror=ignored-qualifiers]
+
+Removing the 'const' modifier avoids the warning and has no
+other effect.
+
+Fixes: 1fc4d33fed12 ("xfs: replace xfs_mode_to_ftype table with switch statement")
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_dir2.c | 2 +-
+ fs/xfs/libxfs/xfs_dir2.h | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_dir2.c
++++ b/fs/xfs/libxfs/xfs_dir2.c
+@@ -38,7 +38,7 @@ struct xfs_name xfs_name_dotdot = { (uns
+ /*
+ * Convert inode mode to directory entry filetype
+ */
+-const unsigned char xfs_mode_to_ftype(int mode)
++unsigned char xfs_mode_to_ftype(int mode)
+ {
+ switch (mode & S_IFMT) {
+ case S_IFREG:
+--- a/fs/xfs/libxfs/xfs_dir2.h
++++ b/fs/xfs/libxfs/xfs_dir2.h
+@@ -37,7 +37,7 @@ extern struct xfs_name xfs_name_dotdot;
+ /*
+ * Convert inode mode to directory entry filetype
+ */
+-extern const unsigned char xfs_mode_to_ftype(int mode);
++extern unsigned char xfs_mode_to_ftype(int mode);
+
+ /*
+ * directory operations vector for encode/decode routines
--- /dev/null
+From hch@lst.de Thu Feb 2 11:13:35 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Thu, 2 Feb 2017 08:55:58 +0100
+Subject: xfs: make the ASSERT() condition likely
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, Amir Goldstein <amir73il@gmail.com>, "Darrick J. Wong" <darrick.wong@oracle.com>
+Message-ID: <1486022171-8076-7-git-send-email-hch@lst.de>
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+commit bf46ecc3d8cca05f2907cf482755c42c2b11a79d upstream.
+
+The ASSERT() condition is the normal case, not the exception,
+so testing the condition should be likely(), not unlikely().
+
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_linux.h | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/fs/xfs/xfs_linux.h
++++ b/fs/xfs/xfs_linux.h
+@@ -330,11 +330,11 @@ static inline __uint64_t howmany_64(__ui
+ }
+
+ #define ASSERT_ALWAYS(expr) \
+- (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
++ (likely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
+
+ #ifdef DEBUG
+ #define ASSERT(expr) \
+- (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
++ (likely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
+
+ #ifndef STATIC
+ # define STATIC noinline
+@@ -345,7 +345,7 @@ static inline __uint64_t howmany_64(__ui
+ #ifdef XFS_WARN
+
+ #define ASSERT(expr) \
+- (unlikely(expr) ? (void)0 : asswarn(#expr, __FILE__, __LINE__))
++ (likely(expr) ? (void)0 : asswarn(#expr, __FILE__, __LINE__))
+
+ #ifndef STATIC
+ # define STATIC static noinline
--- /dev/null
+From hch@lst.de Thu Feb 2 11:15:39 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Thu, 2 Feb 2017 08:56:08 +0100
+Subject: xfs: remove racy hasattr check from attr ops
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, Brian Foster <bfoster@redhat.com>, "Darrick J. Wong" <darrick.wong@oracle.com>
+Message-ID: <1486022171-8076-17-git-send-email-hch@lst.de>
+
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit 5a93790d4e2df73e30c965ec6e49be82fc3ccfce upstream.
+
+xfs_attr_[get|remove]() have unlocked attribute fork checks to optimize
+away a lock cycle in cases where the fork does not exist or is otherwise
+empty. This check is not safe, however, because an attribute fork short
+form to extent format conversion includes a transient state that causes
+the xfs_inode_hasattr() check to fail. Specifically,
+xfs_attr_shortform_to_leaf() creates an empty extent format attribute
+fork and then adds the existing shortform attributes to it.
+
+This means that lookup of an existing xattr can spuriously return
+-ENOATTR when racing against a setxattr that causes the associated
+format conversion. This was originally reproduced by an untar on a
+particularly configured glusterfs volume, but can also be reproduced on
+demand with properly crafted xattr requests.
+
+The format conversion occurs under the exclusive ilock. xfs_attr_get()
+and xfs_attr_remove() already have the proper locking and checks further
+down in the functions to handle this situation correctly. Drop the
+unlocked checks to avoid the spurious failure and rely on the existing
+logic.
+
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_attr.c | 6 ------
+ 1 file changed, 6 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_attr.c
++++ b/fs/xfs/libxfs/xfs_attr.c
+@@ -131,9 +131,6 @@ xfs_attr_get(
+ if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+ return -EIO;
+
+- if (!xfs_inode_hasattr(ip))
+- return -ENOATTR;
+-
+ error = xfs_attr_args_init(&args, ip, name, flags);
+ if (error)
+ return error;
+@@ -392,9 +389,6 @@ xfs_attr_remove(
+ if (XFS_FORCED_SHUTDOWN(dp->i_mount))
+ return -EIO;
+
+- if (!xfs_inode_hasattr(dp))
+- return -ENOATTR;
+-
+ error = xfs_attr_args_init(&args, dp, name, flags);
+ if (error)
+ return error;
--- /dev/null
+From hch@lst.de Thu Feb 2 11:14:30 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Thu, 2 Feb 2017 08:56:01 +0100
+Subject: xfs: replace xfs_mode_to_ftype table with switch statement
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, Amir Goldstein <amir73il@gmail.com>, "Darrick J. Wong" <darrick.wong@oracle.com>
+Message-ID: <1486022171-8076-10-git-send-email-hch@lst.de>
+
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+commit 1fc4d33fed124fb182e8e6c214e973a29389ae83 upstream.
+
+The size of the xfs_mode_to_ftype[] conversion table
+was too small to handle an invalid value of mode=S_IFMT.
+
+Instead of fixing the table size, replace the conversion table
+with a conversion helper that uses a switch statement.
+
+Suggested-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_dir2.c | 36 ++++++++++++++++++++++--------------
+ fs/xfs/libxfs/xfs_dir2.h | 5 ++---
+ fs/xfs/xfs_iops.c | 2 +-
+ 3 files changed, 25 insertions(+), 18 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_dir2.c
++++ b/fs/xfs/libxfs/xfs_dir2.c
+@@ -36,21 +36,29 @@
+ struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2, XFS_DIR3_FT_DIR };
+
+ /*
+- * @mode, if set, indicates that the type field needs to be set up.
+- * This uses the transformation from file mode to DT_* as defined in linux/fs.h
+- * for file type specification. This will be propagated into the directory
+- * structure if appropriate for the given operation and filesystem config.
++ * Convert inode mode to directory entry filetype
+ */
+-const unsigned char xfs_mode_to_ftype[S_IFMT >> S_SHIFT] = {
+- [0] = XFS_DIR3_FT_UNKNOWN,
+- [S_IFREG >> S_SHIFT] = XFS_DIR3_FT_REG_FILE,
+- [S_IFDIR >> S_SHIFT] = XFS_DIR3_FT_DIR,
+- [S_IFCHR >> S_SHIFT] = XFS_DIR3_FT_CHRDEV,
+- [S_IFBLK >> S_SHIFT] = XFS_DIR3_FT_BLKDEV,
+- [S_IFIFO >> S_SHIFT] = XFS_DIR3_FT_FIFO,
+- [S_IFSOCK >> S_SHIFT] = XFS_DIR3_FT_SOCK,
+- [S_IFLNK >> S_SHIFT] = XFS_DIR3_FT_SYMLINK,
+-};
++const unsigned char xfs_mode_to_ftype(int mode)
++{
++ switch (mode & S_IFMT) {
++ case S_IFREG:
++ return XFS_DIR3_FT_REG_FILE;
++ case S_IFDIR:
++ return XFS_DIR3_FT_DIR;
++ case S_IFCHR:
++ return XFS_DIR3_FT_CHRDEV;
++ case S_IFBLK:
++ return XFS_DIR3_FT_BLKDEV;
++ case S_IFIFO:
++ return XFS_DIR3_FT_FIFO;
++ case S_IFSOCK:
++ return XFS_DIR3_FT_SOCK;
++ case S_IFLNK:
++ return XFS_DIR3_FT_SYMLINK;
++ default:
++ return XFS_DIR3_FT_UNKNOWN;
++ }
++}
+
+ /*
+ * ASCII case-insensitive (ie. A-Z) support for directories that was
+--- a/fs/xfs/libxfs/xfs_dir2.h
++++ b/fs/xfs/libxfs/xfs_dir2.h
+@@ -35,10 +35,9 @@ struct xfs_dir2_data_unused;
+ extern struct xfs_name xfs_name_dotdot;
+
+ /*
+- * directory filetype conversion tables.
++ * Convert inode mode to directory entry filetype
+ */
+-#define S_SHIFT 12
+-extern const unsigned char xfs_mode_to_ftype[];
++extern const unsigned char xfs_mode_to_ftype(int mode);
+
+ /*
+ * directory operations vector for encode/decode routines
+--- a/fs/xfs/xfs_iops.c
++++ b/fs/xfs/xfs_iops.c
+@@ -103,7 +103,7 @@ xfs_dentry_to_name(
+ {
+ namep->name = dentry->d_name.name;
+ namep->len = dentry->d_name.len;
+- namep->type = xfs_mode_to_ftype[(mode & S_IFMT) >> S_SHIFT];
++ namep->type = xfs_mode_to_ftype(mode);
+ }
+
+ STATIC void
--- /dev/null
+From hch@lst.de Thu Feb 2 11:14:03 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Thu, 2 Feb 2017 08:55:59 +0100
+Subject: xfs: sanity check directory inode di_size
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, Amir Goldstein <amir73il@gmail.com>, "Darrick J. Wong" <darrick.wong@oracle.com>
+Message-ID: <1486022171-8076-8-git-send-email-hch@lst.de>
+
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+commit 3c6f46eacd876bd723a9bad3c6882714c052fd8e upstream.
+
+This change fixes an assertion hit when fuzzing on-disk
+i_mode values.
+
+The easy case to fix is when changing an empty file
+i_mode to S_IFDIR. In this case, xfs_dinode_verify()
+detects an illegal zero size for directory and fails
+to load the inode structure from disk.
+
+For the case of non empty file whose i_mode is changed
+to S_IFDIR, the ASSERT() statement in xfs_dir2_isblock()
+is replaced with return -EFSCORRUPTED, to avoid interacting
+with corrupted junk also when XFS_DEBUG is disabled.
+
+Suggested-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_dir2.c | 3 ++-
+ fs/xfs/libxfs/xfs_inode_buf.c | 7 +++++--
+ 2 files changed, 7 insertions(+), 3 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_dir2.c
++++ b/fs/xfs/libxfs/xfs_dir2.c
+@@ -631,7 +631,8 @@ xfs_dir2_isblock(
+ if ((rval = xfs_bmap_last_offset(args->dp, &last, XFS_DATA_FORK)))
+ return rval;
+ rval = XFS_FSB_TO_B(args->dp->i_mount, last) == args->geo->blksize;
+- ASSERT(rval == 0 || args->dp->i_d.di_size == args->geo->blksize);
++ if (rval != 0 && args->dp->i_d.di_size != args->geo->blksize)
++ return -EFSCORRUPTED;
+ *vp = rval;
+ return 0;
+ }
+--- a/fs/xfs/libxfs/xfs_inode_buf.c
++++ b/fs/xfs/libxfs/xfs_inode_buf.c
+@@ -386,6 +386,7 @@ xfs_dinode_verify(
+ struct xfs_inode *ip,
+ struct xfs_dinode *dip)
+ {
++ uint16_t mode;
+ uint16_t flags;
+ uint64_t flags2;
+
+@@ -396,8 +397,10 @@ xfs_dinode_verify(
+ if (be64_to_cpu(dip->di_size) & (1ULL << 63))
+ return false;
+
+- /* No zero-length symlinks. */
+- if (S_ISLNK(be16_to_cpu(dip->di_mode)) && dip->di_size == 0)
++ mode = be16_to_cpu(dip->di_mode);
++
++ /* No zero-length symlinks/dirs. */
++ if ((S_ISLNK(mode) || S_ISDIR(mode)) && dip->di_size == 0)
+ return false;
+
+ /* only version 3 or greater inodes are extensively verified here */
--- /dev/null
+From hch@lst.de Thu Feb 2 11:14:54 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Thu, 2 Feb 2017 08:56:03 +0100
+Subject: xfs: sanity check inode di_mode
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, Amir Goldstein <amir73il@gmail.com>, "Darrick J. Wong" <darrick.wong@oracle.com>
+Message-ID: <1486022171-8076-12-git-send-email-hch@lst.de>
+
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+commit a324cbf10a3c67aaa10c9f47f7b5801562925bc2 upstream.
+
+Check for invalid file type in xfs_dinode_verify()
+and fail to load the inode structure from disk.
+
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_inode_buf.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/xfs/libxfs/xfs_inode_buf.c
++++ b/fs/xfs/libxfs/xfs_inode_buf.c
+@@ -29,6 +29,7 @@
+ #include "xfs_icache.h"
+ #include "xfs_trans.h"
+ #include "xfs_ialloc.h"
++#include "xfs_dir2.h"
+
+ /*
+ * Check that none of the inode's in the buffer have a next
+@@ -398,6 +399,8 @@ xfs_dinode_verify(
+ return false;
+
+ mode = be16_to_cpu(dip->di_mode);
++ if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN)
++ return false;
+
+ /* No zero-length symlinks/dirs. */
+ if ((S_ISLNK(mode) || S_ISDIR(mode)) && dip->di_size == 0)
--- /dev/null
+From hch@lst.de Thu Feb 2 11:14:40 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Thu, 2 Feb 2017 08:56:02 +0100
+Subject: xfs: sanity check inode mode when creating new dentry
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, Amir Goldstein <amir73il@gmail.com>, "Darrick J. Wong" <darrick.wong@oracle.com>
+Message-ID: <1486022171-8076-11-git-send-email-hch@lst.de>
+
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+commit fab8eef86c814c3dd46bc5d760b6e4a53d5fc5a6 upstream.
+
+The helper xfs_dentry_to_name() is used by 2 different
+classes of callers: Callers that pass zero mode and don't care
+about the returned name.type field and Callers that pass
+non zero mode and do care about the name.type field.
+
+Change xfs_dentry_to_name() to not take the mode argument and
+change the call sites of the first class to not pass the mode
+argument.
+
+Create a new helper xfs_dentry_mode_to_name() which does pass
+the mode argument and returns -EFSCORRUPTED if mode is invalid.
+Callers that translate non zero mode to on-disk file type now
+check the return value and will export the error to user instead
+of staging an invalid file type to be written to directory entry.
+
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_iops.c | 48 +++++++++++++++++++++++++++++++++++++++---------
+ 1 file changed, 39 insertions(+), 9 deletions(-)
+
+--- a/fs/xfs/xfs_iops.c
++++ b/fs/xfs/xfs_iops.c
+@@ -98,12 +98,27 @@ xfs_init_security(
+ static void
+ xfs_dentry_to_name(
+ struct xfs_name *namep,
++ struct dentry *dentry)
++{
++ namep->name = dentry->d_name.name;
++ namep->len = dentry->d_name.len;
++ namep->type = XFS_DIR3_FT_UNKNOWN;
++}
++
++static int
++xfs_dentry_mode_to_name(
++ struct xfs_name *namep,
+ struct dentry *dentry,
+ int mode)
+ {
+ namep->name = dentry->d_name.name;
+ namep->len = dentry->d_name.len;
+ namep->type = xfs_mode_to_ftype(mode);
++
++ if (unlikely(namep->type == XFS_DIR3_FT_UNKNOWN))
++ return -EFSCORRUPTED;
++
++ return 0;
+ }
+
+ STATIC void
+@@ -119,7 +134,7 @@ xfs_cleanup_inode(
+ * xfs_init_security we must back out.
+ * ENOSPC can hit here, among other things.
+ */
+- xfs_dentry_to_name(&teardown, dentry, 0);
++ xfs_dentry_to_name(&teardown, dentry);
+
+ xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
+ }
+@@ -154,8 +169,12 @@ xfs_generic_create(
+ if (error)
+ return error;
+
++ /* Verify mode is valid also for tmpfile case */
++ error = xfs_dentry_mode_to_name(&name, dentry, mode);
++ if (unlikely(error))
++ goto out_free_acl;
++
+ if (!tmpfile) {
+- xfs_dentry_to_name(&name, dentry, mode);
+ error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip);
+ } else {
+ error = xfs_create_tmpfile(XFS_I(dir), dentry, mode, &ip);
+@@ -248,7 +267,7 @@ xfs_vn_lookup(
+ if (dentry->d_name.len >= MAXNAMELEN)
+ return ERR_PTR(-ENAMETOOLONG);
+
+- xfs_dentry_to_name(&name, dentry, 0);
++ xfs_dentry_to_name(&name, dentry);
+ error = xfs_lookup(XFS_I(dir), &name, &cip, NULL);
+ if (unlikely(error)) {
+ if (unlikely(error != -ENOENT))
+@@ -275,7 +294,7 @@ xfs_vn_ci_lookup(
+ if (dentry->d_name.len >= MAXNAMELEN)
+ return ERR_PTR(-ENAMETOOLONG);
+
+- xfs_dentry_to_name(&xname, dentry, 0);
++ xfs_dentry_to_name(&xname, dentry);
+ error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name);
+ if (unlikely(error)) {
+ if (unlikely(error != -ENOENT))
+@@ -310,7 +329,9 @@ xfs_vn_link(
+ struct xfs_name name;
+ int error;
+
+- xfs_dentry_to_name(&name, dentry, inode->i_mode);
++ error = xfs_dentry_mode_to_name(&name, dentry, inode->i_mode);
++ if (unlikely(error))
++ return error;
+
+ error = xfs_link(XFS_I(dir), XFS_I(inode), &name);
+ if (unlikely(error))
+@@ -329,7 +350,7 @@ xfs_vn_unlink(
+ struct xfs_name name;
+ int error;
+
+- xfs_dentry_to_name(&name, dentry, 0);
++ xfs_dentry_to_name(&name, dentry);
+
+ error = xfs_remove(XFS_I(dir), &name, XFS_I(d_inode(dentry)));
+ if (error)
+@@ -359,7 +380,9 @@ xfs_vn_symlink(
+
+ mode = S_IFLNK |
+ (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO);
+- xfs_dentry_to_name(&name, dentry, mode);
++ error = xfs_dentry_mode_to_name(&name, dentry, mode);
++ if (unlikely(error))
++ goto out;
+
+ error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip);
+ if (unlikely(error))
+@@ -395,6 +418,7 @@ xfs_vn_rename(
+ {
+ struct inode *new_inode = d_inode(ndentry);
+ int omode = 0;
++ int error;
+ struct xfs_name oname;
+ struct xfs_name nname;
+
+@@ -405,8 +429,14 @@ xfs_vn_rename(
+ if (flags & RENAME_EXCHANGE)
+ omode = d_inode(ndentry)->i_mode;
+
+- xfs_dentry_to_name(&oname, odentry, omode);
+- xfs_dentry_to_name(&nname, ndentry, d_inode(odentry)->i_mode);
++ error = xfs_dentry_mode_to_name(&oname, odentry, omode);
++ if (omode && unlikely(error))
++ return error;
++
++ error = xfs_dentry_mode_to_name(&nname, ndentry,
++ d_inode(odentry)->i_mode);
++ if (unlikely(error))
++ return error;
+
+ return xfs_rename(XFS_I(odir), &oname, XFS_I(d_inode(odentry)),
+ XFS_I(ndir), &nname,
--- /dev/null
+From hch@lst.de Thu Feb 2 11:15:31 2017
+From: Christoph Hellwig <hch@lst.de>
+Date: Thu, 2 Feb 2017 08:56:07 +0100
+Subject: xfs: verify dirblocklog correctly
+To: stable@vger.kernel.org
+Cc: linux-xfs@vger.kernel.org, "Darrick J. Wong" <darrick.wong@oracle.com>
+Message-ID: <1486022171-8076-16-git-send-email-hch@lst.de>
+
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit 83d230eb5c638949350f4761acdfc0af5cb1bc00 upstream.
+
+sb_dirblklog is added to sb_blocklog to compute the directory block size
+in bytes. Therefore, we must compare the sum of both those values
+against XFS_MAX_BLOCKSIZE_LOG, not just dirblklog.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Eric Sandeen <sandeen@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_sb.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/xfs/libxfs/xfs_sb.c
++++ b/fs/xfs/libxfs/xfs_sb.c
+@@ -242,7 +242,7 @@ xfs_mount_validate_sb(
+ sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG ||
+ sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG ||
+ sbp->sb_blocksize != (1 << sbp->sb_blocklog) ||
+- sbp->sb_dirblklog > XFS_MAX_BLOCKSIZE_LOG ||
++ sbp->sb_dirblklog + sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG ||
+ sbp->sb_inodesize < XFS_DINODE_MIN_SIZE ||
+ sbp->sb_inodesize > XFS_DINODE_MAX_SIZE ||
+ sbp->sb_inodelog < XFS_DINODE_MIN_LOG ||