From: Greg Kroah-Hartman Date: Fri, 11 Nov 2022 09:04:47 +0000 (+0100) Subject: 5.4-stable patches X-Git-Tag: v5.10.155~56 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=3ad4b0cdf8551d0d352c9fcbc83cf7ce254f2359;p=thirdparty%2Fkernel%2Fstable-queue.git 5.4-stable patches added patches: xfs-drain-the-buf-delwri-queue-before-xfsaild-idles.patch xfs-preserve-inode-versioning-across-remounts.patch xfs-preserve-rmapbt-swapext-block-reservation-from-freed-blocks.patch xfs-redesign-the-reflink-remap-loop-to-fix-blkres-depletion-crash.patch xfs-rename-xfs_bmap_is_real_extent-to-is_written_extent.patch xfs-use-mmaplock-around-filemap_map_pages.patch --- diff --git a/queue-5.10/series b/queue-5.10/series new file mode 100644 index 00000000000..e69de29bb2d diff --git a/queue-5.4/series b/queue-5.4/series new file mode 100644 index 00000000000..25176f2fb29 --- /dev/null +++ b/queue-5.4/series @@ -0,0 +1,6 @@ +xfs-preserve-rmapbt-swapext-block-reservation-from-freed-blocks.patch +xfs-rename-xfs_bmap_is_real_extent-to-is_written_extent.patch +xfs-redesign-the-reflink-remap-loop-to-fix-blkres-depletion-crash.patch +xfs-use-mmaplock-around-filemap_map_pages.patch +xfs-preserve-inode-versioning-across-remounts.patch +xfs-drain-the-buf-delwri-queue-before-xfsaild-idles.patch diff --git a/queue-5.4/xfs-drain-the-buf-delwri-queue-before-xfsaild-idles.patch b/queue-5.4/xfs-drain-the-buf-delwri-queue-before-xfsaild-idles.patch new file mode 100644 index 00000000000..169694614ac --- /dev/null +++ b/queue-5.4/xfs-drain-the-buf-delwri-queue-before-xfsaild-idles.patch @@ -0,0 +1,101 @@ +From stable-owner@vger.kernel.org Fri Nov 11 05:11:27 2022 +From: Chandan Babu R +Date: Fri, 11 Nov 2022 09:40:25 +0530 +Subject: xfs: drain the buf delwri queue before xfsaild idles +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20221111041025.87704-7-chandan.babu@oracle.com> + +From: Brian Foster + +commit f376b45e861d8b7b34bf0eceeecfdd00dbe65cde upstream. + +xfsaild is racy with respect to transaction abort and shutdown in +that the task can idle or exit with an empty AIL but buffers still +on the delwri queue. This was partly addressed by cancelling the +delwri queue before the task exits to prevent memory leaks, but it's +also possible for xfsaild to empty and idle with buffers on the +delwri queue. For example, a transaction that pins a buffer that +also happens to sit on the AIL delwri queue will explicitly remove +the associated log item from the AIL if the transaction aborts. The +side effect of this is an unmount hang in xfs_wait_buftarg() as the +associated buffers remain held by the delwri queue indefinitely. +This is reproduced on repeated runs of generic/531 with an fs format +(-mrmapbt=1 -bsize=1k) that happens to also reproduce transaction +aborts. + +Update xfsaild to not idle until both the AIL and associated delwri +queue are empty and update the push code to continue delwri queue +submission attempts even when the AIL is empty. This allows the AIL +to eventually release aborted buffers stranded on the delwri queue +when they are unlocked by the associated transaction. This should +have no significant effect on normal runtime behavior because the +xfsaild currently idles only when the AIL is empty and in practice +the AIL is rarely empty with a populated delwri queue. 
The items
+must be AIL resident to land in the queue in the first place and
+generally aren't removed until writeback completes.
+
+Note that the pre-existing delwri queue cancel logic in the exit
+path is retained because task stop is external, could technically
+come at any point, and xfsaild is still responsible for releasing its
+buffer references before it exits.
+
+Signed-off-by: Brian Foster
+Reviewed-by: Christoph Hellwig
+Reviewed-by: Darrick J. Wong
+Signed-off-by: Darrick J. Wong
+Acked-by: Darrick J. Wong
+Signed-off-by: Chandan Babu R
+Signed-off-by: Greg Kroah-Hartman
+---
+ fs/xfs/xfs_trans_ail.c |   16 ++++++----------
+ 1 file changed, 6 insertions(+), 10 deletions(-)
+
+--- a/fs/xfs/xfs_trans_ail.c
++++ b/fs/xfs/xfs_trans_ail.c
+@@ -402,16 +402,10 @@ xfsaild_push(
+ 	target = ailp->ail_target;
+ 	ailp->ail_target_prev = target;
+ 
++	/* we're done if the AIL is empty or our push has reached the end */
+ 	lip = xfs_trans_ail_cursor_first(ailp, &cur, ailp->ail_last_pushed_lsn);
+-	if (!lip) {
+-		/*
+-		 * If the AIL is empty or our push has reached the end we are
+-		 * done now.
+-		 */
+-		xfs_trans_ail_cursor_done(&cur);
+-		spin_unlock(&ailp->ail_lock);
++	if (!lip)
+ 		goto out_done;
+-	}
+ 
+ 	XFS_STATS_INC(mp, xs_push_ail);
+ 
+@@ -493,6 +487,8 @@ xfsaild_push(
+ 			break;
+ 		lsn = lip->li_lsn;
+ 	}
++
++out_done:
+ 	xfs_trans_ail_cursor_done(&cur);
+ 	spin_unlock(&ailp->ail_lock);
+ 
+@@ -500,7 +496,6 @@ xfsaild_push(
+ 		ailp->ail_log_flush++;
+ 
+ 	if (!count || XFS_LSN_CMP(lsn, target) >= 0) {
+-out_done:
+ 		/*
+ 		 * We reached the target or the AIL is empty, so wait a bit
+ 		 * longer for I/O to complete and remove pushed items from the
+@@ -592,7 +587,8 @@ xfsaild(
+ 		 */
+ 		smp_rmb();
+ 		if (!xfs_ail_min(ailp) &&
+-		    ailp->ail_target == ailp->ail_target_prev) {
++		    ailp->ail_target == ailp->ail_target_prev &&
++		    list_empty(&ailp->ail_buf_list)) {
+ 			spin_unlock(&ailp->ail_lock);
+ 			freezable_schedule();
+ 			tout = 0;
diff --git a/queue-5.4/xfs-preserve-inode-versioning-across-remounts.patch b/queue-5.4/xfs-preserve-inode-versioning-across-remounts.patch
new file mode 100644
index 00000000000..11e687b3acf
--- /dev/null
+++ b/queue-5.4/xfs-preserve-inode-versioning-across-remounts.patch
@@ -0,0 +1,57 @@
+From stable-owner@vger.kernel.org Fri Nov 11 05:14:16 2022
+From: Chandan Babu R
+Date: Fri, 11 Nov 2022 09:40:24 +0530
+Subject: xfs: preserve inode versioning across remounts
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221111041025.87704-6-chandan.babu@oracle.com>
+
+From: Eric Sandeen
+
+commit 4750a171c3290f9bbebca16c6372db723a4cfa3b upstream.
+
+[ For 5.4.y, SB_I_VERSION should be set in xfs_fs_remount() ]
+
+The MS_I_VERSION mount flag is exposed via the VFS, as documented
+in the mount manpages etc; see the iversion and noiversion mount
+options in mount(8).
+
+As a result, mount -o remount looks for this option in /proc/mounts
+and will only send the I_VERSION flag back in during remount if it
+is present. Since it's not there, a remount will /remove/ the
+I_VERSION flag at the vfs level, and iversion functionality is lost.
+
+xfs v5 superblocks intend to always have i_version enabled; it is
+set as a default at mount time, but is lost during remount for the
+reasons above.
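+
+To make the flag loss concrete, here is a minimal user-space model of
+the flag reconstruction described above (illustrative only; the
+function name and the option handling are invented, not kernel code):
+
+	#include <string.h>
+
+	#define SB_I_VERSION	0x1u
+
+	/*
+	 * mount(8) re-sends only the options it could read back from
+	 * /proc/mounts. Since "iversion" is never shown there, the
+	 * recomputed flag word comes back without SB_I_VERSION and the
+	 * remount clears it. (A real parser would also have to handle
+	 * "noiversion"; omitted for brevity.)
+	 */
+	static unsigned int remount_sb_flags(const char *opts)
+	{
+		unsigned int flags = 0;
+
+		if (strstr(opts, "iversion") != NULL)
+			flags |= SB_I_VERSION;
+		return flags;	/* "iversion" absent: the bit is dropped */
+	}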
+
+The generic fix would be to expose this documented option in
+/proc/mounts, but since that was rejected, fix it up again in the
+xfs remount path instead, so that at least xfs won't suffer from
+this misbehavior.
+
+Signed-off-by: Eric Sandeen
+Reviewed-by: Dave Chinner
+Reviewed-by: Christoph Hellwig
+Signed-off-by: Darrick J. Wong
+Reviewed-by: Darrick J. Wong
+Acked-by: Darrick J. Wong
+Signed-off-by: Chandan Babu R
+Signed-off-by: Greg Kroah-Hartman
+---
+ fs/xfs/xfs_super.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/fs/xfs/xfs_super.c
++++ b/fs/xfs/xfs_super.c
+@@ -1228,6 +1228,10 @@ xfs_fs_remount(
+ 	char			*p;
+ 	int			error;
+ 
++	/* version 5 superblocks always support version counters. */
++	if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5)
++		*flags |= SB_I_VERSION;
++
+ 	/* First, check for complete junk; i.e. invalid options */
+ 	error = xfs_test_remount_options(sb, options);
+ 	if (error)
diff --git a/queue-5.4/xfs-preserve-rmapbt-swapext-block-reservation-from-freed-blocks.patch b/queue-5.4/xfs-preserve-rmapbt-swapext-block-reservation-from-freed-blocks.patch
new file mode 100644
index 00000000000..20470aea0e1
--- /dev/null
+++ b/queue-5.4/xfs-preserve-rmapbt-swapext-block-reservation-from-freed-blocks.patch
@@ -0,0 +1,144 @@
+From stable-owner@vger.kernel.org Fri Nov 11 05:14:16 2022
+From: Chandan Babu R
+Date: Fri, 11 Nov 2022 09:40:20 +0530
+Subject: xfs: preserve rmapbt swapext block reservation from freed blocks
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221111041025.87704-2-chandan.babu@oracle.com>
+
+From: Brian Foster
+
+commit f74681ba2006434be195402e0b15fc5763cddd7e upstream.
+
+[Slightly modify xfs_trans_alloc() to fix a merge conflict due to missing
+ "atomic_inc(&mp->m_active_trans)" statement in v5.9 kernel]
+
+The rmapbt extent swap algorithm remaps individual extents between
+the source inode and the target to trigger reverse mapping metadata
+updates. If either inode straddles a format or other bmap allocation
+boundary, the individual unmap and map cycles can trigger repeated
+bmap block allocations and frees as the extent count bounces back
+and forth across the boundary. While net block usage is bound across
+the swap operation, this behavior can prematurely exhaust the
+transaction block reservation because it continuously drains as the
+transaction rolls. Each allocation accounts against the reservation
+and each free returns to global free space on transaction roll.
+
+The previous workaround to this problem attempted to detect this
+boundary condition and provide surplus block reservation to
+accommodate it. This is insufficient because more remaps can occur
+than the extent counts imply, for example when start offset
+boundaries are not aligned between the two inodes.
+
+To address this problem more generically and dynamically, add a
+transaction accounting mode that returns freed blocks to the
+transaction reservation instead of the superblock counters on
+transaction roll and use it when the rmapbt based algorithm is
+active. This allows the chain of remap transactions to preserve the
+block reservation based on its own frees and prevent premature
+exhaustion regardless of the remap pattern. Note that this is only
+safe for superblocks with lazy sb accounting, but the latter is
+required for v5 supers and the rmap feature depends on v5.
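+
+The accounting mode boils down to a small change in how a positive
+free-block delta is applied on transaction roll. As a hedged,
+self-contained sketch (simplified stand-in types and names, not the
+kernel code in the diff below):
+
+	#include <stdint.h>
+
+	#define TRANS_RES_FDBLKS	0x80u	/* stand-in for XFS_TRANS_RES_FDBLKS */
+
+	struct trans {
+		unsigned int	flags;
+		unsigned int	blk_res;	/* this transaction's block reservation */
+		int64_t		fdblocks_delta;	/* pending global free-space delta */
+	};
+
+	/* delta > 0 means blocks were freed; delta < 0 means allocated */
+	static void trans_mod_fdblocks(struct trans *tp, int64_t delta)
+	{
+		if (delta > 0 && (tp->flags & TRANS_RES_FDBLKS)) {
+			/* refill the reservation, clamped so it cannot overflow */
+			int64_t take = delta;
+
+			if (take > (int64_t)(UINT32_MAX - tp->blk_res))
+				take = UINT32_MAX - tp->blk_res;
+			tp->blk_res += (unsigned int)take;
+			delta -= take;
+		}
+		/* whatever is left goes back to the global free space pool */
+		tp->fdblocks_delta += delta;
+	}
+
+Repeated free/alloc cycles across transaction rolls then consume and
+refill the same reservation instead of steadily draining it.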
+ +Fixes: b3fed434822d0 ("xfs: account format bouncing into rmapbt swapext tx reservation") +Root-caused-by: Darrick J. Wong +Signed-off-by: Brian Foster +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Acked-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_shared.h | 1 + + fs/xfs/xfs_bmap_util.c | 18 +++++++++--------- + fs/xfs/xfs_trans.c | 19 ++++++++++++++++++- + 3 files changed, 28 insertions(+), 10 deletions(-) + +--- a/fs/xfs/libxfs/xfs_shared.h ++++ b/fs/xfs/libxfs/xfs_shared.h +@@ -65,6 +65,7 @@ void xfs_log_get_max_trans_res(struct xf + #define XFS_TRANS_DQ_DIRTY 0x10 /* at least one dquot in trx dirty */ + #define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */ + #define XFS_TRANS_NO_WRITECOUNT 0x40 /* do not elevate SB writecount */ ++#define XFS_TRANS_RES_FDBLKS 0x80 /* reserve newly freed blocks */ + /* + * LOWMODE is used by the allocator to activate the lowspace algorithm - when + * free space is running low the extent allocator may choose to allocate an +--- a/fs/xfs/xfs_bmap_util.c ++++ b/fs/xfs/xfs_bmap_util.c +@@ -1740,6 +1740,7 @@ xfs_swap_extents( + int lock_flags; + uint64_t f; + int resblks = 0; ++ unsigned int flags = 0; + + /* + * Lock the inodes against other IO, page faults and truncate to +@@ -1795,17 +1796,16 @@ xfs_swap_extents( + resblks += XFS_SWAP_RMAP_SPACE_RES(mp, tipnext, w); + + /* +- * Handle the corner case where either inode might straddle the +- * btree format boundary. If so, the inode could bounce between +- * btree <-> extent format on unmap -> remap cycles, freeing and +- * allocating a bmapbt block each time. ++ * If either inode straddles a bmapbt block allocation boundary, ++ * the rmapbt algorithm triggers repeated allocs and frees as ++ * extents are remapped. This can exhaust the block reservation ++ * prematurely and cause shutdown. Return freed blocks to the ++ * transaction reservation to counter this behavior. 
+ */ +- if (ipnext == (XFS_IFORK_MAXEXT(ip, w) + 1)) +- resblks += XFS_IFORK_MAXEXT(ip, w); +- if (tipnext == (XFS_IFORK_MAXEXT(tip, w) + 1)) +- resblks += XFS_IFORK_MAXEXT(tip, w); ++ flags |= XFS_TRANS_RES_FDBLKS; + } +- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp); ++ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, flags, ++ &tp); + if (error) + goto out_unlock; + +--- a/fs/xfs/xfs_trans.c ++++ b/fs/xfs/xfs_trans.c +@@ -107,7 +107,8 @@ xfs_trans_dup( + + ntp->t_flags = XFS_TRANS_PERM_LOG_RES | + (tp->t_flags & XFS_TRANS_RESERVE) | +- (tp->t_flags & XFS_TRANS_NO_WRITECOUNT); ++ (tp->t_flags & XFS_TRANS_NO_WRITECOUNT) | ++ (tp->t_flags & XFS_TRANS_RES_FDBLKS); + /* We gave our writer reference to the new transaction */ + tp->t_flags |= XFS_TRANS_NO_WRITECOUNT; + ntp->t_ticket = xfs_log_ticket_get(tp->t_ticket); +@@ -273,6 +274,8 @@ xfs_trans_alloc( + */ + WARN_ON(resp->tr_logres > 0 && + mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE); ++ ASSERT(!(flags & XFS_TRANS_RES_FDBLKS) || ++ xfs_sb_version_haslazysbcount(&mp->m_sb)); + atomic_inc(&mp->m_active_trans); + + tp->t_magic = XFS_TRANS_HEADER_MAGIC; +@@ -368,6 +371,20 @@ xfs_trans_mod_sb( + tp->t_blk_res_used += (uint)-delta; + if (tp->t_blk_res_used > tp->t_blk_res) + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); ++ } else if (delta > 0 && (tp->t_flags & XFS_TRANS_RES_FDBLKS)) { ++ int64_t blkres_delta; ++ ++ /* ++ * Return freed blocks directly to the reservation ++ * instead of the global pool, being careful not to ++ * overflow the trans counter. This is used to preserve ++ * reservation across chains of transaction rolls that ++ * repeatedly free and allocate blocks. ++ */ ++ blkres_delta = min_t(int64_t, delta, ++ UINT_MAX - tp->t_blk_res); ++ tp->t_blk_res += blkres_delta; ++ delta -= blkres_delta; + } + tp->t_fdblocks_delta += delta; + if (xfs_sb_version_haslazysbcount(&mp->m_sb)) diff --git a/queue-5.4/xfs-redesign-the-reflink-remap-loop-to-fix-blkres-depletion-crash.patch b/queue-5.4/xfs-redesign-the-reflink-remap-loop-to-fix-blkres-depletion-crash.patch new file mode 100644 index 00000000000..e0312394564 --- /dev/null +++ b/queue-5.4/xfs-redesign-the-reflink-remap-loop-to-fix-blkres-depletion-crash.patch @@ -0,0 +1,473 @@ +From stable-owner@vger.kernel.org Fri Nov 11 05:11:12 2022 +From: Chandan Babu R +Date: Fri, 11 Nov 2022 09:40:22 +0530 +Subject: xfs: redesign the reflink remap loop to fix blkres depletion crash +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20221111041025.87704-4-chandan.babu@oracle.com> + +From: "Darrick J. Wong" + +commit 00fd1d56dd08a8ceaa9e4ee1a41fefd9f6c6bc7d upstream. + +The existing reflink remapping loop has some structural problems that +need addressing: + +The biggest problem is that we create one transaction for each extent in +the source file without accounting for the number of mappings there are +for the same range in the destination file. In other words, we don't +know the number of remap operations that will be necessary and we +therefore cannot guess the block reservation required. On highly +fragmented filesystems (e.g. ones with active dedupe) we guess wrong, +run out of block reservation, and fail. 
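+
+(As a concrete illustration: if a single source extent is shared by,
+say, a hundred separate mappings in a heavily deduped destination
+file, one remap request must roll through on the order of a hundred
+unmap/map cycles against a reservation that was sized for a single
+extent's worth of bmap work.)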
+ +The second problem is that we don't actually use the bmap intents to +their full potential -- instead of calling bunmapi directly and having +to deal with its backwards operation, we could call the deferred ops +xfs_bmap_unmap_extent and xfs_refcount_decrease_extent instead. This +makes the frontend loop much simpler. + +Solve all of these problems by refactoring the remapping loops so that +we only perform one remapping operation per transaction, and each +operation only tries to remap a single extent from source to dest. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Brian Foster +Reported-by: Edwin Török +Tested-by: Edwin Török +Acked-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_bmap.h | 13 +- + fs/xfs/xfs_reflink.c | 242 +++++++++++++++++++++++++---------------------- + fs/xfs/xfs_trace.h | 52 ---------- + 3 files changed, 143 insertions(+), 164 deletions(-) + +--- a/fs/xfs/libxfs/xfs_bmap.h ++++ b/fs/xfs/libxfs/xfs_bmap.h +@@ -158,6 +158,13 @@ static inline int xfs_bmapi_whichfork(in + { BMAP_ATTRFORK, "ATTR" }, \ + { BMAP_COWFORK, "COW" } + ++/* Return true if the extent is an allocated extent, written or not. */ ++static inline bool xfs_bmap_is_real_extent(struct xfs_bmbt_irec *irec) ++{ ++ return irec->br_startblock != HOLESTARTBLOCK && ++ irec->br_startblock != DELAYSTARTBLOCK && ++ !isnullstartblock(irec->br_startblock); ++} + + /* + * Return true if the extent is a real, allocated extent, or false if it is a +@@ -165,10 +172,8 @@ static inline int xfs_bmapi_whichfork(in + */ + static inline bool xfs_bmap_is_written_extent(struct xfs_bmbt_irec *irec) + { +- return irec->br_state != XFS_EXT_UNWRITTEN && +- irec->br_startblock != HOLESTARTBLOCK && +- irec->br_startblock != DELAYSTARTBLOCK && +- !isnullstartblock(irec->br_startblock); ++ return xfs_bmap_is_real_extent(irec) && ++ irec->br_state != XFS_EXT_UNWRITTEN; + } + + /* +--- a/fs/xfs/xfs_reflink.c ++++ b/fs/xfs/xfs_reflink.c +@@ -986,41 +986,28 @@ xfs_reflink_ag_has_free_space( + } + + /* +- * Unmap a range of blocks from a file, then map other blocks into the hole. +- * The range to unmap is (destoff : destoff + srcioff + irec->br_blockcount). +- * The extent irec is mapped into dest at irec->br_startoff. ++ * Remap the given extent into the file. The dmap blockcount will be set to ++ * the number of blocks that were actually remapped. 
+ */ + STATIC int + xfs_reflink_remap_extent( + struct xfs_inode *ip, +- struct xfs_bmbt_irec *irec, +- xfs_fileoff_t destoff, ++ struct xfs_bmbt_irec *dmap, + xfs_off_t new_isize) + { ++ struct xfs_bmbt_irec smap; + struct xfs_mount *mp = ip->i_mount; +- bool real_extent = xfs_bmap_is_written_extent(irec); + struct xfs_trans *tp; +- unsigned int resblks; +- struct xfs_bmbt_irec uirec; +- xfs_filblks_t rlen; +- xfs_filblks_t unmap_len; + xfs_off_t newlen; +- int64_t qres; ++ int64_t qres, qdelta; ++ unsigned int resblks; ++ bool smap_real; ++ bool dmap_written = xfs_bmap_is_written_extent(dmap); ++ int nimaps; + int error; + +- unmap_len = irec->br_startoff + irec->br_blockcount - destoff; +- trace_xfs_reflink_punch_range(ip, destoff, unmap_len); +- +- /* No reflinking if we're low on space */ +- if (real_extent) { +- error = xfs_reflink_ag_has_free_space(mp, +- XFS_FSB_TO_AGNO(mp, irec->br_startblock)); +- if (error) +- goto out; +- } +- + /* Start a rolling transaction to switch the mappings */ +- resblks = XFS_EXTENTADD_SPACE_RES(ip->i_mount, XFS_DATA_FORK); ++ resblks = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK); + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp); + if (error) + goto out; +@@ -1029,92 +1016,121 @@ xfs_reflink_remap_extent( + xfs_trans_ijoin(tp, ip, 0); + + /* +- * Reserve quota for this operation. We don't know if the first unmap +- * in the dest file will cause a bmap btree split, so we always reserve +- * at least enough blocks for that split. If the extent being mapped +- * in is written, we need to reserve quota for that too. ++ * Read what's currently mapped in the destination file into smap. ++ * If smap isn't a hole, we will have to remove it before we can add ++ * dmap to the destination file. + */ +- qres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK); +- if (real_extent) +- qres += irec->br_blockcount; +- error = xfs_trans_reserve_quota_nblks(tp, ip, qres, 0, +- XFS_QMOPT_RES_REGBLKS); ++ nimaps = 1; ++ error = xfs_bmapi_read(ip, dmap->br_startoff, dmap->br_blockcount, ++ &smap, &nimaps, 0); + if (error) + goto out_cancel; ++ ASSERT(nimaps == 1 && smap.br_startoff == dmap->br_startoff); ++ smap_real = xfs_bmap_is_real_extent(&smap); + +- trace_xfs_reflink_remap(ip, irec->br_startoff, +- irec->br_blockcount, irec->br_startblock); ++ /* ++ * We can only remap as many blocks as the smaller of the two extent ++ * maps, because we can only remap one extent at a time. ++ */ ++ dmap->br_blockcount = min(dmap->br_blockcount, smap.br_blockcount); ++ ASSERT(dmap->br_blockcount == smap.br_blockcount); ++ ++ trace_xfs_reflink_remap_extent_dest(ip, &smap); + +- /* Unmap the old blocks in the data fork. */ +- rlen = unmap_len; +- while (rlen) { +- ASSERT(tp->t_firstblock == NULLFSBLOCK); +- error = __xfs_bunmapi(tp, ip, destoff, &rlen, 0, 1); ++ /* No reflinking if the AG of the dest mapping is low on space. */ ++ if (dmap_written) { ++ error = xfs_reflink_ag_has_free_space(mp, ++ XFS_FSB_TO_AGNO(mp, dmap->br_startblock)); + if (error) + goto out_cancel; ++ } + ++ /* ++ * Compute quota reservation if we think the quota block counter for ++ * this file could increase. ++ * ++ * We start by reserving enough blocks to handle a bmbt split. ++ * ++ * If we are mapping a written extent into the file, we need to have ++ * enough quota block count reservation to handle the blocks in that ++ * extent. 
++ * ++ * Note that if we're replacing a delalloc reservation with a written ++ * extent, we have to take the full quota reservation because removing ++ * the delalloc reservation gives the block count back to the quota ++ * count. This is suboptimal, but the VFS flushed the dest range ++ * before we started. That should have removed all the delalloc ++ * reservations, but we code defensively. ++ */ ++ qdelta = 0; ++ qres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK); ++ if (dmap_written) ++ qres += dmap->br_blockcount; ++ error = xfs_trans_reserve_quota_nblks(tp, ip, qres, 0, ++ XFS_QMOPT_RES_REGBLKS); ++ if (error) ++ goto out_cancel; ++ ++ if (smap_real) { + /* +- * Trim the extent to whatever got unmapped. +- * Remember, bunmapi works backwards. ++ * If the extent we're unmapping is backed by storage (written ++ * or not), unmap the extent and drop its refcount. + */ +- uirec.br_startblock = irec->br_startblock + rlen; +- uirec.br_startoff = irec->br_startoff + rlen; +- uirec.br_blockcount = unmap_len - rlen; +- uirec.br_state = irec->br_state; +- unmap_len = rlen; +- +- /* If this isn't a real mapping, we're done. */ +- if (!real_extent || uirec.br_blockcount == 0) +- goto next_extent; +- +- trace_xfs_reflink_remap(ip, uirec.br_startoff, +- uirec.br_blockcount, uirec.br_startblock); +- +- /* Update the refcount tree */ +- xfs_refcount_increase_extent(tp, &uirec); +- +- /* Map the new blocks into the data fork. */ +- xfs_bmap_map_extent(tp, ip, &uirec); +- +- /* Update quota accounting. */ +- xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, +- uirec.br_blockcount); +- +- /* Update dest isize if needed. */ +- newlen = XFS_FSB_TO_B(mp, +- uirec.br_startoff + uirec.br_blockcount); +- newlen = min_t(xfs_off_t, newlen, new_isize); +- if (newlen > i_size_read(VFS_I(ip))) { +- trace_xfs_reflink_update_inode_size(ip, newlen); +- i_size_write(VFS_I(ip), newlen); +- ip->i_d.di_size = newlen; +- xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); +- } ++ xfs_bmap_unmap_extent(tp, ip, &smap); ++ xfs_refcount_decrease_extent(tp, &smap); ++ qdelta -= smap.br_blockcount; ++ } else if (smap.br_startblock == DELAYSTARTBLOCK) { ++ xfs_filblks_t len = smap.br_blockcount; + +-next_extent: +- /* Process all the deferred stuff. */ +- error = xfs_defer_finish(&tp); ++ /* ++ * If the extent we're unmapping is a delalloc reservation, ++ * we can use the regular bunmapi function to release the ++ * incore state. Dropping the delalloc reservation takes care ++ * of the quota reservation for us. ++ */ ++ error = __xfs_bunmapi(NULL, ip, smap.br_startoff, &len, 0, 1); + if (error) + goto out_cancel; ++ ASSERT(len == 0); ++ } ++ ++ /* ++ * If the extent we're sharing is backed by written storage, increase ++ * its refcount and map it into the file. ++ */ ++ if (dmap_written) { ++ xfs_refcount_increase_extent(tp, dmap); ++ xfs_bmap_map_extent(tp, ip, dmap); ++ qdelta += dmap->br_blockcount; ++ } ++ ++ xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, qdelta); ++ ++ /* Update dest isize if needed. */ ++ newlen = XFS_FSB_TO_B(mp, dmap->br_startoff + dmap->br_blockcount); ++ newlen = min_t(xfs_off_t, newlen, new_isize); ++ if (newlen > i_size_read(VFS_I(ip))) { ++ trace_xfs_reflink_update_inode_size(ip, newlen); ++ i_size_write(VFS_I(ip), newlen); ++ ip->i_d.di_size = newlen; ++ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + } + ++ /* Commit everything and unlock. 
*/ + error = xfs_trans_commit(tp); +- xfs_iunlock(ip, XFS_ILOCK_EXCL); +- if (error) +- goto out; +- return 0; ++ goto out_unlock; + + out_cancel: + xfs_trans_cancel(tp); ++out_unlock: + xfs_iunlock(ip, XFS_ILOCK_EXCL); + out: +- trace_xfs_reflink_remap_extent_error(ip, error, _RET_IP_); ++ if (error) ++ trace_xfs_reflink_remap_extent_error(ip, error, _RET_IP_); + return error; + } + +-/* +- * Iteratively remap one file's extents (and holes) to another's. +- */ ++/* Remap a range of one file to the other. */ + int + xfs_reflink_remap_blocks( + struct xfs_inode *src, +@@ -1125,25 +1141,22 @@ xfs_reflink_remap_blocks( + loff_t *remapped) + { + struct xfs_bmbt_irec imap; +- xfs_fileoff_t srcoff; +- xfs_fileoff_t destoff; ++ struct xfs_mount *mp = src->i_mount; ++ xfs_fileoff_t srcoff = XFS_B_TO_FSBT(mp, pos_in); ++ xfs_fileoff_t destoff = XFS_B_TO_FSBT(mp, pos_out); + xfs_filblks_t len; +- xfs_filblks_t range_len; + xfs_filblks_t remapped_len = 0; + xfs_off_t new_isize = pos_out + remap_len; + int nimaps; + int error = 0; + +- destoff = XFS_B_TO_FSBT(src->i_mount, pos_out); +- srcoff = XFS_B_TO_FSBT(src->i_mount, pos_in); +- len = XFS_B_TO_FSB(src->i_mount, remap_len); +- +- /* drange = (destoff, destoff + len); srange = (srcoff, srcoff + len) */ +- while (len) { +- uint lock_mode; ++ len = min_t(xfs_filblks_t, XFS_B_TO_FSB(mp, remap_len), ++ XFS_MAX_FILEOFF); + +- trace_xfs_reflink_remap_blocks_loop(src, srcoff, len, +- dest, destoff); ++ trace_xfs_reflink_remap_blocks(src, srcoff, len, dest, destoff); ++ ++ while (len > 0) { ++ unsigned int lock_mode; + + /* Read extent from the source file */ + nimaps = 1; +@@ -1152,18 +1165,25 @@ xfs_reflink_remap_blocks( + xfs_iunlock(src, lock_mode); + if (error) + break; +- ASSERT(nimaps == 1); ++ /* ++ * The caller supposedly flushed all dirty pages in the source ++ * file range, which means that writeback should have allocated ++ * or deleted all delalloc reservations in that range. If we ++ * find one, that's a good sign that something is seriously ++ * wrong here. ++ */ ++ ASSERT(nimaps == 1 && imap.br_startoff == srcoff); ++ if (imap.br_startblock == DELAYSTARTBLOCK) { ++ ASSERT(imap.br_startblock != DELAYSTARTBLOCK); ++ error = -EFSCORRUPTED; ++ break; ++ } + +- trace_xfs_reflink_remap_imap(src, srcoff, len, XFS_DATA_FORK, +- &imap); ++ trace_xfs_reflink_remap_extent_src(src, &imap); + +- /* Translate imap into the destination file. */ +- range_len = imap.br_startoff + imap.br_blockcount - srcoff; +- imap.br_startoff += destoff - srcoff; +- +- /* Clear dest from destoff to the end of imap and map it in. */ +- error = xfs_reflink_remap_extent(dest, &imap, destoff, +- new_isize); ++ /* Remap into the destination file at the given offset. 
*/ ++ imap.br_startoff = destoff; ++ error = xfs_reflink_remap_extent(dest, &imap, new_isize); + if (error) + break; + +@@ -1173,10 +1193,10 @@ xfs_reflink_remap_blocks( + } + + /* Advance drange/srange */ +- srcoff += range_len; +- destoff += range_len; +- len -= range_len; +- remapped_len += range_len; ++ srcoff += imap.br_blockcount; ++ destoff += imap.br_blockcount; ++ len -= imap.br_blockcount; ++ remapped_len += imap.br_blockcount; + } + + if (error) +--- a/fs/xfs/xfs_trace.h ++++ b/fs/xfs/xfs_trace.h +@@ -3078,8 +3078,7 @@ DEFINE_EVENT(xfs_inode_irec_class, name, + DEFINE_INODE_EVENT(xfs_reflink_set_inode_flag); + DEFINE_INODE_EVENT(xfs_reflink_unset_inode_flag); + DEFINE_ITRUNC_EVENT(xfs_reflink_update_inode_size); +-DEFINE_IMAP_EVENT(xfs_reflink_remap_imap); +-TRACE_EVENT(xfs_reflink_remap_blocks_loop, ++TRACE_EVENT(xfs_reflink_remap_blocks, + TP_PROTO(struct xfs_inode *src, xfs_fileoff_t soffset, + xfs_filblks_t len, struct xfs_inode *dest, + xfs_fileoff_t doffset), +@@ -3110,59 +3109,14 @@ TRACE_EVENT(xfs_reflink_remap_blocks_loo + __entry->dest_ino, + __entry->dest_lblk) + ); +-TRACE_EVENT(xfs_reflink_punch_range, +- TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t lblk, +- xfs_extlen_t len), +- TP_ARGS(ip, lblk, len), +- TP_STRUCT__entry( +- __field(dev_t, dev) +- __field(xfs_ino_t, ino) +- __field(xfs_fileoff_t, lblk) +- __field(xfs_extlen_t, len) +- ), +- TP_fast_assign( +- __entry->dev = VFS_I(ip)->i_sb->s_dev; +- __entry->ino = ip->i_ino; +- __entry->lblk = lblk; +- __entry->len = len; +- ), +- TP_printk("dev %d:%d ino 0x%llx lblk 0x%llx len 0x%x", +- MAJOR(__entry->dev), MINOR(__entry->dev), +- __entry->ino, +- __entry->lblk, +- __entry->len) +-); +-TRACE_EVENT(xfs_reflink_remap, +- TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t lblk, +- xfs_extlen_t len, xfs_fsblock_t new_pblk), +- TP_ARGS(ip, lblk, len, new_pblk), +- TP_STRUCT__entry( +- __field(dev_t, dev) +- __field(xfs_ino_t, ino) +- __field(xfs_fileoff_t, lblk) +- __field(xfs_extlen_t, len) +- __field(xfs_fsblock_t, new_pblk) +- ), +- TP_fast_assign( +- __entry->dev = VFS_I(ip)->i_sb->s_dev; +- __entry->ino = ip->i_ino; +- __entry->lblk = lblk; +- __entry->len = len; +- __entry->new_pblk = new_pblk; +- ), +- TP_printk("dev %d:%d ino 0x%llx lblk 0x%llx len 0x%x new_pblk %llu", +- MAJOR(__entry->dev), MINOR(__entry->dev), +- __entry->ino, +- __entry->lblk, +- __entry->len, +- __entry->new_pblk) +-); + DEFINE_DOUBLE_IO_EVENT(xfs_reflink_remap_range); + DEFINE_INODE_ERROR_EVENT(xfs_reflink_remap_range_error); + DEFINE_INODE_ERROR_EVENT(xfs_reflink_set_inode_flag_error); + DEFINE_INODE_ERROR_EVENT(xfs_reflink_update_inode_size_error); + DEFINE_INODE_ERROR_EVENT(xfs_reflink_remap_blocks_error); + DEFINE_INODE_ERROR_EVENT(xfs_reflink_remap_extent_error); ++DEFINE_INODE_IREC_EVENT(xfs_reflink_remap_extent_src); ++DEFINE_INODE_IREC_EVENT(xfs_reflink_remap_extent_dest); + + /* dedupe tracepoints */ + DEFINE_DOUBLE_IO_EVENT(xfs_reflink_compare_extents); diff --git a/queue-5.4/xfs-rename-xfs_bmap_is_real_extent-to-is_written_extent.patch b/queue-5.4/xfs-rename-xfs_bmap_is_real_extent-to-is_written_extent.patch new file mode 100644 index 00000000000..934699d6c3a --- /dev/null +++ b/queue-5.4/xfs-rename-xfs_bmap_is_real_extent-to-is_written_extent.patch @@ -0,0 +1,90 @@ +From stable-owner@vger.kernel.org Fri Nov 11 05:11:10 2022 +From: Chandan Babu R +Date: Fri, 11 Nov 2022 09:40:21 +0530 +Subject: xfs: rename xfs_bmap_is_real_extent to is_written_extent +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, 
linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20221111041025.87704-3-chandan.babu@oracle.com> + +From: "Darrick J. Wong" + +commit 877f58f53684f14ca3202640f70592bf44890924 upstream. + +[ Slightly modify fs/xfs/libxfs/xfs_rtbitmap.c & fs/xfs/xfs_reflink.c to + resolve merge conflict ] + +The name of this predicate is a little misleading -- it decides if the +extent mapping is allocated and written. Change the name to be more +direct, as we're going to add a new predicate in the next patch. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Brian Foster +Acked-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_bmap.h | 2 +- + fs/xfs/libxfs/xfs_rtbitmap.c | 2 +- + fs/xfs/xfs_reflink.c | 8 ++++---- + 3 files changed, 6 insertions(+), 6 deletions(-) + +--- a/fs/xfs/libxfs/xfs_bmap.h ++++ b/fs/xfs/libxfs/xfs_bmap.h +@@ -163,7 +163,7 @@ static inline int xfs_bmapi_whichfork(in + * Return true if the extent is a real, allocated extent, or false if it is a + * delayed allocation, and unwritten extent or a hole. + */ +-static inline bool xfs_bmap_is_real_extent(struct xfs_bmbt_irec *irec) ++static inline bool xfs_bmap_is_written_extent(struct xfs_bmbt_irec *irec) + { + return irec->br_state != XFS_EXT_UNWRITTEN && + irec->br_startblock != HOLESTARTBLOCK && +--- a/fs/xfs/libxfs/xfs_rtbitmap.c ++++ b/fs/xfs/libxfs/xfs_rtbitmap.c +@@ -70,7 +70,7 @@ xfs_rtbuf_get( + if (error) + return error; + +- if (nmap == 0 || !xfs_bmap_is_real_extent(&map)) { ++ if (nmap == 0 || !xfs_bmap_is_written_extent(&map)) { + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + return -EFSCORRUPTED; + } +--- a/fs/xfs/xfs_reflink.c ++++ b/fs/xfs/xfs_reflink.c +@@ -181,7 +181,7 @@ xfs_reflink_trim_around_shared( + int error = 0; + + /* Holes, unwritten, and delalloc extents cannot be shared */ +- if (!xfs_is_cow_inode(ip) || !xfs_bmap_is_real_extent(irec)) { ++ if (!xfs_is_cow_inode(ip) || !xfs_bmap_is_written_extent(irec)) { + *shared = false; + return 0; + } +@@ -657,7 +657,7 @@ xfs_reflink_end_cow_extent( + * preallocations can leak into the range we are called upon, and we + * need to skip them. 
+ */ +- if (!xfs_bmap_is_real_extent(&got)) { ++ if (!xfs_bmap_is_written_extent(&got)) { + *end_fsb = del.br_startoff; + goto out_cancel; + } +@@ -998,7 +998,7 @@ xfs_reflink_remap_extent( + xfs_off_t new_isize) + { + struct xfs_mount *mp = ip->i_mount; +- bool real_extent = xfs_bmap_is_real_extent(irec); ++ bool real_extent = xfs_bmap_is_written_extent(irec); + struct xfs_trans *tp; + unsigned int resblks; + struct xfs_bmbt_irec uirec; +@@ -1427,7 +1427,7 @@ xfs_reflink_dirty_extents( + goto out; + if (nmaps == 0) + break; +- if (!xfs_bmap_is_real_extent(&map[0])) ++ if (!xfs_bmap_is_written_extent(&map[0])) + goto next; + + map[1] = map[0]; diff --git a/queue-5.4/xfs-use-mmaplock-around-filemap_map_pages.patch b/queue-5.4/xfs-use-mmaplock-around-filemap_map_pages.patch new file mode 100644 index 00000000000..0c2aeddaa34 --- /dev/null +++ b/queue-5.4/xfs-use-mmaplock-around-filemap_map_pages.patch @@ -0,0 +1,67 @@ +From stable-owner@vger.kernel.org Fri Nov 11 05:11:19 2022 +From: Chandan Babu R +Date: Fri, 11 Nov 2022 09:40:23 +0530 +Subject: xfs: use MMAPLOCK around filemap_map_pages() +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20221111041025.87704-5-chandan.babu@oracle.com> + +From: Dave Chinner + +commit cd647d5651c0b0deaa26c1acb9e1789437ba9bc7 upstream. + +The page faultround path ->map_pages is implemented in XFS via +filemap_map_pages(). This function checks that pages found in page +cache lookups have not raced with truncate based invalidation by +checking page->mapping is correct and page->index is within EOF. + +However, we've known for a long time that this is not sufficient to +protect against races with invalidations done by operations that do +not change EOF. e.g. hole punching and other fallocate() based +direct extent manipulations. The way we protect against these +races is we wrap the page fault operations in a XFS_MMAPLOCK_SHARED +lock so they serialise against fallocate and truncate before calling +into the filemap function that processes the fault. + +Do the same for XFS's ->map_pages implementation to close this +potential data corruption issue. + +Signed-off-by: Dave Chinner +Reviewed-by: Amir Goldstein +Reviewed-by: Brian Foster +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Acked-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_file.c | 15 ++++++++++++++- + 1 file changed, 14 insertions(+), 1 deletion(-) + +--- a/fs/xfs/xfs_file.c ++++ b/fs/xfs/xfs_file.c +@@ -1267,10 +1267,23 @@ xfs_filemap_pfn_mkwrite( + return __xfs_filemap_fault(vmf, PE_SIZE_PTE, true); + } + ++static void ++xfs_filemap_map_pages( ++ struct vm_fault *vmf, ++ pgoff_t start_pgoff, ++ pgoff_t end_pgoff) ++{ ++ struct inode *inode = file_inode(vmf->vma->vm_file); ++ ++ xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); ++ filemap_map_pages(vmf, start_pgoff, end_pgoff); ++ xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); ++} ++ + static const struct vm_operations_struct xfs_file_vm_ops = { + .fault = xfs_filemap_fault, + .huge_fault = xfs_filemap_huge_fault, +- .map_pages = filemap_map_pages, ++ .map_pages = xfs_filemap_map_pages, + .page_mkwrite = xfs_filemap_page_mkwrite, + .pfn_mkwrite = xfs_filemap_pfn_mkwrite, + };
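
As a closing aside on the MMAPLOCK patch above: the serialization it
establishes is a classic reader/writer discipline, sketched here in
user space (illustrative only, with invented names; a pthread rwlock
stands in for the XFS MMAPLOCK):

	#include <pthread.h>

	static pthread_rwlock_t mmaplock = PTHREAD_RWLOCK_INITIALIZER;

	/* fault-around path: shared, like XFS_MMAPLOCK_SHARED */
	static void fault_around(void)
	{
		pthread_rwlock_rdlock(&mmaplock);
		/* ... filemap_map_pages()-style page table setup ... */
		pthread_rwlock_unlock(&mmaplock);
	}

	/* hole punch / fallocate path: exclusive, like XFS_MMAPLOCK_EXCL */
	static void punch_hole(void)
	{
		pthread_rwlock_wrlock(&mmaplock);
		/* ... invalidate page cache and manipulate extents ... */
		pthread_rwlock_unlock(&mmaplock);
	}

Because readers exclude the writer, no page can be mapped into page
tables while an extent manipulation is invalidating the same range,
which is the race the patch closes.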