From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Tue, 7 Aug 2018 13:23:17 +0000 (+0200)
Subject: 4.17-stable patches
X-Git-Tag: v4.17.14~13
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=6319ccf20201be1391973c2d1bc7de26b2acc66a;p=thirdparty%2Fkernel%2Fstable-queue.git

4.17-stable patches

added patches:
	btrfs-fix-file-data-corruption-after-cloning-a-range-and-fsync.patch
	i2c-imx-fix-reinit_completion-use.patch
	ring_buffer-tracing-inherit-the-tracing-setting-to-next-ring-buffer.patch
	xfs-more-robust-inode-extent-count-validation.patch
---

diff --git a/queue-4.17/btrfs-fix-file-data-corruption-after-cloning-a-range-and-fsync.patch b/queue-4.17/btrfs-fix-file-data-corruption-after-cloning-a-range-and-fsync.patch
new file mode 100644
index 00000000000..5be5d5f46ad
--- /dev/null
+++ b/queue-4.17/btrfs-fix-file-data-corruption-after-cloning-a-range-and-fsync.patch
@@ -0,0 +1,105 @@
+From bd3599a0e142cd73edd3b6801068ac3f48ac771a Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Thu, 12 Jul 2018 01:36:43 +0100
+Subject: Btrfs: fix file data corruption after cloning a range and fsync
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit bd3599a0e142cd73edd3b6801068ac3f48ac771a upstream.
+
+When we clone a range into a file we can end up dropping existing
+extent maps (or trimming them) and replacing them with new ones if the
+range to be cloned overlaps with a range in the destination inode.
+When that happens we add the new extent maps to the list of modified
+extents in the inode's extent map tree, so that a "fast" fsync (the flag
+BTRFS_INODE_NEEDS_FULL_SYNC not set in the inode) will see the extent maps
+and log corresponding extent items. However, at the end of range cloning
+operation we do truncate all the pages in the affected range (in order to
+ensure future reads will not get stale data). Sometimes this truncation
+will release the corresponding extent maps besides the pages from the page
+cache. If this happens, then a "fast" fsync operation will miss logging
+some extent items, because it relies exclusively on the extent maps being
+present in the inode's extent tree, leading to data loss/corruption if
+the fsync ends up using the same transaction used by the clone operation
+(that transaction was not committed in the meanwhile). An extent map is
+released through the callback btrfs_invalidatepage(), which gets called by
+truncate_inode_pages_range(), and it calls __btrfs_releasepage(). The
+latter ends up calling try_release_extent_mapping() which will release the
+extent map if some conditions are met, like the file size being greater
+than 16Mb, gfp flags allow blocking and the range not being locked (which
+is the case during the clone operation) nor being the extent map flagged
+as pinned (also the case for cloning).
+
+The following example, turned into a test for fstests, reproduces the
+issue:
+
+  $ mkfs.btrfs -f /dev/sdb
+  $ mount /dev/sdb /mnt
+
+  $ xfs_io -f -c "pwrite -S 0x18 9000K 6908K" /mnt/foo
+  $ xfs_io -f -c "pwrite -S 0x20 2572K 156K" /mnt/bar
+
+  $ xfs_io -c "fsync" /mnt/bar
+  # reflink destination offset corresponds to the size of file bar,
+  # 2728Kb minus 4Kb.
+  $ xfs_io -c "reflink /mnt/foo 0 2724K 15908K" /mnt/bar
+  $ xfs_io -c "fsync" /mnt/bar
+
+  $ md5sum /mnt/bar
+  95a95813a8c2abc9aa75a6c2914a077e  /mnt/bar
+
+  <power fail>
+
+  $ mount /dev/sdb /mnt
+  $ md5sum /mnt/bar
+  207fd8d0b161be8a84b945f0df8d5f8d  /mnt/bar
+  # digest should be 95a95813a8c2abc9aa75a6c2914a077e like before the
+  # power failure
+
+In the above example, the destination offset of the clone operation
+corresponds to the size of the "bar" file minus 4Kb. So during the clone
+operation, the extent map covering the range from 2572Kb to 2728Kb gets
+trimmed so that it ends at offset 2724Kb, and a new extent map covering
+the range from 2724Kb to 11724Kb is created. So at the end of the clone
+operation when we ask to truncate the pages in the range from 2724Kb to
+2724Kb + 15908Kb, the page invalidation callback ends up removing the new
+extent map (through try_release_extent_mapping()) when the page at offset
+2724Kb is passed to that callback.
+
+Fix this by setting the bit BTRFS_INODE_NEEDS_FULL_SYNC whenever an extent
+map is removed at try_release_extent_mapping(), forcing the next fsync to
+search for modified extents in the fs/subvolume tree instead of relying on
+the presence of extent maps in memory. This way we can continue doing a
+"fast" fsync if the destination range of a clone operation does not
+overlap with an existing range or if any of the criteria necessary to
+remove an extent map at try_release_extent_mapping() is not met (file
+size not bigger than 16Mb or gfp flags do not allow blocking).
+
+CC: stable@vger.kernel.org # 3.16+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/extent_io.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/btrfs/extent_io.c
++++ b/fs/btrfs/extent_io.c
+@@ -4245,6 +4245,7 @@ int try_release_extent_mapping(struct ex
+ 	struct extent_map *em;
+ 	u64 start = page_offset(page);
+ 	u64 end = start + PAGE_SIZE - 1;
++	struct btrfs_inode *btrfs_inode = BTRFS_I(page->mapping->host);
+ 
+ 	if (gfpflags_allow_blocking(mask) &&
+ 	    page->mapping->host->i_size > SZ_16M) {
+@@ -4267,6 +4268,8 @@ int try_release_extent_mapping(struct ex
+ 					    extent_map_end(em) - 1,
+ 					    EXTENT_LOCKED | EXTENT_WRITEBACK,
+ 					    0, NULL)) {
++				set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
++					&btrfs_inode->runtime_flags);
+ 				remove_extent_mapping(map, em);
+ 				/* once for the rb tree */
+ 				free_extent_map(em);
diff --git a/queue-4.17/i2c-imx-fix-reinit_completion-use.patch b/queue-4.17/i2c-imx-fix-reinit_completion-use.patch
new file mode 100644
index 00000000000..e92bff2cd80
--- /dev/null
+++ b/queue-4.17/i2c-imx-fix-reinit_completion-use.patch
@@ -0,0 +1,53 @@
+From 9f9e3e0d4dd3338b3f3dde080789f71901e1e4ff Mon Sep 17 00:00:00 2001
+From: Esben Haabendal <eha@deif.com>
+Date: Mon, 9 Jul 2018 11:43:01 +0200
+Subject: i2c: imx: Fix reinit_completion() use
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Esben Haabendal <eha@deif.com>
+
+commit 9f9e3e0d4dd3338b3f3dde080789f71901e1e4ff upstream.
+
+Make sure to call reinit_completion() before dma is started to avoid a race
+condition where reinit_completion() is called after complete() and before
+wait_for_completion_timeout().
+
+Signed-off-by: Esben Haabendal <eha@deif.com>
+Fixes: ce1a78840ff7 ("i2c: imx: add DMA support for freescale i2c driver")
+Reviewed-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
+Cc: stable@kernel.org
+Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/i2c/busses/i2c-imx.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/drivers/i2c/busses/i2c-imx.c
++++ b/drivers/i2c/busses/i2c-imx.c
+@@ -377,6 +377,7 @@ static int i2c_imx_dma_xfer(struct imx_i
+ 		goto err_desc;
+ 	}
+ 
++	reinit_completion(&dma->cmd_complete);
+ 	txdesc->callback = i2c_imx_dma_callback;
+ 	txdesc->callback_param = i2c_imx;
+ 	if (dma_submit_error(dmaengine_submit(txdesc))) {
+@@ -631,7 +632,6 @@ static int i2c_imx_dma_write(struct imx_
+ 	 * The first byte must be transmitted by the CPU.
+ 	 */
+ 	imx_i2c_write_reg(msgs->addr << 1, i2c_imx, IMX_I2C_I2DR);
+-	reinit_completion(&i2c_imx->dma->cmd_complete);
+ 	time_left = wait_for_completion_timeout(
+ 				&i2c_imx->dma->cmd_complete,
+ 				msecs_to_jiffies(DMA_TIMEOUT));
+@@ -690,7 +690,6 @@ static int i2c_imx_dma_read(struct imx_i
+ 	if (result)
+ 		return result;
+ 
+-	reinit_completion(&i2c_imx->dma->cmd_complete);
+ 	time_left = wait_for_completion_timeout(
+ 				&i2c_imx->dma->cmd_complete,
+ 				msecs_to_jiffies(DMA_TIMEOUT));
diff --git a/queue-4.17/ring_buffer-tracing-inherit-the-tracing-setting-to-next-ring-buffer.patch b/queue-4.17/ring_buffer-tracing-inherit-the-tracing-setting-to-next-ring-buffer.patch
new file mode 100644
index 00000000000..aede6cf62d0
--- /dev/null
+++ b/queue-4.17/ring_buffer-tracing-inherit-the-tracing-setting-to-next-ring-buffer.patch
@@ -0,0 +1,103 @@
+From 73c8d8945505acdcbae137c2e00a1232e0be709f Mon Sep 17 00:00:00 2001
+From: Masami Hiramatsu <mhiramat@kernel.org>
+Date: Sat, 14 Jul 2018 01:28:15 +0900
+Subject: ring_buffer: tracing: Inherit the tracing setting to next ring buffer
+
+From: Masami Hiramatsu <mhiramat@kernel.org>
+
+commit 73c8d8945505acdcbae137c2e00a1232e0be709f upstream.
+
+Maintain the tracing on/off setting of the ring_buffer when switching
+to the trace buffer snapshot.
+
+Taking a snapshot is done by swapping the backup ring buffer
+(max_tr_buffer). But since the tracing on/off setting is defined
+by the ring buffer, when swapping it, the tracing on/off setting
+can also be changed. This causes a strange result like below:
+
+  /sys/kernel/debug/tracing # cat tracing_on
+  1
+  /sys/kernel/debug/tracing # echo 0 > tracing_on
+  /sys/kernel/debug/tracing # cat tracing_on
+  0
+  /sys/kernel/debug/tracing # echo 1 > snapshot
+  /sys/kernel/debug/tracing # cat tracing_on
+  1
+  /sys/kernel/debug/tracing # echo 1 > snapshot
+  /sys/kernel/debug/tracing # cat tracing_on
+  0
+
+We don't touch tracing_on, but snapshot changes tracing_on
+setting each time. This is an anomaly, because the user doesn't know
+that each "ring_buffer" stores its own tracing-enable state and
+the snapshot is done by swapping ring buffers.
+
+Link: http://lkml.kernel.org/r/153149929558.11274.11730609978254724394.stgit@devbox
+
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Shuah Khan <shuah@kernel.org>
+Cc: Tom Zanussi <tom.zanussi@linux.intel.com>
+Cc: Hiraku Toyooka <hiraku.toyooka@cybertrust.co.jp>
+Cc: stable@vger.kernel.org
+Fixes: debdd57f5145 ("tracing: Make a snapshot feature available from userspace")
+Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
+[ Updated commit log and comment in the code ]
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/ring_buffer.h |    1 +
+ kernel/trace/ring_buffer.c  |   16 ++++++++++++++++
+ kernel/trace/trace.c        |    6 ++++++
+ 3 files changed, 23 insertions(+)
+
+--- a/include/linux/ring_buffer.h
++++ b/include/linux/ring_buffer.h
+@@ -165,6 +165,7 @@ void ring_buffer_record_enable(struct ri
+ void ring_buffer_record_off(struct ring_buffer *buffer);
+ void ring_buffer_record_on(struct ring_buffer *buffer);
+ int ring_buffer_record_is_on(struct ring_buffer *buffer);
++int ring_buffer_record_is_set_on(struct ring_buffer *buffer);
+ void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu);
+ void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu);
+ 
+--- a/kernel/trace/ring_buffer.c
++++ b/kernel/trace/ring_buffer.c
+@@ -3227,6 +3227,22 @@ int ring_buffer_record_is_on(struct ring
+ }
+ 
+ /**
++ * ring_buffer_record_is_set_on - return true if the ring buffer is set writable
++ * @buffer: The ring buffer to see if write is set enabled
++ *
++ * Returns true if the ring buffer is set writable by ring_buffer_record_on().
++ * Note that this does NOT mean it is in a writable state.
++ *
++ * It may return true when the ring buffer has been disabled by
++ * ring_buffer_record_disable(), as that is a temporary disabling of
++ * the ring buffer.
++ */
++int ring_buffer_record_is_set_on(struct ring_buffer *buffer)
++{
++	return !(atomic_read(&buffer->record_disabled) & RB_BUFFER_OFF);
++}
++
++/**
+  * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer
+  * @buffer: The ring buffer to stop writes to.
+  * @cpu: The CPU buffer to stop
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -1375,6 +1375,12 @@ update_max_tr(struct trace_array *tr, st
+ 
+ 	arch_spin_lock(&tr->max_lock);
+ 
++	/* Inherit the recordable setting from trace_buffer */
++	if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
++		ring_buffer_record_on(tr->max_buffer.buffer);
++	else
++		ring_buffer_record_off(tr->max_buffer.buffer);
++
+ 	buf = tr->trace_buffer.buffer;
+ 	tr->trace_buffer.buffer = tr->max_buffer.buffer;
+ 	tr->max_buffer.buffer = buf;
diff --git a/queue-4.17/series b/queue-4.17/series
index 6036d822190..103139ba64b 100644
--- a/queue-4.17/series
+++ b/queue-4.17/series
@@ -9,3 +9,7 @@ perf-x86-intel-uncore-fix-hardcoded-index-of-broadwell-extra-pci-devices.patch
 nohz-fix-local_timer_softirq_pending.patch
 nohz-fix-missing-tick-reprogram-when-interrupting-an-inline-softirq.patch
 netlink-don-t-shift-on-64-for-ngroups.patch
+xfs-more-robust-inode-extent-count-validation.patch
+ring_buffer-tracing-inherit-the-tracing-setting-to-next-ring-buffer.patch
+i2c-imx-fix-reinit_completion-use.patch
+btrfs-fix-file-data-corruption-after-cloning-a-range-and-fsync.patch
diff --git a/queue-4.17/xfs-more-robust-inode-extent-count-validation.patch b/queue-4.17/xfs-more-robust-inode-extent-count-validation.patch
new file mode 100644
index 00000000000..fc01dbe93a7
--- /dev/null
+++ b/queue-4.17/xfs-more-robust-inode-extent-count-validation.patch
@@ -0,0 +1,141 @@
+From 23fcb3340d033d9f081e21e6c12c2db7eaa541d3 Mon Sep 17 00:00:00 2001
+From: Dave Chinner <dchinner@redhat.com>
+Date: Thu, 21 Jun 2018 23:25:57 -0700
+Subject: xfs: More robust inode extent count validation
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit 23fcb3340d033d9f081e21e6c12c2db7eaa541d3 upstream.
+
+When the inode is in extent format, it can't have more extents than
+fit in the inode fork. We don't currently check this, and so this
+corruption goes unnoticed by the inode verifiers. This can lead to
+crashes operating on invalid in-memory structures.
+
+Attempts to access such an inode will now error out in the verifier
+rather than allowing modification operations to proceed.
+
+Reported-by: Wen Xu <wen.xu@gatech.edu>
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+[darrick: fix a typedef, add some braces and breaks to shut up compiler warnings]
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Cc: Yuki Machida <machida.yuki@jp.fujitsu.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/libxfs/xfs_format.h    |    3 +
+ fs/xfs/libxfs/xfs_inode_buf.c |   76 +++++++++++++++++++++++++-----------------
+ 2 files changed, 50 insertions(+), 29 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_format.h
++++ b/fs/xfs/libxfs/xfs_format.h
+@@ -971,6 +971,9 @@ typedef enum xfs_dinode_fmt {
+ 		XFS_DFORK_DSIZE(dip, mp) : \
+ 		XFS_DFORK_ASIZE(dip, mp))
+ 
++#define XFS_DFORK_MAXEXT(dip, mp, w) \
++	(XFS_DFORK_SIZE(dip, mp, w) / sizeof(struct xfs_bmbt_rec))
++
+ /*
+  * Return pointers to the data or attribute forks.
+  */
+--- a/fs/xfs/libxfs/xfs_inode_buf.c
++++ b/fs/xfs/libxfs/xfs_inode_buf.c
+@@ -391,6 +391,47 @@ xfs_log_dinode_to_disk(
+ 	}
+ }
+ 
++static xfs_failaddr_t
++xfs_dinode_verify_fork(
++	struct xfs_dinode	*dip,
++	struct xfs_mount	*mp,
++	int			whichfork)
++{
++	uint32_t		di_nextents = XFS_DFORK_NEXTENTS(dip, whichfork);
++
++	switch (XFS_DFORK_FORMAT(dip, whichfork)) {
++	case XFS_DINODE_FMT_LOCAL:
++		/*
++		 * no local regular files yet
++		 */
++		if (whichfork == XFS_DATA_FORK) {
++			if (S_ISREG(be16_to_cpu(dip->di_mode)))
++				return __this_address;
++			if (be64_to_cpu(dip->di_size) >
++					XFS_DFORK_SIZE(dip, mp, whichfork))
++				return __this_address;
++		}
++		if (di_nextents)
++			return __this_address;
++		break;
++	case XFS_DINODE_FMT_EXTENTS:
++		if (di_nextents > XFS_DFORK_MAXEXT(dip, mp, whichfork))
++			return __this_address;
++		break;
++	case XFS_DINODE_FMT_BTREE:
++		if (whichfork == XFS_ATTR_FORK) {
++			if (di_nextents > MAXAEXTNUM)
++				return __this_address;
++		} else if (di_nextents > MAXEXTNUM) {
++			return __this_address;
++		}
++		break;
++	default:
++		return __this_address;
++	}
++	return NULL;
++}
++
+ xfs_failaddr_t
+ xfs_dinode_verify(
+ 	struct xfs_mount	*mp,
+@@ -457,24 +498,9 @@ xfs_dinode_verify(
+ 	case S_IFREG:
+ 	case S_IFLNK:
+ 	case S_IFDIR:
+-		switch (dip->di_format) {
+-		case XFS_DINODE_FMT_LOCAL:
+-			/*
+-			 * no local regular files yet
+-			 */
+-			if (S_ISREG(mode))
+-				return __this_address;
+-			if (di_size > XFS_DFORK_DSIZE(dip, mp))
+-				return __this_address;
+-			if (dip->di_nextents)
+-				return __this_address;
+-			/* fall through */
+-		case XFS_DINODE_FMT_EXTENTS:
+-		case XFS_DINODE_FMT_BTREE:
+-			break;
+-		default:
+-			return __this_address;
+-		}
++		fa = xfs_dinode_verify_fork(dip, mp, XFS_DATA_FORK);
++		if (fa)
++			return fa;
+ 		break;
+ 	case 0:
+ 		/* Uninitialized inode ok. */
+@@ -484,17 +510,9 @@ xfs_dinode_verify(
+ 	}
+ 
+ 	if (XFS_DFORK_Q(dip)) {
+-		switch (dip->di_aformat) {
+-		case XFS_DINODE_FMT_LOCAL:
+-			if (dip->di_anextents)
+-				return __this_address;
+-		/* fall through */
+-		case XFS_DINODE_FMT_EXTENTS:
+-		case XFS_DINODE_FMT_BTREE:
+-			break;
+-		default:
+-			return __this_address;
+-		}
++		fa = xfs_dinode_verify_fork(dip, mp, XFS_ATTR_FORK);
++		if (fa)
++			return fa;
+ 	} else {
+ 		/*
+ 		 * If there is no fork offset, this may be a freshly-made inode