From: Greg Kroah-Hartman Date: Tue, 7 Aug 2018 13:23:17 +0000 (+0200) Subject: 4.17-stable patches X-Git-Tag: v4.17.14~13 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=6319ccf20201be1391973c2d1bc7de26b2acc66a;p=thirdparty%2Fkernel%2Fstable-queue.git 4.17-stable patches added patches: btrfs-fix-file-data-corruption-after-cloning-a-range-and-fsync.patch i2c-imx-fix-reinit_completion-use.patch ring_buffer-tracing-inherit-the-tracing-setting-to-next-ring-buffer.patch xfs-more-robust-inode-extent-count-validation.patch --- diff --git a/queue-4.17/btrfs-fix-file-data-corruption-after-cloning-a-range-and-fsync.patch b/queue-4.17/btrfs-fix-file-data-corruption-after-cloning-a-range-and-fsync.patch new file mode 100644 index 00000000000..5be5d5f46ad --- /dev/null +++ b/queue-4.17/btrfs-fix-file-data-corruption-after-cloning-a-range-and-fsync.patch @@ -0,0 +1,105 @@ +From bd3599a0e142cd73edd3b6801068ac3f48ac771a Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Thu, 12 Jul 2018 01:36:43 +0100 +Subject: Btrfs: fix file data corruption after cloning a range and fsync + +From: Filipe Manana + +commit bd3599a0e142cd73edd3b6801068ac3f48ac771a upstream. + +When we clone a range into a file we can end up dropping existing +extent maps (or trimming them) and replacing them with new ones if the +range to be cloned overlaps with a range in the destination inode. +When that happens we add the new extent maps to the list of modified +extents in the inode's extent map tree, so that a "fast" fsync (the flag +BTRFS_INODE_NEEDS_FULL_SYNC not set in the inode) will see the extent maps +and log corresponding extent items. However, at the end of range cloning +operation we do truncate all the pages in the affected range (in order to +ensure future reads will not get stale data). Sometimes this truncation +will release the corresponding extent maps besides the pages from the page +cache. 
If this happens, then a "fast" fsync operation will miss logging +some extent items, because it relies exclusively on the extent maps being +present in the inode's extent tree, leading to data loss/corruption if +the fsync ends up using the same transaction used by the clone operation +(that transaction was not committed in the meanwhile). An extent map is +released through the callback btrfs_invalidatepage(), which gets called by +truncate_inode_pages_range(), and it calls __btrfs_releasepage(). The +latter ends up calling try_release_extent_mapping() which will release the +extent map if some conditions are met, like the file size being greater +than 16Mb, gfp flags allow blocking and the range not being locked (which +is the case during the clone operation) nor being the extent map flagged +as pinned (also the case for cloning). + +The following example, turned into a test for fstests, reproduces the +issue: + + $ mkfs.btrfs -f /dev/sdb + $ mount /dev/sdb /mnt + + $ xfs_io -f -c "pwrite -S 0x18 9000K 6908K" /mnt/foo + $ xfs_io -f -c "pwrite -S 0x20 2572K 156K" /mnt/bar + + $ xfs_io -c "fsync" /mnt/bar + # reflink destination offset corresponds to the size of file bar, + # 2728Kb minus 4Kb. + $ xfs_io -c "reflink /mnt/foo 0 2724K 15908K" /mnt/bar + $ xfs_io -c "fsync" /mnt/bar + + $ md5sum /mnt/bar + 95a95813a8c2abc9aa75a6c2914a077e /mnt/bar + + + + $ mount /dev/sdb /mnt + $ md5sum /mnt/bar + 207fd8d0b161be8a84b945f0df8d5f8d /mnt/bar + # digest should be 95a95813a8c2abc9aa75a6c2914a077e like before the + # power failure + +In the above example, the destination offset of the clone operation +corresponds to the size of the "bar" file minus 4Kb. So during the clone +operation, the extent map covering the range from 2572Kb to 2728Kb gets +trimmed so that it ends at offset 2724Kb, and a new extent map covering +the range from 2724Kb to 11724Kb is created. 
So at the end of the clone +operation when we ask to truncate the pages in the range from 2724Kb to +2724Kb + 15908Kb, the page invalidation callback ends up removing the new +extent map (through try_release_extent_mapping()) when the page at offset +2724Kb is passed to that callback. + +Fix this by setting the bit BTRFS_INODE_NEEDS_FULL_SYNC whenever an extent +map is removed at try_release_extent_mapping(), forcing the next fsync to +search for modified extents in the fs/subvolume tree instead of relying on +the presence of extent maps in memory. This way we can continue doing a +"fast" fsync if the destination range of a clone operation does not +overlap with an existing range or if any of the criteria necessary to +remove an extent map at try_release_extent_mapping() is not met (file +size not bigger than 16Mb or gfp flags do not allow blocking). + +CC: stable@vger.kernel.org # 3.16+ +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Sudip Mukherjee +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/extent_io.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/fs/btrfs/extent_io.c ++++ b/fs/btrfs/extent_io.c +@@ -4245,6 +4245,7 @@ int try_release_extent_mapping(struct ex + struct extent_map *em; + u64 start = page_offset(page); + u64 end = start + PAGE_SIZE - 1; ++ struct btrfs_inode *btrfs_inode = BTRFS_I(page->mapping->host); + + if (gfpflags_allow_blocking(mask) && + page->mapping->host->i_size > SZ_16M) { +@@ -4267,6 +4268,8 @@ int try_release_extent_mapping(struct ex + extent_map_end(em) - 1, + EXTENT_LOCKED | EXTENT_WRITEBACK, + 0, NULL)) { ++ set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, ++ &btrfs_inode->runtime_flags); + remove_extent_mapping(map, em); + /* once for the rb tree */ + free_extent_map(em); diff --git a/queue-4.17/i2c-imx-fix-reinit_completion-use.patch b/queue-4.17/i2c-imx-fix-reinit_completion-use.patch new file mode 100644 index 00000000000..e92bff2cd80 --- /dev/null +++ 
b/queue-4.17/i2c-imx-fix-reinit_completion-use.patch @@ -0,0 +1,53 @@ +From 9f9e3e0d4dd3338b3f3dde080789f71901e1e4ff Mon Sep 17 00:00:00 2001 +From: Esben Haabendal +Date: Mon, 9 Jul 2018 11:43:01 +0200 +Subject: i2c: imx: Fix reinit_completion() use +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Esben Haabendal + +commit 9f9e3e0d4dd3338b3f3dde080789f71901e1e4ff upstream. + +Make sure to call reinit_completion() before dma is started to avoid race +condition where reinit_completion() is called after complete() and before +wait_for_completion_timeout(). + +Signed-off-by: Esben Haabendal +Fixes: ce1a78840ff7 ("i2c: imx: add DMA support for freescale i2c driver") +Reviewed-by: Uwe Kleine-König +Signed-off-by: Wolfram Sang +Cc: stable@kernel.org +Signed-off-by: Sudip Mukherjee +Signed-off-by: Greg Kroah-Hartman +--- + drivers/i2c/busses/i2c-imx.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/drivers/i2c/busses/i2c-imx.c ++++ b/drivers/i2c/busses/i2c-imx.c +@@ -377,6 +377,7 @@ static int i2c_imx_dma_xfer(struct imx_i + goto err_desc; + } + ++ reinit_completion(&dma->cmd_complete); + txdesc->callback = i2c_imx_dma_callback; + txdesc->callback_param = i2c_imx; + if (dma_submit_error(dmaengine_submit(txdesc))) { +@@ -631,7 +632,6 @@ static int i2c_imx_dma_write(struct imx_ + * The first byte must be transmitted by the CPU. 
+ */ + imx_i2c_write_reg(msgs->addr << 1, i2c_imx, IMX_I2C_I2DR); +- reinit_completion(&i2c_imx->dma->cmd_complete); + time_left = wait_for_completion_timeout( + &i2c_imx->dma->cmd_complete, + msecs_to_jiffies(DMA_TIMEOUT)); +@@ -690,7 +690,6 @@ static int i2c_imx_dma_read(struct imx_i + if (result) + return result; + +- reinit_completion(&i2c_imx->dma->cmd_complete); + time_left = wait_for_completion_timeout( + &i2c_imx->dma->cmd_complete, + msecs_to_jiffies(DMA_TIMEOUT)); diff --git a/queue-4.17/ring_buffer-tracing-inherit-the-tracing-setting-to-next-ring-buffer.patch b/queue-4.17/ring_buffer-tracing-inherit-the-tracing-setting-to-next-ring-buffer.patch new file mode 100644 index 00000000000..aede6cf62d0 --- /dev/null +++ b/queue-4.17/ring_buffer-tracing-inherit-the-tracing-setting-to-next-ring-buffer.patch @@ -0,0 +1,103 @@ +From 73c8d8945505acdcbae137c2e00a1232e0be709f Mon Sep 17 00:00:00 2001 +From: Masami Hiramatsu +Date: Sat, 14 Jul 2018 01:28:15 +0900 +Subject: ring_buffer: tracing: Inherit the tracing setting to next ring buffer + +From: Masami Hiramatsu + +commit 73c8d8945505acdcbae137c2e00a1232e0be709f upstream. + +Maintain the tracing on/off setting of the ring_buffer when switching +to the trace buffer snapshot. + +Taking a snapshot is done by swapping the backup ring buffer +(max_tr_buffer). But since the tracing on/off setting is defined +by the ring buffer, when swapping it, the tracing on/off setting +can also be changed. This causes a strange result like below: + + /sys/kernel/debug/tracing # cat tracing_on + 1 + /sys/kernel/debug/tracing # echo 0 > tracing_on + /sys/kernel/debug/tracing # cat tracing_on + 0 + /sys/kernel/debug/tracing # echo 1 > snapshot + /sys/kernel/debug/tracing # cat tracing_on + 1 + /sys/kernel/debug/tracing # echo 1 > snapshot + /sys/kernel/debug/tracing # cat tracing_on + 0 + +We don't touch tracing_on, but snapshot changes tracing_on +setting each time. 
This is an anomaly, because user doesn't know +that each "ring_buffer" stores its own tracing-enable state and +the snapshot is done by swapping ring buffers. + +Link: http://lkml.kernel.org/r/153149929558.11274.11730609978254724394.stgit@devbox + +Cc: Ingo Molnar +Cc: Shuah Khan +Cc: Tom Zanussi +Cc: Hiraku Toyooka +Cc: stable@vger.kernel.org +Fixes: debdd57f5145 ("tracing: Make a snapshot feature available from userspace") +Signed-off-by: Masami Hiramatsu +[ Updated commit log and comment in the code ] +Signed-off-by: Steven Rostedt (VMware) +Signed-off-by: Sudip Mukherjee +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/ring_buffer.h | 1 + + kernel/trace/ring_buffer.c | 16 ++++++++++++++++ + kernel/trace/trace.c | 6 ++++++ + 3 files changed, 23 insertions(+) + +--- a/include/linux/ring_buffer.h ++++ b/include/linux/ring_buffer.h +@@ -165,6 +165,7 @@ void ring_buffer_record_enable(struct ri + void ring_buffer_record_off(struct ring_buffer *buffer); + void ring_buffer_record_on(struct ring_buffer *buffer); + int ring_buffer_record_is_on(struct ring_buffer *buffer); ++int ring_buffer_record_is_set_on(struct ring_buffer *buffer); + void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu); + void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu); + +--- a/kernel/trace/ring_buffer.c ++++ b/kernel/trace/ring_buffer.c +@@ -3227,6 +3227,22 @@ int ring_buffer_record_is_on(struct ring + } + + /** ++ * ring_buffer_record_is_set_on - return true if the ring buffer is set writable ++ * @buffer: The ring buffer to see if write is set enabled ++ * ++ * Returns true if the ring buffer is set writable by ring_buffer_record_on(). ++ * Note that this does NOT mean it is in a writable state. ++ * ++ * It may return true when the ring buffer has been disabled by ++ * ring_buffer_record_disable(), as that is a temporary disabling of ++ * the ring buffer. 
++ */ ++int ring_buffer_record_is_set_on(struct ring_buffer *buffer) ++{ ++ return !(atomic_read(&buffer->record_disabled) & RB_BUFFER_OFF); ++} ++ ++/** + * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer + * @buffer: The ring buffer to stop writes to. + * @cpu: The CPU buffer to stop +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -1375,6 +1375,12 @@ update_max_tr(struct trace_array *tr, st + + arch_spin_lock(&tr->max_lock); + ++ /* Inherit the recordable setting from trace_buffer */ ++ if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer)) ++ ring_buffer_record_on(tr->max_buffer.buffer); ++ else ++ ring_buffer_record_off(tr->max_buffer.buffer); ++ + buf = tr->trace_buffer.buffer; + tr->trace_buffer.buffer = tr->max_buffer.buffer; + tr->max_buffer.buffer = buf; diff --git a/queue-4.17/series b/queue-4.17/series index 6036d822190..103139ba64b 100644 --- a/queue-4.17/series +++ b/queue-4.17/series @@ -9,3 +9,7 @@ perf-x86-intel-uncore-fix-hardcoded-index-of-broadwell-extra-pci-devices.patch nohz-fix-local_timer_softirq_pending.patch nohz-fix-missing-tick-reprogram-when-interrupting-an-inline-softirq.patch netlink-don-t-shift-on-64-for-ngroups.patch +xfs-more-robust-inode-extent-count-validation.patch +ring_buffer-tracing-inherit-the-tracing-setting-to-next-ring-buffer.patch +i2c-imx-fix-reinit_completion-use.patch +btrfs-fix-file-data-corruption-after-cloning-a-range-and-fsync.patch diff --git a/queue-4.17/xfs-more-robust-inode-extent-count-validation.patch b/queue-4.17/xfs-more-robust-inode-extent-count-validation.patch new file mode 100644 index 00000000000..fc01dbe93a7 --- /dev/null +++ b/queue-4.17/xfs-more-robust-inode-extent-count-validation.patch @@ -0,0 +1,141 @@ +From 23fcb3340d033d9f081e21e6c12c2db7eaa541d3 Mon Sep 17 00:00:00 2001 +From: Dave Chinner +Date: Thu, 21 Jun 2018 23:25:57 -0700 +Subject: xfs: More robust inode extent count validation + +From: Dave Chinner + +commit 23fcb3340d033d9f081e21e6c12c2db7eaa541d3 
upstream. + +When the inode is in extent format, it can't have more extents than +fit in the inode fork. We don't currently check this, and so this +corruption goes unnoticed by the inode verifiers. This can lead to +crashes operating on invalid in-memory structures. + +Attempts to access such an inode will now error out in the verifier +rather than allowing modification operations to proceed. + +Reported-by: Wen Xu +Signed-off-by: Dave Chinner +Reviewed-by: Darrick J. Wong +[darrick: fix a typedef, add some braces and breaks to shut up compiler warnings] +Signed-off-by: Darrick J. Wong +Cc: Yuki Machida +Signed-off-by: Greg Kroah-Hartman + +--- + fs/xfs/libxfs/xfs_format.h | 3 + + fs/xfs/libxfs/xfs_inode_buf.c | 76 +++++++++++++++++++++++++----------------- + 2 files changed, 50 insertions(+), 29 deletions(-) + +--- a/fs/xfs/libxfs/xfs_format.h ++++ b/fs/xfs/libxfs/xfs_format.h +@@ -971,6 +971,9 @@ typedef enum xfs_dinode_fmt { + XFS_DFORK_DSIZE(dip, mp) : \ + XFS_DFORK_ASIZE(dip, mp)) + ++#define XFS_DFORK_MAXEXT(dip, mp, w) \ ++ (XFS_DFORK_SIZE(dip, mp, w) / sizeof(struct xfs_bmbt_rec)) ++ + /* + * Return pointers to the data or attribute forks. 
+ */ +--- a/fs/xfs/libxfs/xfs_inode_buf.c ++++ b/fs/xfs/libxfs/xfs_inode_buf.c +@@ -391,6 +391,47 @@ xfs_log_dinode_to_disk( + } + } + ++static xfs_failaddr_t ++xfs_dinode_verify_fork( ++ struct xfs_dinode *dip, ++ struct xfs_mount *mp, ++ int whichfork) ++{ ++ uint32_t di_nextents = XFS_DFORK_NEXTENTS(dip, whichfork); ++ ++ switch (XFS_DFORK_FORMAT(dip, whichfork)) { ++ case XFS_DINODE_FMT_LOCAL: ++ /* ++ * no local regular files yet ++ */ ++ if (whichfork == XFS_DATA_FORK) { ++ if (S_ISREG(be16_to_cpu(dip->di_mode))) ++ return __this_address; ++ if (be64_to_cpu(dip->di_size) > ++ XFS_DFORK_SIZE(dip, mp, whichfork)) ++ return __this_address; ++ } ++ if (di_nextents) ++ return __this_address; ++ break; ++ case XFS_DINODE_FMT_EXTENTS: ++ if (di_nextents > XFS_DFORK_MAXEXT(dip, mp, whichfork)) ++ return __this_address; ++ break; ++ case XFS_DINODE_FMT_BTREE: ++ if (whichfork == XFS_ATTR_FORK) { ++ if (di_nextents > MAXAEXTNUM) ++ return __this_address; ++ } else if (di_nextents > MAXEXTNUM) { ++ return __this_address; ++ } ++ break; ++ default: ++ return __this_address; ++ } ++ return NULL; ++} ++ + xfs_failaddr_t + xfs_dinode_verify( + struct xfs_mount *mp, +@@ -457,24 +498,9 @@ xfs_dinode_verify( + case S_IFREG: + case S_IFLNK: + case S_IFDIR: +- switch (dip->di_format) { +- case XFS_DINODE_FMT_LOCAL: +- /* +- * no local regular files yet +- */ +- if (S_ISREG(mode)) +- return __this_address; +- if (di_size > XFS_DFORK_DSIZE(dip, mp)) +- return __this_address; +- if (dip->di_nextents) +- return __this_address; +- /* fall through */ +- case XFS_DINODE_FMT_EXTENTS: +- case XFS_DINODE_FMT_BTREE: +- break; +- default: +- return __this_address; +- } ++ fa = xfs_dinode_verify_fork(dip, mp, XFS_DATA_FORK); ++ if (fa) ++ return fa; + break; + case 0: + /* Uninitialized inode ok. 
*/ +@@ -484,17 +510,9 @@ xfs_dinode_verify( + } + + if (XFS_DFORK_Q(dip)) { +- switch (dip->di_aformat) { +- case XFS_DINODE_FMT_LOCAL: +- if (dip->di_anextents) +- return __this_address; +- /* fall through */ +- case XFS_DINODE_FMT_EXTENTS: +- case XFS_DINODE_FMT_BTREE: +- break; +- default: +- return __this_address; +- } ++ fa = xfs_dinode_verify_fork(dip, mp, XFS_ATTR_FORK); ++ if (fa) ++ return fa; + } else { + /* + * If there is no fork offset, this may be a freshly-made inode