From: Greg Kroah-Hartman Date: Sun, 1 Jul 2018 14:47:41 +0000 (+0200) Subject: 4.14-stable patches X-Git-Tag: v3.18.114~12 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=46c839aa286aaf16010a9c32f817dce6f07db748;p=thirdparty%2Fkernel%2Fstable-queue.git 4.14-stable patches added patches: block-fix-cloning-of-requests-with-a-special-payload.patch block-fix-transfer-when-chunk-sectors-exceeds-max.patch dm-thin-handle-running-out-of-data-space-vs-concurrent-discard.patch dm-zoned-avoid-triggering-reclaim-from-inside-dmz_map.patch x86-efi-fix-efi_call_phys_epilog-with-config_x86_5level-y.patch --- diff --git a/queue-4.14/block-fix-cloning-of-requests-with-a-special-payload.patch b/queue-4.14/block-fix-cloning-of-requests-with-a-special-payload.patch new file mode 100644 index 00000000000..bb36f3e30b4 --- /dev/null +++ b/queue-4.14/block-fix-cloning-of-requests-with-a-special-payload.patch @@ -0,0 +1,54 @@ +From 297ba57dcdec7ea37e702bcf1a577ac32a034e21 Mon Sep 17 00:00:00 2001 +From: Bart Van Assche +Date: Wed, 27 Jun 2018 12:55:18 -0700 +Subject: block: Fix cloning of requests with a special payload + +From: Bart Van Assche + +commit 297ba57dcdec7ea37e702bcf1a577ac32a034e21 upstream. + +This patch avoids that removing a path controlled by the dm-mpath driver +while mkfs is running triggers the following kernel bug: + + kernel BUG at block/blk-core.c:3347! 
+ invalid opcode: 0000 [#1] PREEMPT SMP KASAN + CPU: 20 PID: 24369 Comm: mkfs.ext4 Not tainted 4.18.0-rc1-dbg+ #2 + RIP: 0010:blk_end_request_all+0x68/0x70 + Call Trace: + + dm_softirq_done+0x326/0x3d0 [dm_mod] + blk_done_softirq+0x19b/0x1e0 + __do_softirq+0x128/0x60d + irq_exit+0x100/0x110 + smp_call_function_single_interrupt+0x90/0x330 + call_function_single_interrupt+0xf/0x20 + + +Fixes: f9d03f96b988 ("block: improve handling of the magic discard payload") +Reviewed-by: Ming Lei +Reviewed-by: Christoph Hellwig +Acked-by: Mike Snitzer +Signed-off-by: Bart Van Assche +Cc: Hannes Reinecke +Cc: Johannes Thumshirn +Cc: +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + block/blk-core.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/block/blk-core.c ++++ b/block/blk-core.c +@@ -3150,6 +3150,10 @@ static void __blk_rq_prep_clone(struct r + dst->cpu = src->cpu; + dst->__sector = blk_rq_pos(src); + dst->__data_len = blk_rq_bytes(src); ++ if (src->rq_flags & RQF_SPECIAL_PAYLOAD) { ++ dst->rq_flags |= RQF_SPECIAL_PAYLOAD; ++ dst->special_vec = src->special_vec; ++ } + dst->nr_phys_segments = src->nr_phys_segments; + dst->ioprio = src->ioprio; + dst->extra_len = src->extra_len; diff --git a/queue-4.14/block-fix-transfer-when-chunk-sectors-exceeds-max.patch b/queue-4.14/block-fix-transfer-when-chunk-sectors-exceeds-max.patch new file mode 100644 index 00000000000..28f6016897a --- /dev/null +++ b/queue-4.14/block-fix-transfer-when-chunk-sectors-exceeds-max.patch @@ -0,0 +1,38 @@ +From 15bfd21fbc5d35834b9ea383dc458a1f0c9e3434 Mon Sep 17 00:00:00 2001 +From: Keith Busch +Date: Tue, 26 Jun 2018 09:14:58 -0600 +Subject: block: Fix transfer when chunk sectors exceeds max + +From: Keith Busch + +commit 15bfd21fbc5d35834b9ea383dc458a1f0c9e3434 upstream. + +A device may have boundary restrictions where the number of sectors +between boundaries exceeds its max transfer size. In this case, we need +to cap the max size to the smaller of the two limits. 
+ +Reported-by: Jitendra Bhivare +Tested-by: Jitendra Bhivare +Cc: +Reviewed-by: Martin K. Petersen +Signed-off-by: Keith Busch +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/blkdev.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/include/linux/blkdev.h ++++ b/include/linux/blkdev.h +@@ -1088,8 +1088,8 @@ static inline unsigned int blk_max_size_ + if (!q->limits.chunk_sectors) + return q->limits.max_sectors; + +- return q->limits.chunk_sectors - +- (offset & (q->limits.chunk_sectors - 1)); ++ return min(q->limits.max_sectors, (unsigned int)(q->limits.chunk_sectors - ++ (offset & (q->limits.chunk_sectors - 1)))); + } + + static inline unsigned int blk_rq_get_max_sectors(struct request *rq, diff --git a/queue-4.14/dm-thin-handle-running-out-of-data-space-vs-concurrent-discard.patch b/queue-4.14/dm-thin-handle-running-out-of-data-space-vs-concurrent-discard.patch new file mode 100644 index 00000000000..d6d156d15b8 --- /dev/null +++ b/queue-4.14/dm-thin-handle-running-out-of-data-space-vs-concurrent-discard.patch @@ -0,0 +1,92 @@ +From a685557fbbc3122ed11e8ad3fa63a11ebc5de8c3 Mon Sep 17 00:00:00 2001 +From: Mike Snitzer +Date: Tue, 26 Jun 2018 12:04:23 -0400 +Subject: dm thin: handle running out of data space vs concurrent discard + +From: Mike Snitzer + +commit a685557fbbc3122ed11e8ad3fa63a11ebc5de8c3 upstream. + +Discards issued to a DM thin device can complete to userspace (via +fstrim) _before_ the metadata changes associated with the discards is +reflected in the thinp superblock (e.g. free blocks). 
As such, if a
user constructs a test that loops repeatedly over these steps, block
allocation can fail due to discards not having completed yet:
1) fill thin device via filesystem file
2) remove file
3) fstrim

From initial report, here:
https://www.redhat.com/archives/dm-devel/2018-April/msg00022.html

"The root cause of this issue is that dm-thin will first remove
mapping and increase corresponding blocks' reference count to prevent
them from being reused before DISCARD bios get processed by the
underlying layers. However, increasing blocks' reference count could
also increase the nr_allocated_this_transaction in struct sm_disk
which makes smd->old_ll.nr_allocated +
smd->nr_allocated_this_transaction bigger than smd->old_ll.nr_blocks.
In this case, alloc_data_block() will never commit metadata to reset
the begin pointer of struct sm_disk, because sm_disk_get_nr_free()
always returns an underflow value."

While there is room for improvement to the space-map accounting that
thinp is making use of, the reality is that this test is inherently
racy and will result in the previous iteration's fstrim discard(s)
completing concurrently with block allocation, via dd, in the next
iteration of the loop.

No amount of space map accounting improvements will be able to allow
users to use a block before a discard of that block has completed.

So the best we can really do is allow DM thinp to gracefully handle such
aggressive use of all the pool's data by degrading the pool into
out-of-data-space (OODS) mode. We _should_ get that behaviour already
(if space map accounting didn't falsely cause alloc_data_block() to
believe free space was available)... but short of that we handle the
current reality that dm_pool_alloc_data_block() can return -ENOSPC.
+ +Reported-by: Dennis Yang +Cc: stable@vger.kernel.org +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/dm-thin.c | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +--- a/drivers/md/dm-thin.c ++++ b/drivers/md/dm-thin.c +@@ -1380,6 +1380,8 @@ static void schedule_external_copy(struc + + static void set_pool_mode(struct pool *pool, enum pool_mode new_mode); + ++static void requeue_bios(struct pool *pool); ++ + static void check_for_space(struct pool *pool) + { + int r; +@@ -1392,8 +1394,10 @@ static void check_for_space(struct pool + if (r) + return; + +- if (nr_free) ++ if (nr_free) { + set_pool_mode(pool, PM_WRITE); ++ requeue_bios(pool); ++ } + } + + /* +@@ -1470,7 +1474,10 @@ static int alloc_data_block(struct thin_ + + r = dm_pool_alloc_data_block(pool->pmd, result); + if (r) { +- metadata_operation_failed(pool, "dm_pool_alloc_data_block", r); ++ if (r == -ENOSPC) ++ set_pool_mode(pool, PM_OUT_OF_DATA_SPACE); ++ else ++ metadata_operation_failed(pool, "dm_pool_alloc_data_block", r); + return r; + } + diff --git a/queue-4.14/dm-zoned-avoid-triggering-reclaim-from-inside-dmz_map.patch b/queue-4.14/dm-zoned-avoid-triggering-reclaim-from-inside-dmz_map.patch new file mode 100644 index 00000000000..95d37118fe1 --- /dev/null +++ b/queue-4.14/dm-zoned-avoid-triggering-reclaim-from-inside-dmz_map.patch @@ -0,0 +1,125 @@ +From 2d0b2d64d325e22939d9db3ba784f1236459ed98 Mon Sep 17 00:00:00 2001 +From: Bart Van Assche +Date: Fri, 22 Jun 2018 08:09:11 -0700 +Subject: dm zoned: avoid triggering reclaim from inside dmz_map() + +From: Bart Van Assche + +commit 2d0b2d64d325e22939d9db3ba784f1236459ed98 upstream. 
+ +This patch avoids that lockdep reports the following: + +====================================================== +WARNING: possible circular locking dependency detected +4.18.0-rc1 #62 Not tainted +------------------------------------------------------ +kswapd0/84 is trying to acquire lock: +00000000c313516d (&xfs_nondir_ilock_class){++++}, at: xfs_free_eofblocks+0xa2/0x1e0 + +but task is already holding lock: +00000000591c83ae (fs_reclaim){+.+.}, at: __fs_reclaim_acquire+0x5/0x30 + +which lock already depends on the new lock. + +the existing dependency chain (in reverse order) is: + +-> #2 (fs_reclaim){+.+.}: + kmem_cache_alloc+0x2c/0x2b0 + radix_tree_node_alloc.constprop.19+0x3d/0xc0 + __radix_tree_create+0x161/0x1c0 + __radix_tree_insert+0x45/0x210 + dmz_map+0x245/0x2d0 [dm_zoned] + __map_bio+0x40/0x260 + __split_and_process_non_flush+0x116/0x220 + __split_and_process_bio+0x81/0x180 + __dm_make_request.isra.32+0x5a/0x100 + generic_make_request+0x36e/0x690 + submit_bio+0x6c/0x140 + mpage_readpages+0x19e/0x1f0 + read_pages+0x6d/0x1b0 + __do_page_cache_readahead+0x21b/0x2d0 + force_page_cache_readahead+0xc4/0x100 + generic_file_read_iter+0x7c6/0xd20 + __vfs_read+0x102/0x180 + vfs_read+0x9b/0x140 + ksys_read+0x55/0xc0 + do_syscall_64+0x5a/0x1f0 + entry_SYSCALL_64_after_hwframe+0x49/0xbe + +-> #1 (&dmz->chunk_lock){+.+.}: + dmz_map+0x133/0x2d0 [dm_zoned] + __map_bio+0x40/0x260 + __split_and_process_non_flush+0x116/0x220 + __split_and_process_bio+0x81/0x180 + __dm_make_request.isra.32+0x5a/0x100 + generic_make_request+0x36e/0x690 + submit_bio+0x6c/0x140 + _xfs_buf_ioapply+0x31c/0x590 + xfs_buf_submit_wait+0x73/0x520 + xfs_buf_read_map+0x134/0x2f0 + xfs_trans_read_buf_map+0xc3/0x580 + xfs_read_agf+0xa5/0x1e0 + xfs_alloc_read_agf+0x59/0x2b0 + xfs_alloc_pagf_init+0x27/0x60 + xfs_bmap_longest_free_extent+0x43/0xb0 + xfs_bmap_btalloc_nullfb+0x7f/0xf0 + xfs_bmap_btalloc+0x428/0x7c0 + xfs_bmapi_write+0x598/0xcc0 + xfs_iomap_write_allocate+0x15a/0x330 + 
xfs_map_blocks+0x1cf/0x3f0 + xfs_do_writepage+0x15f/0x7b0 + write_cache_pages+0x1ca/0x540 + xfs_vm_writepages+0x65/0xa0 + do_writepages+0x48/0xf0 + __writeback_single_inode+0x58/0x730 + writeback_sb_inodes+0x249/0x5c0 + wb_writeback+0x11e/0x550 + wb_workfn+0xa3/0x670 + process_one_work+0x228/0x670 + worker_thread+0x3c/0x390 + kthread+0x11c/0x140 + ret_from_fork+0x3a/0x50 + +-> #0 (&xfs_nondir_ilock_class){++++}: + down_read_nested+0x43/0x70 + xfs_free_eofblocks+0xa2/0x1e0 + xfs_fs_destroy_inode+0xac/0x270 + dispose_list+0x51/0x80 + prune_icache_sb+0x52/0x70 + super_cache_scan+0x127/0x1a0 + shrink_slab.part.47+0x1bd/0x590 + shrink_node+0x3b5/0x470 + balance_pgdat+0x158/0x3b0 + kswapd+0x1ba/0x600 + kthread+0x11c/0x140 + ret_from_fork+0x3a/0x50 + +other info that might help us debug this: + +Chain exists of: + &xfs_nondir_ilock_class --> &dmz->chunk_lock --> fs_reclaim + +Possible unsafe locking scenario: + + CPU0 CPU1 + ---- ---- +lock(fs_reclaim); + lock(&dmz->chunk_lock); + lock(fs_reclaim); +lock(&xfs_nondir_ilock_class); + +--- + drivers/md/dm-zoned-target.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/md/dm-zoned-target.c ++++ b/drivers/md/dm-zoned-target.c +@@ -788,7 +788,7 @@ static int dmz_ctr(struct dm_target *ti, + + /* Chunk BIO work */ + mutex_init(&dmz->chunk_lock); +- INIT_RADIX_TREE(&dmz->chunk_rxtree, GFP_KERNEL); ++ INIT_RADIX_TREE(&dmz->chunk_rxtree, GFP_NOIO); + dmz->chunk_wq = alloc_workqueue("dmz_cwq_%s", WQ_MEM_RECLAIM | WQ_UNBOUND, + 0, dev->name); + if (!dmz->chunk_wq) { diff --git a/queue-4.14/series b/queue-4.14/series index 14616eea38e..ced019aea5a 100644 --- a/queue-4.14/series +++ b/queue-4.14/series @@ -149,3 +149,8 @@ alsa-hda-realtek-fix-pop-noise-on-lenovo-p50-co.patch alsa-hda-realtek-add-a-quirk-for-fsc-esprimo-u9210.patch alsa-hda-realtek-fix-the-problem-of-two-front-mics-on-more-machines.patch slub-fix-failure-when-we-delete-and-create-a-slab-cache.patch 
+block-fix-transfer-when-chunk-sectors-exceeds-max.patch +block-fix-cloning-of-requests-with-a-special-payload.patch +x86-efi-fix-efi_call_phys_epilog-with-config_x86_5level-y.patch +dm-zoned-avoid-triggering-reclaim-from-inside-dmz_map.patch +dm-thin-handle-running-out-of-data-space-vs-concurrent-discard.patch diff --git a/queue-4.14/x86-efi-fix-efi_call_phys_epilog-with-config_x86_5level-y.patch b/queue-4.14/x86-efi-fix-efi_call_phys_epilog-with-config_x86_5level-y.patch new file mode 100644 index 00000000000..8df3f098981 --- /dev/null +++ b/queue-4.14/x86-efi-fix-efi_call_phys_epilog-with-config_x86_5level-y.patch @@ -0,0 +1,55 @@ +From cfe19577047e74cdac5826adbdc2337d8437f8fb Mon Sep 17 00:00:00 2001 +From: "Kirill A. Shutemov" +Date: Mon, 25 Jun 2018 15:08:52 +0300 +Subject: x86/efi: Fix efi_call_phys_epilog() with CONFIG_X86_5LEVEL=y + +From: Kirill A. Shutemov + +commit cfe19577047e74cdac5826adbdc2337d8437f8fb upstream. + +Open-coded page table entry checks don't work correctly when we fold the +page table level at runtime. + +pgd_present() on 4-level paging machine always returns true, but +open-coded version of the check may return false-negative result and +we silently skip the rest of the loop body in efi_call_phys_epilog(). + +Replace open-coded checks with proper helpers. + +Signed-off-by: Kirill A. 
Shutemov +Acked-by: Ard Biesheuvel +Cc: Andrey Ryabinin +Cc: Baoquan He +Cc: Linus Torvalds +Cc: Matt Fleming +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: stable@vger.kernel.org # v4.12+ +Fixes: 94133e46a0f5 ("x86/efi: Correct EFI identity mapping under 'efi=old_map' when KASLR is enabled") +Link: http://lkml.kernel.org/r/20180625120852.18300-1-kirill.shutemov@linux.intel.com +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/platform/efi/efi_64.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/arch/x86/platform/efi/efi_64.c ++++ b/arch/x86/platform/efi/efi_64.c +@@ -166,14 +166,14 @@ void __init efi_call_phys_epilog(pgd_t * + pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE); + set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]); + +- if (!(pgd_val(*pgd) & _PAGE_PRESENT)) ++ if (!pgd_present(*pgd)) + continue; + + for (i = 0; i < PTRS_PER_P4D; i++) { + p4d = p4d_offset(pgd, + pgd_idx * PGDIR_SIZE + i * P4D_SIZE); + +- if (!(p4d_val(*p4d) & _PAGE_PRESENT)) ++ if (!p4d_present(*p4d)) + continue; + + pud = (pud_t *)p4d_page_vaddr(*p4d);