git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.17-stable patches
author    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Sun, 1 Jul 2018 14:48:00 +0000 (16:48 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Sun, 1 Jul 2018 14:48:00 +0000 (16:48 +0200)
added patches:
block-fix-cloning-of-requests-with-a-special-payload.patch
block-fix-transfer-when-chunk-sectors-exceeds-max.patch
dm-prevent-dax-mounts-if-not-supported.patch
dm-thin-handle-running-out-of-data-space-vs-concurrent-discard.patch
dm-use-bio_split-when-splitting-out-the-already-processed-bio.patch
dm-zoned-avoid-triggering-reclaim-from-inside-dmz_map.patch
pmem-only-set-queue_flag_dax-for-fsdax-mode.patch
selinux-move-user-accesses-in-selinuxfs-out-of-locked-regions.patch
x86-e820-put-e820_type_ram-regions-into-memblock.reserved.patch
x86-efi-fix-efi_call_phys_epilog-with-config_x86_5level-y.patch
x86-entry-64-compat-fix-x86-entry-64-compat-preserve-r8-r11-in-int-0x80.patch

13 files changed:
queue-4.17/block-fix-cloning-of-requests-with-a-special-payload.patch [new file with mode: 0644]
queue-4.17/block-fix-transfer-when-chunk-sectors-exceeds-max.patch [new file with mode: 0644]
queue-4.17/dm-prevent-dax-mounts-if-not-supported.patch [new file with mode: 0644]
queue-4.17/dm-thin-handle-running-out-of-data-space-vs-concurrent-discard.patch [new file with mode: 0644]
queue-4.17/dm-use-bio_split-when-splitting-out-the-already-processed-bio.patch [new file with mode: 0644]
queue-4.17/dm-zoned-avoid-triggering-reclaim-from-inside-dmz_map.patch [new file with mode: 0644]
queue-4.17/pmem-only-set-queue_flag_dax-for-fsdax-mode.patch [new file with mode: 0644]
queue-4.17/selinux-move-user-accesses-in-selinuxfs-out-of-locked-regions.patch [new file with mode: 0644]
queue-4.17/series
queue-4.17/x86-e820-put-e820_type_ram-regions-into-memblock.reserved.patch [new file with mode: 0644]
queue-4.17/x86-efi-fix-efi_call_phys_epilog-with-config_x86_5level-y.patch [new file with mode: 0644]
queue-4.17/x86-entry-64-compat-fix-x86-entry-64-compat-preserve-r8-r11-in-int-0x80.patch [new file with mode: 0644]
queue-4.17/x86-mm-don-t-free-p4d-table-when-it-is-folded-at-runtime.patch [deleted file]

diff --git a/queue-4.17/block-fix-cloning-of-requests-with-a-special-payload.patch b/queue-4.17/block-fix-cloning-of-requests-with-a-special-payload.patch
new file mode 100644 (file)
index 0000000..8036ce9
--- /dev/null
@@ -0,0 +1,54 @@
+From 297ba57dcdec7ea37e702bcf1a577ac32a034e21 Mon Sep 17 00:00:00 2001
+From: Bart Van Assche <bart.vanassche@wdc.com>
+Date: Wed, 27 Jun 2018 12:55:18 -0700
+Subject: block: Fix cloning of requests with a special payload
+
+From: Bart Van Assche <bart.vanassche@wdc.com>
+
+commit 297ba57dcdec7ea37e702bcf1a577ac32a034e21 upstream.
+
+This patch avoids triggering the following kernel BUG when a path
+controlled by the dm-mpath driver is removed while mkfs is running:
+
+    kernel BUG at block/blk-core.c:3347!
+    invalid opcode: 0000 [#1] PREEMPT SMP KASAN
+    CPU: 20 PID: 24369 Comm: mkfs.ext4 Not tainted 4.18.0-rc1-dbg+ #2
+    RIP: 0010:blk_end_request_all+0x68/0x70
+    Call Trace:
+     <IRQ>
+     dm_softirq_done+0x326/0x3d0 [dm_mod]
+     blk_done_softirq+0x19b/0x1e0
+     __do_softirq+0x128/0x60d
+     irq_exit+0x100/0x110
+     smp_call_function_single_interrupt+0x90/0x330
+     call_function_single_interrupt+0xf/0x20
+     </IRQ>
+
+Fixes: f9d03f96b988 ("block: improve handling of the magic discard payload")
+Reviewed-by: Ming Lei <ming.lei@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Acked-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
+Cc: Hannes Reinecke <hare@suse.com>
+Cc: Johannes Thumshirn <jthumshirn@suse.de>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ block/blk-core.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/block/blk-core.c
++++ b/block/blk-core.c
+@@ -3487,6 +3487,10 @@ static void __blk_rq_prep_clone(struct r
+       dst->cpu = src->cpu;
+       dst->__sector = blk_rq_pos(src);
+       dst->__data_len = blk_rq_bytes(src);
++      if (src->rq_flags & RQF_SPECIAL_PAYLOAD) {
++              dst->rq_flags |= RQF_SPECIAL_PAYLOAD;
++              dst->special_vec = src->special_vec;
++      }
+       dst->nr_phys_segments = src->nr_phys_segments;
+       dst->ioprio = src->ioprio;
+       dst->extra_len = src->extra_len;
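A minimal userspace sketch of the invariant the hunk above restores: in a field-by-field clone, state guarded by a flag must travel together with the flag (struct and member names only mirror the kernel's; this is not kernel code):

    #include <stdio.h>

    struct bio_vec { void *bv_page; unsigned int bv_len, bv_offset; };
    #define RQF_SPECIAL_PAYLOAD (1u << 0)

    struct request {
        unsigned int rq_flags;
        struct bio_vec special_vec;  /* only valid with RQF_SPECIAL_PAYLOAD */
    };

    /* copy the payload together with the flag that guards it; before the
     * fix neither was copied, so completion accounting on the clone went
     * wrong and hit the BUG quoted above */
    static void rq_prep_clone(struct request *dst, const struct request *src)
    {
        if (src->rq_flags & RQF_SPECIAL_PAYLOAD) {
            dst->rq_flags |= RQF_SPECIAL_PAYLOAD;
            dst->special_vec = src->special_vec;
        }
    }

    int main(void)
    {
        struct request src = { RQF_SPECIAL_PAYLOAD, { NULL, 512, 0 } };
        struct request dst = { 0, { NULL, 0, 0 } };

        rq_prep_clone(&dst, &src);
        printf("clone has payload: %s (%u bytes)\n",
               dst.rq_flags & RQF_SPECIAL_PAYLOAD ? "yes" : "no",
               dst.special_vec.bv_len);
        return 0;
    }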
diff --git a/queue-4.17/block-fix-transfer-when-chunk-sectors-exceeds-max.patch b/queue-4.17/block-fix-transfer-when-chunk-sectors-exceeds-max.patch
new file mode 100644 (file)
index 0000000..bbf450b
--- /dev/null
@@ -0,0 +1,38 @@
+From 15bfd21fbc5d35834b9ea383dc458a1f0c9e3434 Mon Sep 17 00:00:00 2001
+From: Keith Busch <keith.busch@intel.com>
+Date: Tue, 26 Jun 2018 09:14:58 -0600
+Subject: block: Fix transfer when chunk sectors exceeds max
+
+From: Keith Busch <keith.busch@intel.com>
+
+commit 15bfd21fbc5d35834b9ea383dc458a1f0c9e3434 upstream.
+
+A device may have boundary restrictions where the number of sectors
+between boundaries exceeds its max transfer size. In this case, we need
+to cap the max size to the smaller of the two limits.
+
+Reported-by: Jitendra Bhivare <jitendra.bhivare@broadcom.com>
+Tested-by: Jitendra Bhivare <jitendra.bhivare@broadcom.com>
+Cc: <stable@vger.kernel.org>
+Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Keith Busch <keith.busch@intel.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/blkdev.h |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/include/linux/blkdev.h
++++ b/include/linux/blkdev.h
+@@ -1124,8 +1124,8 @@ static inline unsigned int blk_max_size_
+       if (!q->limits.chunk_sectors)
+               return q->limits.max_sectors;
+-      return q->limits.chunk_sectors -
+-                      (offset & (q->limits.chunk_sectors - 1));
++      return min(q->limits.max_sectors, (unsigned int)(q->limits.chunk_sectors -
++                      (offset & (q->limits.chunk_sectors - 1))));
+ }
+ static inline unsigned int blk_rq_get_max_sectors(struct request *rq,
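The fix is pure arithmetic, so it is easy to check in isolation. A standalone sketch of the capped calculation (function and parameter names are illustrative; chunk_sectors is a power of two, as the kernel requires):

    #include <stdio.h>

    /* toy model of blk_max_size_offset() after the fix: a transfer is
     * capped both by the distance to the next chunk boundary and by the
     * device's max transfer size */
    static unsigned int max_size_at_offset(unsigned int max_sectors,
                                           unsigned int chunk_sectors,
                                           unsigned long long offset)
    {
        unsigned int to_boundary;

        if (!chunk_sectors)
            return max_sectors;
        to_boundary = chunk_sectors -
                      (unsigned int)(offset & (chunk_sectors - 1));
        return to_boundary < max_sectors ? to_boundary : max_sectors;
    }

    int main(void)
    {
        /* boundary every 256 sectors, device max 128 sectors */
        printf("%u\n", max_size_at_offset(128, 256, 0));    /* 128: max wins */
        printf("%u\n", max_size_at_offset(128, 256, 200));  /* 56: boundary  */
        return 0;
    }

Before the fix the first case returned 256, twice what the device can transfer.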
diff --git a/queue-4.17/dm-prevent-dax-mounts-if-not-supported.patch b/queue-4.17/dm-prevent-dax-mounts-if-not-supported.patch
new file mode 100644 (file)
index 0000000..9c26bfd
--- /dev/null
@@ -0,0 +1,72 @@
+From dbc626597c39b24cefce09fbd8e9dea85869a801 Mon Sep 17 00:00:00 2001
+From: Ross Zwisler <ross.zwisler@linux.intel.com>
+Date: Tue, 26 Jun 2018 16:30:41 -0600
+Subject: dm: prevent DAX mounts if not supported
+
+From: Ross Zwisler <ross.zwisler@linux.intel.com>
+
+commit dbc626597c39b24cefce09fbd8e9dea85869a801 upstream.
+
+Currently device_supports_dax() just checks to see if the QUEUE_FLAG_DAX
+flag is set on the device's request queue to decide whether or not the
+device supports filesystem DAX.  Really we should be using
+bdev_dax_supported() like filesystems do at mount time.  This performs
+other tests like checking to make sure the dax_direct_access() path works.
+
+We also explicitly clear QUEUE_FLAG_DAX on the DM device's request queue if
+any of the underlying devices do not support DAX.  This makes the handling
+of QUEUE_FLAG_DAX consistent with the setting/clearing of most other flags
+in dm_table_set_restrictions().
+
+Now that bdev_dax_supported() explicitly checks for QUEUE_FLAG_DAX, this
+will ensure that filesystems built upon DM devices will only be able to
+mount with DAX if all underlying devices also support DAX.
+
+Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
+Fixes: commit 545ed20e6df6 ("dm: add infrastructure for DAX support")
+Cc: stable@vger.kernel.org
+Acked-by: Dan Williams <dan.j.williams@intel.com>
+Reviewed-by: Toshi Kani <toshi.kani@hpe.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/dm-table.c |    7 ++++---
+ drivers/md/dm.c       |    3 +--
+ 2 files changed, 5 insertions(+), 5 deletions(-)
+
+--- a/drivers/md/dm-table.c
++++ b/drivers/md/dm-table.c
+@@ -885,9 +885,7 @@ EXPORT_SYMBOL_GPL(dm_table_set_type);
+ static int device_supports_dax(struct dm_target *ti, struct dm_dev *dev,
+                              sector_t start, sector_t len, void *data)
+ {
+-      struct request_queue *q = bdev_get_queue(dev->bdev);
+-
+-      return q && blk_queue_dax(q);
++      return bdev_dax_supported(dev->bdev, PAGE_SIZE);
+ }
+ static bool dm_table_supports_dax(struct dm_table *t)
+@@ -1907,6 +1905,9 @@ void dm_table_set_restrictions(struct dm
+       if (dm_table_supports_dax(t))
+               blk_queue_flag_set(QUEUE_FLAG_DAX, q);
++      else
++              blk_queue_flag_clear(QUEUE_FLAG_DAX, q);
++
+       if (dm_table_supports_dax_write_cache(t))
+               dax_write_cache(t->md->dax_dev, true);
+--- a/drivers/md/dm.c
++++ b/drivers/md/dm.c
+@@ -1056,8 +1056,7 @@ static long dm_dax_direct_access(struct
+       if (len < 1)
+               goto out;
+       nr_pages = min(len, nr_pages);
+-      if (ti->type->direct_access)
+-              ret = ti->type->direct_access(ti, pgoff, nr_pages, kaddr, pfn);
++      ret = ti->type->direct_access(ti, pgoff, nr_pages, kaddr, pfn);
+  out:
+       dm_put_live_table(md, srcu_idx);
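A small sketch of the set-or-clear pattern the dm-table hunk adds (names are illustrative; the boolean array stands in for the per-device bdev_dax_supported() probes):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    #define QUEUE_FLAG_DAX (1u << 0)

    /* stand-ins for bdev_dax_supported() on each table member */
    static bool member_supports_dax[] = { true, true, false };

    /* recompute the capability on every table load: set the flag only
     * when every member supports DAX, and explicitly clear it otherwise,
     * so a table reload can never leave a stale flag behind */
    static void set_restrictions(unsigned int *queue_flags)
    {
        size_t n = sizeof(member_supports_dax) / sizeof(member_supports_dax[0]);
        bool all_dax = true;
        size_t i;

        for (i = 0; i < n; i++)
            all_dax = all_dax && member_supports_dax[i];

        if (all_dax)
            *queue_flags |= QUEUE_FLAG_DAX;
        else
            *queue_flags &= ~QUEUE_FLAG_DAX;
    }

    int main(void)
    {
        unsigned int flags = QUEUE_FLAG_DAX;  /* stale from a previous table */

        set_restrictions(&flags);
        printf("DAX flag after reload: %u\n", flags & QUEUE_FLAG_DAX);
        return 0;
    }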
diff --git a/queue-4.17/dm-thin-handle-running-out-of-data-space-vs-concurrent-discard.patch b/queue-4.17/dm-thin-handle-running-out-of-data-space-vs-concurrent-discard.patch
new file mode 100644 (file)
index 0000000..abcc451
--- /dev/null
@@ -0,0 +1,92 @@
+From a685557fbbc3122ed11e8ad3fa63a11ebc5de8c3 Mon Sep 17 00:00:00 2001
+From: Mike Snitzer <snitzer@redhat.com>
+Date: Tue, 26 Jun 2018 12:04:23 -0400
+Subject: dm thin: handle running out of data space vs concurrent discard
+
+From: Mike Snitzer <snitzer@redhat.com>
+
+commit a685557fbbc3122ed11e8ad3fa63a11ebc5de8c3 upstream.
+
+Discards issued to a DM thin device can complete to userspace (via
+fstrim) _before_ the metadata changes associated with the discards are
+reflected in the thinp superblock (e.g. free blocks).  As such, if a
+user constructs a test that loops repeatedly over these steps, block
+allocation can fail due to discards not having completed yet:
+1) fill thin device via filesystem file
+2) remove file
+3) fstrim
+
+From initial report, here:
+https://www.redhat.com/archives/dm-devel/2018-April/msg00022.html
+
+"The root cause of this issue is that dm-thin will first remove
+mapping and increase corresponding blocks' reference count to prevent
+them from being reused before DISCARD bios get processed by the
+underlying layers. However, increasing blocks' reference count could
+also increase the nr_allocated_this_transaction in struct sm_disk
+which makes smd->old_ll.nr_allocated +
+smd->nr_allocated_this_transaction bigger than smd->old_ll.nr_blocks.
+In this case, alloc_data_block() will never commit metadata to reset
+the begin pointer of struct sm_disk, because sm_disk_get_nr_free()
+always returns an underflow value."
+
+While there is room for improvement to the space-map accounting that
+thinp is making use of: the reality is this test is inherently racy and
+will result in the previous iteration's fstrim's discard(s) completing
+vs concurrent block allocation, via dd, in the next iteration of the
+loop.
+
+No amount of space map accounting improvements will be able to allow
+users to use a block before a discard of that block has completed.
+
+So the best we can really do is allow DM thinp to gracefully handle such
+aggressive use of all the pool's data by degrading the pool into
+out-of-data-space (OODS) mode.  We _should_ get that behaviour already
+(if space map accounting didn't falsely cause alloc_data_block() to
+believe free space was available), but short of that we handle the
+current reality that dm_pool_alloc_data_block() can return -ENOSPC.
+
+Reported-by: Dennis Yang <dennisyang@qnap.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/dm-thin.c |   11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+--- a/drivers/md/dm-thin.c
++++ b/drivers/md/dm-thin.c
+@@ -1385,6 +1385,8 @@ static void schedule_external_copy(struc
+ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode);
++static void requeue_bios(struct pool *pool);
++
+ static void check_for_space(struct pool *pool)
+ {
+       int r;
+@@ -1397,8 +1399,10 @@ static void check_for_space(struct pool
+       if (r)
+               return;
+-      if (nr_free)
++      if (nr_free) {
+               set_pool_mode(pool, PM_WRITE);
++              requeue_bios(pool);
++      }
+ }
+ /*
+@@ -1475,7 +1479,10 @@ static int alloc_data_block(struct thin_
+       r = dm_pool_alloc_data_block(pool->pmd, result);
+       if (r) {
+-              metadata_operation_failed(pool, "dm_pool_alloc_data_block", r);
++              if (r == -ENOSPC)
++                      set_pool_mode(pool, PM_OUT_OF_DATA_SPACE);
++              else
++                      metadata_operation_failed(pool, "dm_pool_alloc_data_block", r);
+               return r;
+       }
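The error routing in the last hunk is the heart of the change: -ENOSPC from the allocator now degrades the pool instead of being treated as a metadata failure. A compact sketch (pool_alloc() is a hypothetical stand-in for dm_pool_alloc_data_block()):

    #include <errno.h>
    #include <stdio.h>

    enum pool_mode { PM_WRITE, PM_OUT_OF_DATA_SPACE, PM_FAIL };
    static enum pool_mode mode = PM_WRITE;

    /* stand-in allocator: pretend the data device just filled up */
    static int pool_alloc(unsigned long long *result)
    {
        (void)result;
        return -ENOSPC;
    }

    static int alloc_data_block(unsigned long long *result)
    {
        int r = pool_alloc(result);

        if (r) {
            if (r == -ENOSPC)
                mode = PM_OUT_OF_DATA_SPACE;  /* recoverable: degrade */
            else
                mode = PM_FAIL;               /* genuine metadata error */
        }
        return r;
    }

    int main(void)
    {
        unsigned long long block;

        alloc_data_block(&block);
        printf("pool mode: %s\n",
               mode == PM_OUT_OF_DATA_SPACE ? "out-of-data-space" : "other");
        return 0;
    }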
diff --git a/queue-4.17/dm-use-bio_split-when-splitting-out-the-already-processed-bio.patch b/queue-4.17/dm-use-bio_split-when-splitting-out-the-already-processed-bio.patch
new file mode 100644 (file)
index 0000000..8cfb324
--- /dev/null
@@ -0,0 +1,45 @@
+From f21c601a2bb319ec19eb4562eadc7797d90fd90e Mon Sep 17 00:00:00 2001
+From: Mike Snitzer <snitzer@redhat.com>
+Date: Fri, 15 Jun 2018 09:35:33 -0400
+Subject: dm: use bio_split() when splitting out the already processed bio
+
+From: Mike Snitzer <snitzer@redhat.com>
+
+commit f21c601a2bb319ec19eb4562eadc7797d90fd90e upstream.
+
+Use of bio_clone_bioset() is inefficient if there is no need to clone
+the original bio's bio_vec array.  Best to use the bio_clone_fast()
+variant.  Also, just using bio_advance() is only part of what is needed
+to properly setup the clone -- it doesn't account for the various
+bio_integrity() related work that also needs to be performed (see
+bio_split).
+
+Address both of these issues by switching from bio_clone_bioset() to
+bio_split().
+
+Fixes: 18a25da8 ("dm: ensure bio submission follows a depth-first tree walk")
+Cc: stable@vger.kernel.org # 4.15+, requires removal of '&' before md->queue->bio_split
+Reported-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: NeilBrown <neilb@suse.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/dm.c |    5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/drivers/md/dm.c
++++ b/drivers/md/dm.c
+@@ -1581,10 +1581,9 @@ static blk_qc_t __split_and_process_bio(
+                                * the usage of io->orig_bio in dm_remap_zone_report()
+                                * won't be affected by this reassignment.
+                                */
+-                              struct bio *b = bio_clone_bioset(bio, GFP_NOIO,
+-                                                               md->queue->bio_split);
++                              struct bio *b = bio_split(bio, bio_sectors(bio) - ci.sector_count,
++                                                        GFP_NOIO, md->queue->bio_split);
+                               ci.io->orig_bio = b;
+-                              bio_advance(bio, (bio_sectors(bio) - ci.sector_count) << 9);
+                               bio_chain(b, bio);
+                               ret = generic_make_request(bio);
+                               break;
diff --git a/queue-4.17/dm-zoned-avoid-triggering-reclaim-from-inside-dmz_map.patch b/queue-4.17/dm-zoned-avoid-triggering-reclaim-from-inside-dmz_map.patch
new file mode 100644 (file)
index 0000000..95d3711
--- /dev/null
@@ -0,0 +1,125 @@
+From 2d0b2d64d325e22939d9db3ba784f1236459ed98 Mon Sep 17 00:00:00 2001
+From: Bart Van Assche <bart.vanassche@wdc.com>
+Date: Fri, 22 Jun 2018 08:09:11 -0700
+Subject: dm zoned: avoid triggering reclaim from inside dmz_map()
+
+From: Bart Van Assche <bart.vanassche@wdc.com>
+
+commit 2d0b2d64d325e22939d9db3ba784f1236459ed98 upstream.
+
+This patch avoids the following lockdep report:
+
+======================================================
+WARNING: possible circular locking dependency detected
+4.18.0-rc1 #62 Not tainted
+------------------------------------------------------
+kswapd0/84 is trying to acquire lock:
+00000000c313516d (&xfs_nondir_ilock_class){++++}, at: xfs_free_eofblocks+0xa2/0x1e0
+
+but task is already holding lock:
+00000000591c83ae (fs_reclaim){+.+.}, at: __fs_reclaim_acquire+0x5/0x30
+
+which lock already depends on the new lock.
+
+the existing dependency chain (in reverse order) is:
+
+-> #2 (fs_reclaim){+.+.}:
+  kmem_cache_alloc+0x2c/0x2b0
+  radix_tree_node_alloc.constprop.19+0x3d/0xc0
+  __radix_tree_create+0x161/0x1c0
+  __radix_tree_insert+0x45/0x210
+  dmz_map+0x245/0x2d0 [dm_zoned]
+  __map_bio+0x40/0x260
+  __split_and_process_non_flush+0x116/0x220
+  __split_and_process_bio+0x81/0x180
+  __dm_make_request.isra.32+0x5a/0x100
+  generic_make_request+0x36e/0x690
+  submit_bio+0x6c/0x140
+  mpage_readpages+0x19e/0x1f0
+  read_pages+0x6d/0x1b0
+  __do_page_cache_readahead+0x21b/0x2d0
+  force_page_cache_readahead+0xc4/0x100
+  generic_file_read_iter+0x7c6/0xd20
+  __vfs_read+0x102/0x180
+  vfs_read+0x9b/0x140
+  ksys_read+0x55/0xc0
+  do_syscall_64+0x5a/0x1f0
+  entry_SYSCALL_64_after_hwframe+0x49/0xbe
+
+-> #1 (&dmz->chunk_lock){+.+.}:
+  dmz_map+0x133/0x2d0 [dm_zoned]
+  __map_bio+0x40/0x260
+  __split_and_process_non_flush+0x116/0x220
+  __split_and_process_bio+0x81/0x180
+  __dm_make_request.isra.32+0x5a/0x100
+  generic_make_request+0x36e/0x690
+  submit_bio+0x6c/0x140
+  _xfs_buf_ioapply+0x31c/0x590
+  xfs_buf_submit_wait+0x73/0x520
+  xfs_buf_read_map+0x134/0x2f0
+  xfs_trans_read_buf_map+0xc3/0x580
+  xfs_read_agf+0xa5/0x1e0
+  xfs_alloc_read_agf+0x59/0x2b0
+  xfs_alloc_pagf_init+0x27/0x60
+  xfs_bmap_longest_free_extent+0x43/0xb0
+  xfs_bmap_btalloc_nullfb+0x7f/0xf0
+  xfs_bmap_btalloc+0x428/0x7c0
+  xfs_bmapi_write+0x598/0xcc0
+  xfs_iomap_write_allocate+0x15a/0x330
+  xfs_map_blocks+0x1cf/0x3f0
+  xfs_do_writepage+0x15f/0x7b0
+  write_cache_pages+0x1ca/0x540
+  xfs_vm_writepages+0x65/0xa0
+  do_writepages+0x48/0xf0
+  __writeback_single_inode+0x58/0x730
+  writeback_sb_inodes+0x249/0x5c0
+  wb_writeback+0x11e/0x550
+  wb_workfn+0xa3/0x670
+  process_one_work+0x228/0x670
+  worker_thread+0x3c/0x390
+  kthread+0x11c/0x140
+  ret_from_fork+0x3a/0x50
+
+-> #0 (&xfs_nondir_ilock_class){++++}:
+  down_read_nested+0x43/0x70
+  xfs_free_eofblocks+0xa2/0x1e0
+  xfs_fs_destroy_inode+0xac/0x270
+  dispose_list+0x51/0x80
+  prune_icache_sb+0x52/0x70
+  super_cache_scan+0x127/0x1a0
+  shrink_slab.part.47+0x1bd/0x590
+  shrink_node+0x3b5/0x470
+  balance_pgdat+0x158/0x3b0
+  kswapd+0x1ba/0x600
+  kthread+0x11c/0x140
+  ret_from_fork+0x3a/0x50
+
+other info that might help us debug this:
+
+Chain exists of:
+  &xfs_nondir_ilock_class --> &dmz->chunk_lock --> fs_reclaim
+
+Possible unsafe locking scenario:
+
+     CPU0                    CPU1
+     ----                    ----
+lock(fs_reclaim);
+                             lock(&dmz->chunk_lock);
+                             lock(fs_reclaim);
+lock(&xfs_nondir_ilock_class);
+
+---
+ drivers/md/dm-zoned-target.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/md/dm-zoned-target.c
++++ b/drivers/md/dm-zoned-target.c
+@@ -788,7 +788,7 @@ static int dmz_ctr(struct dm_target *ti,
+       /* Chunk BIO work */
+       mutex_init(&dmz->chunk_lock);
+-      INIT_RADIX_TREE(&dmz->chunk_rxtree, GFP_KERNEL);
++      INIT_RADIX_TREE(&dmz->chunk_rxtree, GFP_NOIO);
+       dmz->chunk_wq = alloc_workqueue("dmz_cwq_%s", WQ_MEM_RECLAIM | WQ_UNBOUND,
+                                       0, dev->name);
+       if (!dmz->chunk_wq) {
diff --git a/queue-4.17/pmem-only-set-queue_flag_dax-for-fsdax-mode.patch b/queue-4.17/pmem-only-set-queue_flag_dax-for-fsdax-mode.patch
new file mode 100644 (file)
index 0000000..15ad577
--- /dev/null
@@ -0,0 +1,40 @@
+From 4557641b4c7046625c026fb809c47ef0d43ae595 Mon Sep 17 00:00:00 2001
+From: Ross Zwisler <ross.zwisler@linux.intel.com>
+Date: Tue, 26 Jun 2018 16:30:39 -0600
+Subject: pmem: only set QUEUE_FLAG_DAX for fsdax mode
+
+From: Ross Zwisler <ross.zwisler@linux.intel.com>
+
+commit 4557641b4c7046625c026fb809c47ef0d43ae595 upstream.
+
+QUEUE_FLAG_DAX is an indication that a given block device supports
+filesystem DAX and should not be set for PMEM namespaces which are in "raw"
+mode.  These namespaces lack struct page and are prevented from
+participating in filesystem DAX as of commit 569d0365f571 ("dax: require
+'struct page' by default for filesystem dax").
+
+Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
+Suggested-by: Mike Snitzer <snitzer@redhat.com>
+Fixes: 569d0365f571 ("dax: require 'struct page' by default for filesystem dax")
+Cc: stable@vger.kernel.org
+Acked-by: Dan Williams <dan.j.williams@intel.com>
+Reviewed-by: Toshi Kani <toshi.kani@hpe.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/nvdimm/pmem.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/nvdimm/pmem.c
++++ b/drivers/nvdimm/pmem.c
+@@ -387,7 +387,8 @@ static int pmem_attach_disk(struct devic
+       blk_queue_logical_block_size(q, pmem_sector_size(ndns));
+       blk_queue_max_hw_sectors(q, UINT_MAX);
+       blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
+-      blk_queue_flag_set(QUEUE_FLAG_DAX, q);
++      if (pmem->pfn_flags & PFN_MAP)
++              blk_queue_flag_set(QUEUE_FLAG_DAX, q);
+       q->queuedata = pmem;
+       disk = alloc_disk_node(0, nid);
diff --git a/queue-4.17/selinux-move-user-accesses-in-selinuxfs-out-of-locked-regions.patch b/queue-4.17/selinux-move-user-accesses-in-selinuxfs-out-of-locked-regions.patch
new file mode 100644 (file)
index 0000000..b7b5eae
--- /dev/null
@@ -0,0 +1,177 @@
+From 0da74120c5341389b97c4ee27487a97224999ee1 Mon Sep 17 00:00:00 2001
+From: Jann Horn <jannh@google.com>
+Date: Thu, 28 Jun 2018 20:39:54 -0400
+Subject: selinux: move user accesses in selinuxfs out of locked regions
+
+From: Jann Horn <jannh@google.com>
+
+commit 0da74120c5341389b97c4ee27487a97224999ee1 upstream.
+
+If a user is accessing a file in selinuxfs with a pointer to a userspace
+buffer that is backed by e.g. a userfaultfd, the userspace access can
+stall indefinitely, which can block fsi->mutex if it is held.
+
+For sel_read_policy(), remove the locking, since this method doesn't seem
+to access anything that requires locking.
+
+For sel_read_bool(), move the user access below the locked region.
+
+For sel_write_bool() and sel_commit_bools_write(), move the user access
+up above the locked region.
+
+Cc: stable@vger.kernel.org
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Jann Horn <jannh@google.com>
+Acked-by: Stephen Smalley <sds@tycho.nsa.gov>
+[PM: removed an unused variable in sel_read_policy()]
+Signed-off-by: Paul Moore <paul@paul-moore.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ security/selinux/selinuxfs.c |   78 ++++++++++++++++++-------------------------
+ 1 file changed, 33 insertions(+), 45 deletions(-)
+
+--- a/security/selinux/selinuxfs.c
++++ b/security/selinux/selinuxfs.c
+@@ -435,22 +435,16 @@ static int sel_release_policy(struct ino
+ static ssize_t sel_read_policy(struct file *filp, char __user *buf,
+                              size_t count, loff_t *ppos)
+ {
+-      struct selinux_fs_info *fsi = file_inode(filp)->i_sb->s_fs_info;
+       struct policy_load_memory *plm = filp->private_data;
+       int ret;
+-      mutex_lock(&fsi->mutex);
+-
+       ret = avc_has_perm(&selinux_state,
+                          current_sid(), SECINITSID_SECURITY,
+                         SECCLASS_SECURITY, SECURITY__READ_POLICY, NULL);
+       if (ret)
+-              goto out;
++              return ret;
+-      ret = simple_read_from_buffer(buf, count, ppos, plm->data, plm->len);
+-out:
+-      mutex_unlock(&fsi->mutex);
+-      return ret;
++      return simple_read_from_buffer(buf, count, ppos, plm->data, plm->len);
+ }
+ static int sel_mmap_policy_fault(struct vm_fault *vmf)
+@@ -1182,25 +1176,29 @@ static ssize_t sel_read_bool(struct file
+       ret = -EINVAL;
+       if (index >= fsi->bool_num || strcmp(name,
+                                            fsi->bool_pending_names[index]))
+-              goto out;
++              goto out_unlock;
+       ret = -ENOMEM;
+       page = (char *)get_zeroed_page(GFP_KERNEL);
+       if (!page)
+-              goto out;
++              goto out_unlock;
+       cur_enforcing = security_get_bool_value(fsi->state, index);
+       if (cur_enforcing < 0) {
+               ret = cur_enforcing;
+-              goto out;
++              goto out_unlock;
+       }
+       length = scnprintf(page, PAGE_SIZE, "%d %d", cur_enforcing,
+                         fsi->bool_pending_values[index]);
+-      ret = simple_read_from_buffer(buf, count, ppos, page, length);
+-out:
+       mutex_unlock(&fsi->mutex);
++      ret = simple_read_from_buffer(buf, count, ppos, page, length);
++out_free:
+       free_page((unsigned long)page);
+       return ret;
++
++out_unlock:
++      mutex_unlock(&fsi->mutex);
++      goto out_free;
+ }
+ static ssize_t sel_write_bool(struct file *filep, const char __user *buf,
+@@ -1213,6 +1211,17 @@ static ssize_t sel_write_bool(struct fil
+       unsigned index = file_inode(filep)->i_ino & SEL_INO_MASK;
+       const char *name = filep->f_path.dentry->d_name.name;
++      if (count >= PAGE_SIZE)
++              return -ENOMEM;
++
++      /* No partial writes. */
++      if (*ppos != 0)
++              return -EINVAL;
++
++      page = memdup_user_nul(buf, count);
++      if (IS_ERR(page))
++              return PTR_ERR(page);
++
+       mutex_lock(&fsi->mutex);
+       length = avc_has_perm(&selinux_state,
+@@ -1227,22 +1236,6 @@ static ssize_t sel_write_bool(struct fil
+                                            fsi->bool_pending_names[index]))
+               goto out;
+-      length = -ENOMEM;
+-      if (count >= PAGE_SIZE)
+-              goto out;
+-
+-      /* No partial writes. */
+-      length = -EINVAL;
+-      if (*ppos != 0)
+-              goto out;
+-
+-      page = memdup_user_nul(buf, count);
+-      if (IS_ERR(page)) {
+-              length = PTR_ERR(page);
+-              page = NULL;
+-              goto out;
+-      }
+-
+       length = -EINVAL;
+       if (sscanf(page, "%d", &new_value) != 1)
+               goto out;
+@@ -1274,6 +1267,17 @@ static ssize_t sel_commit_bools_write(st
+       ssize_t length;
+       int new_value;
++      if (count >= PAGE_SIZE)
++              return -ENOMEM;
++
++      /* No partial writes. */
++      if (*ppos != 0)
++              return -EINVAL;
++
++      page = memdup_user_nul(buf, count);
++      if (IS_ERR(page))
++              return PTR_ERR(page);
++
+       mutex_lock(&fsi->mutex);
+       length = avc_has_perm(&selinux_state,
+@@ -1283,22 +1287,6 @@ static ssize_t sel_commit_bools_write(st
+       if (length)
+               goto out;
+-      length = -ENOMEM;
+-      if (count >= PAGE_SIZE)
+-              goto out;
+-
+-      /* No partial writes. */
+-      length = -EINVAL;
+-      if (*ppos != 0)
+-              goto out;
+-
+-      page = memdup_user_nul(buf, count);
+-      if (IS_ERR(page)) {
+-              length = PTR_ERR(page);
+-              page = NULL;
+-              goto out;
+-      }
+-
+       length = -EINVAL;
+       if (sscanf(page, "%d", &new_value) != 1)
+               goto out;
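The common shape of all three write-path fixes is "finish the user-memory access before taking the mutex". A userspace sketch of that ordering with a pthread mutex (names are illustrative; the memcpy() stands in for memdup_user_nul(), the call that can stall indefinitely on a userfaultfd-backed buffer):

    #include <errno.h>
    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/types.h>

    static pthread_mutex_t state_lock = PTHREAD_MUTEX_INITIALIZER;
    static int bool_value;

    /* analogue of memdup_user_nul(): the copy may stall on a hostile
     * mapping, so it must complete before state_lock is taken */
    static char *dup_user_buf(const char *ubuf, size_t count)
    {
        char *page = malloc(count + 1);

        if (!page)
            return NULL;
        memcpy(page, ubuf, count);
        page[count] = '\0';
        return page;
    }

    static ssize_t write_bool(const char *ubuf, size_t count)
    {
        char *page;
        int new_value;

        page = dup_user_buf(ubuf, count);   /* user access: outside the lock */
        if (!page)
            return -ENOMEM;
        if (sscanf(page, "%d", &new_value) != 1) {
            free(page);
            return -EINVAL;
        }
        pthread_mutex_lock(&state_lock);    /* short, bounded critical section */
        bool_value = new_value;
        pthread_mutex_unlock(&state_lock);
        free(page);
        return (ssize_t)count;
    }

    int main(void)
    {
        write_bool("1", 1);
        printf("bool_value = %d\n", bool_value);
        return 0;
    }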
diff --git a/queue-4.17/series b/queue-4.17/series
index ee8aa2830ca1f765e99fb33aa8bf7bcf90c27871..0541e96934f491f26e8933c21609d345c07246bf 100644 (file)
@@ -189,7 +189,6 @@ pwm-lpss-platform-save-restore-the-ctrl-register-over-a-suspend-resume.patch
 rbd-flush-rbd_dev-watch_dwork-after-watch-is-unregistered.patch
 mm-ksm.c-ignore-stable_flag-of-rmap_item-address-in-rmap_walk_ksm.patch
 mm-fix-devmem_is_allowed-for-sub-page-system-ram-intersections.patch
-x86-mm-don-t-free-p4d-table-when-it-is-folded-at-runtime.patch
 tracing-check-for-no-filter-when-processing-event-filters.patch
 xen-remove-unnecessary-bug_on-from-__unbind_from_irq.patch
 net-ethernet-fix-suspend-resume-in-davinci_emac.patch
@@ -208,3 +207,14 @@ revert-i2c-algo-bit-init-the-bus-to-a-known-state.patch
 i2c-gpio-initialize-scl-to-high-again.patch
 slub-fix-failure-when-we-delete-and-create-a-slab-cache.patch
 kasan-depend-on-config_slub_debug.patch
+dm-prevent-dax-mounts-if-not-supported.patch
+dm-use-bio_split-when-splitting-out-the-already-processed-bio.patch
+pmem-only-set-queue_flag_dax-for-fsdax-mode.patch
+block-fix-transfer-when-chunk-sectors-exceeds-max.patch
+block-fix-cloning-of-requests-with-a-special-payload.patch
+x86-e820-put-e820_type_ram-regions-into-memblock.reserved.patch
+selinux-move-user-accesses-in-selinuxfs-out-of-locked-regions.patch
+x86-entry-64-compat-fix-x86-entry-64-compat-preserve-r8-r11-in-int-0x80.patch
+x86-efi-fix-efi_call_phys_epilog-with-config_x86_5level-y.patch
+dm-zoned-avoid-triggering-reclaim-from-inside-dmz_map.patch
+dm-thin-handle-running-out-of-data-space-vs-concurrent-discard.patch
diff --git a/queue-4.17/x86-e820-put-e820_type_ram-regions-into-memblock.reserved.patch b/queue-4.17/x86-e820-put-e820_type_ram-regions-into-memblock.reserved.patch
new file mode 100644 (file)
index 0000000..7016072
--- /dev/null
@@ -0,0 +1,130 @@
+From 124049decbb121ec32742c94fb5d9d6bed8f24d8 Mon Sep 17 00:00:00 2001
+From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Date: Wed, 27 Jun 2018 23:26:13 -0700
+Subject: x86/e820: put !E820_TYPE_RAM regions into memblock.reserved
+
+From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+
+commit 124049decbb121ec32742c94fb5d9d6bed8f24d8 upstream.
+
+There is a kernel panic that is triggered when reading /proc/kpageflags
+on the kernel booted with kernel parameter 'memmap=nn[KMG]!ss[KMG]':
+
+  BUG: unable to handle kernel paging request at fffffffffffffffe
+  PGD 9b20e067 P4D 9b20e067 PUD 9b210067 PMD 0
+  Oops: 0000 [#1] SMP PTI
+  CPU: 2 PID: 1728 Comm: page-types Not tainted 4.17.0-rc6-mm1-v4.17-rc6-180605-0816-00236-g2dfb086ef02c+ #160
+  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.fc28 04/01/2014
+  RIP: 0010:stable_page_flags+0x27/0x3c0
+  Code: 00 00 00 0f 1f 44 00 00 48 85 ff 0f 84 a0 03 00 00 41 54 55 49 89 fc 53 48 8b 57 08 48 8b 2f 48 8d 42 ff 83 e2 01 48 0f 44 c7 <48> 8b 00 f6 c4 01 0f 84 10 03 00 00 31 db 49 8b 54 24 08 4c 89 e7
+  RSP: 0018:ffffbbd44111fde0 EFLAGS: 00010202
+  RAX: fffffffffffffffe RBX: 00007fffffffeff9 RCX: 0000000000000000
+  RDX: 0000000000000001 RSI: 0000000000000202 RDI: ffffed1182fff5c0
+  RBP: ffffffffffffffff R08: 0000000000000001 R09: 0000000000000001
+  R10: ffffbbd44111fed8 R11: 0000000000000000 R12: ffffed1182fff5c0
+  R13: 00000000000bffd7 R14: 0000000002fff5c0 R15: ffffbbd44111ff10
+  FS:  00007efc4335a500(0000) GS:ffff93a5bfc00000(0000) knlGS:0000000000000000
+  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+  CR2: fffffffffffffffe CR3: 00000000b2a58000 CR4: 00000000001406e0
+  Call Trace:
+   kpageflags_read+0xc7/0x120
+   proc_reg_read+0x3c/0x60
+   __vfs_read+0x36/0x170
+   vfs_read+0x89/0x130
+   ksys_pread64+0x71/0x90
+   do_syscall_64+0x5b/0x160
+   entry_SYSCALL_64_after_hwframe+0x44/0xa9
+  RIP: 0033:0x7efc42e75e23
+  Code: 09 00 ba 9f 01 00 00 e8 ab 81 f4 ff 66 2e 0f 1f 84 00 00 00 00 00 90 83 3d 29 0a 2d 00 00 75 13 49 89 ca b8 11 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 34 c3 48 83 ec 08 e8 db d3 01 00 48 89 04 24
+
+According to kernel bisection, this problem became visible due to commit
+f7f99100d8d9 ("mm: stop zeroing memory during allocation in vmemmap")
+which changes how struct pages are initialized.
+
+Memblock layout affects the pfn ranges covered by node/zone.  Consider
+that we have a VM with 2 NUMA nodes and each node has 4GB memory, and
+the default (no memmap= given) memblock layout is like below:
+
+  MEMBLOCK configuration:
+   memory size = 0x00000001fff75c00 reserved size = 0x000000000300c000
+   memory.cnt  = 0x4
+   memory[0x0]     [0x0000000000001000-0x000000000009efff], 0x000000000009e000 bytes on node 0 flags: 0x0
+   memory[0x1]     [0x0000000000100000-0x00000000bffd6fff], 0x00000000bfed7000 bytes on node 0 flags: 0x0
+   memory[0x2]     [0x0000000100000000-0x000000013fffffff], 0x0000000040000000 bytes on node 0 flags: 0x0
+   memory[0x3]     [0x0000000140000000-0x000000023fffffff], 0x0000000100000000 bytes on node 1 flags: 0x0
+   ...
+
+If you give memmap=1G!4G (so it just covers memory[0x2]),
+the range [0x100000000-0x13fffffff] is gone:
+
+  MEMBLOCK configuration:
+   memory size = 0x00000001bff75c00 reserved size = 0x000000000300c000
+   memory.cnt  = 0x3
+   memory[0x0]     [0x0000000000001000-0x000000000009efff], 0x000000000009e000 bytes on node 0 flags: 0x0
+   memory[0x1]     [0x0000000000100000-0x00000000bffd6fff], 0x00000000bfed7000 bytes on node 0 flags: 0x0
+   memory[0x2]     [0x0000000140000000-0x000000023fffffff], 0x0000000100000000 bytes on node 1 flags: 0x0
+   ...
+
+This causes node 0's pfn range to shrink because it is calculated from the
+address range of memblock.memory.  So some of the struct pages in the gap
+range are left uninitialized.
+
+We have a function zero_resv_unavail() which zeroes the struct pages
+within the reserved unavailable range (i.e.  memblock.reserved &&
+!memblock.memory).  This patch utilizes it to cover all unavailable
+ranges by putting them into memblock.reserved.
+
+Link: http://lkml.kernel.org/r/20180615072947.GB23273@hori1.linux.bs1.fc.nec.co.jp
+Fixes: f7f99100d8d9 ("mm: stop zeroing memory during allocation in vmemmap")
+Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Tested-by: Oscar Salvador <osalvador@suse.de>
+Tested-by: "Herton R. Krzesinski" <herton@redhat.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Reviewed-by: Pavel Tatashin <pasha.tatashin@oracle.com>
+Cc: Steven Sistare <steven.sistare@oracle.com>
+Cc: Daniel Jordan <daniel.m.jordan@oracle.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/e820.c |   15 ++++++++++++---
+ 1 file changed, 12 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/kernel/e820.c
++++ b/arch/x86/kernel/e820.c
+@@ -1246,6 +1246,7 @@ void __init e820__memblock_setup(void)
+ {
+       int i;
+       u64 end;
++      u64 addr = 0;
+       /*
+        * The bootstrap memblock region count maximum is 128 entries
+@@ -1262,13 +1263,21 @@ void __init e820__memblock_setup(void)
+               struct e820_entry *entry = &e820_table->entries[i];
+               end = entry->addr + entry->size;
++              if (addr < entry->addr)
++                      memblock_reserve(addr, entry->addr - addr);
++              addr = end;
+               if (end != (resource_size_t)end)
+                       continue;
++              /*
++               * all !E820_TYPE_RAM ranges (including gap ranges) are put
++               * into memblock.reserved to make sure that struct pages in
++               * such regions are not left uninitialized after bootup.
++               */
+               if (entry->type != E820_TYPE_RAM && entry->type != E820_TYPE_RESERVED_KERN)
+-                      continue;
+-
+-              memblock_add(entry->addr, entry->size);
++                      memblock_reserve(entry->addr, entry->size);
++              else
++                      memblock_add(entry->addr, entry->size);
+       }
+       /* Throw away partial pages: */
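A standalone model of the reworked walk, run on the memmap=1G!4G layout from the commit message (function names and the printed actions are illustrative):

    #include <stdint.h>
    #include <stdio.h>

    enum e820_type { TYPE_RAM, TYPE_RESERVED };
    struct entry { uint64_t addr, size; enum e820_type type; };

    static void reserve(uint64_t a, uint64_t sz)
    {
        printf("reserve 0x%llx + 0x%llx\n",
               (unsigned long long)a, (unsigned long long)sz);
    }

    static void add(uint64_t a, uint64_t sz)
    {
        printf("add     0x%llx + 0x%llx\n",
               (unsigned long long)a, (unsigned long long)sz);
    }

    int main(void)
    {
        /* memmap=1G!4G leaves a hole at [0x100000000, 0x13fffffff] */
        struct entry table[] = {
            { 0x1000,       0x9e000,      TYPE_RAM },
            { 0x100000,     0xbfed7000,   TYPE_RAM },
            { 0x140000000,  0x100000000,  TYPE_RAM },
        };
        uint64_t addr = 0;
        size_t i;

        for (i = 0; i < sizeof(table) / sizeof(table[0]); i++) {
            uint64_t end = table[i].addr + table[i].size;

            if (addr < table[i].addr)                 /* gap before entry: */
                reserve(addr, table[i].addr - addr);  /* reserve it */
            addr = end;

            if (table[i].type != TYPE_RAM)
                reserve(table[i].addr, table[i].size);
            else
                add(table[i].addr, table[i].size);
        }
        return 0;
    }

The hole comes out as a reserve call, so its struct pages get zeroed by zero_resv_unavail() instead of being left uninitialized.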
diff --git a/queue-4.17/x86-efi-fix-efi_call_phys_epilog-with-config_x86_5level-y.patch b/queue-4.17/x86-efi-fix-efi_call_phys_epilog-with-config_x86_5level-y.patch
new file mode 100644 (file)
index 0000000..8df3f09
--- /dev/null
@@ -0,0 +1,55 @@
+From cfe19577047e74cdac5826adbdc2337d8437f8fb Mon Sep 17 00:00:00 2001
+From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
+Date: Mon, 25 Jun 2018 15:08:52 +0300
+Subject: x86/efi: Fix efi_call_phys_epilog() with CONFIG_X86_5LEVEL=y
+
+From: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+
+commit cfe19577047e74cdac5826adbdc2337d8437f8fb upstream.
+
+Open-coded page table entry checks don't work correctly when we fold the
+page table level at runtime.
+
+pgd_present() on a 4-level paging machine always returns true, but the
+open-coded version of the check may return a false-negative result, and
+we silently skip the rest of the loop body in efi_call_phys_epilog().
+
+Replace open-coded checks with proper helpers.
+
+Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Cc: Baoquan He <bhe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Matt Fleming <matt@codeblueprint.co.uk>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org # v4.12+
+Fixes: 94133e46a0f5 ("x86/efi: Correct EFI identity mapping under 'efi=old_map' when KASLR is enabled")
+Link: http://lkml.kernel.org/r/20180625120852.18300-1-kirill.shutemov@linux.intel.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/platform/efi/efi_64.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/platform/efi/efi_64.c
++++ b/arch/x86/platform/efi/efi_64.c
+@@ -166,14 +166,14 @@ void __init efi_call_phys_epilog(pgd_t *
+               pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE);
+               set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]);
+-              if (!(pgd_val(*pgd) & _PAGE_PRESENT))
++              if (!pgd_present(*pgd))
+                       continue;
+               for (i = 0; i < PTRS_PER_P4D; i++) {
+                       p4d = p4d_offset(pgd,
+                                        pgd_idx * PGDIR_SIZE + i * P4D_SIZE);
+-                      if (!(p4d_val(*p4d) & _PAGE_PRESENT))
++                      if (!p4d_present(*p4d))
+                               continue;
+                       pud = (pud_t *)p4d_page_vaddr(*p4d);
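A toy model of why the helper and the raw-bit test disagree once the level is folded at runtime (types and values are illustrative, not the kernel's):

    #include <stdbool.h>
    #include <stdio.h>

    #define _PAGE_PRESENT 0x1ULL

    typedef struct { unsigned long long val; } pgd_t;

    static bool l5_enabled;   /* runtime choice, as with CONFIG_X86_5LEVEL */

    /* with the extra level folded, the pgd is a pass-through to the p4d
     * below it and is present by definition, whatever its raw bits say */
    static bool pgd_present(pgd_t pgd)
    {
        if (!l5_enabled)
            return true;
        return pgd.val & _PAGE_PRESENT;
    }

    int main(void)
    {
        pgd_t pgd = { 0 };   /* raw bits happen to read "not present" */

        l5_enabled = false;  /* 4-level machine running a 5-level kernel */
        printf("open-coded: %d\n", !!(pgd.val & _PAGE_PRESENT)); /* 0: skipped */
        printf("helper:     %d\n", pgd_present(pgd));            /* 1: correct */
        return 0;
    }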
diff --git a/queue-4.17/x86-entry-64-compat-fix-x86-entry-64-compat-preserve-r8-r11-in-int-0x80.patch b/queue-4.17/x86-entry-64-compat-fix-x86-entry-64-compat-preserve-r8-r11-in-int-0x80.patch
new file mode 100644 (file)
index 0000000..4ea80ee
--- /dev/null
@@ -0,0 +1,93 @@
+From 22cd978e598618e82c3c3348d2069184f6884182 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Tue, 26 Jun 2018 22:45:52 -0700
+Subject: x86/entry/64/compat: Fix "x86/entry/64/compat: Preserve r8-r11 in int $0x80"
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 22cd978e598618e82c3c3348d2069184f6884182 upstream.
+
+Commit:
+
+  8bb2610bc496 ("x86/entry/64/compat: Preserve r8-r11 in int $0x80")
+
+was busted: my original patch had a minor conflict with
+some of the nospec changes, but "git apply" is very clever
+and silently accepted the patch by making the same changes
+to a different function in the same file.  There was obviously
+a huge offset, but "git apply" for some reason doesn't feel
+any need to say so.
+
+Move the changes to the correct function.  Now the
+test_syscall_vdso_32 selftests passes.
+
+If anyone cares to observe the original problem, try applying the
+patch at:
+
+  https://lore.kernel.org/lkml/d4c4d9985fbe64f8c9e19291886453914b48caee.1523975710.git.luto@kernel.org/raw
+
+to the kernel at 316d097c4cd4e7f2ef50c40cff2db266593c4ec4:
+
+ - "git am" and "git apply" accept the patch without any complaints at all
+ - "patch -p1" at least prints out a message about the huge offset.
+
+Reported-by: zhijianx.li@intel.com
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Arjan van de Ven <arjan@linux.intel.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Woodhouse <dwmw2@infradead.org>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org #v4.17+
+Fixes: 8bb2610bc496 ("x86/entry/64/compat: Preserve r8-r11 in int $0x80")
+Link: http://lkml.kernel.org/r/6012b922485401bc42676e804171ded262fc2ef2.1530078306.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/entry/entry_64_compat.S |   16 ++++++++--------
+ 1 file changed, 8 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -84,13 +84,13 @@ ENTRY(entry_SYSENTER_compat)
+       pushq   %rdx                    /* pt_regs->dx */
+       pushq   %rcx                    /* pt_regs->cx */
+       pushq   $-ENOSYS                /* pt_regs->ax */
+-      pushq   %r8                     /* pt_regs->r8 */
++      pushq   $0                      /* pt_regs->r8  = 0 */
+       xorl    %r8d, %r8d              /* nospec   r8 */
+-      pushq   %r9                     /* pt_regs->r9 */
++      pushq   $0                      /* pt_regs->r9  = 0 */
+       xorl    %r9d, %r9d              /* nospec   r9 */
+-      pushq   %r10                    /* pt_regs->r10 */
++      pushq   $0                      /* pt_regs->r10 = 0 */
+       xorl    %r10d, %r10d            /* nospec   r10 */
+-      pushq   %r11                    /* pt_regs->r11 */
++      pushq   $0                      /* pt_regs->r11 = 0 */
+       xorl    %r11d, %r11d            /* nospec   r11 */
+       pushq   %rbx                    /* pt_regs->rbx */
+       xorl    %ebx, %ebx              /* nospec   rbx */
+@@ -374,13 +374,13 @@ ENTRY(entry_INT80_compat)
+       pushq   %rcx                    /* pt_regs->cx */
+       xorl    %ecx, %ecx              /* nospec   cx */
+       pushq   $-ENOSYS                /* pt_regs->ax */
+-      pushq   $0                      /* pt_regs->r8  = 0 */
++      pushq   %r8                     /* pt_regs->r8 */
+       xorl    %r8d, %r8d              /* nospec   r8 */
+-      pushq   $0                      /* pt_regs->r9  = 0 */
++      pushq   %r9                     /* pt_regs->r9 */
+       xorl    %r9d, %r9d              /* nospec   r9 */
+-      pushq   $0                      /* pt_regs->r10 = 0 */
++      pushq   %r10                    /* pt_regs->r10*/
+       xorl    %r10d, %r10d            /* nospec   r10 */
+-      pushq   $0                      /* pt_regs->r11 = 0 */
++      pushq   %r11                    /* pt_regs->r11 */
+       xorl    %r11d, %r11d            /* nospec   r11 */
+       pushq   %rbx                    /* pt_regs->rbx */
+       xorl    %ebx, %ebx              /* nospec   rbx */
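The effect is directly observable from userspace, along the lines of the selftest mentioned above. A hedged test sketch (assumes x86-64 Linux with CONFIG_IA32_EMULATION; 20 is the 32-bit __NR_getpid; after the fix, r8-r11 should survive the int $0x80 round trip):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        register uint64_t r8  asm("r8")  = 0x1111111111111111ULL;
        register uint64_t r9  asm("r9")  = 0x2222222222222222ULL;
        register uint64_t r10 asm("r10") = 0x3333333333333333ULL;
        register uint64_t r11 asm("r11") = 0x4444444444444444ULL;
        long ret;

        /* 32-bit syscall gate: eax = 20 is the 32-bit getpid() */
        asm volatile("int $0x80"
                     : "=a"(ret), "+r"(r8), "+r"(r9), "+r"(r10), "+r"(r11)
                     : "a"(20L)
                     : "memory", "cc");

        printf("getpid() = %ld\n", ret);
        printf("r8-r11 preserved: %s\n",
               (r8  == 0x1111111111111111ULL &&
                r9  == 0x2222222222222222ULL &&
                r10 == 0x3333333333333333ULL &&
                r11 == 0x4444444444444444ULL) ? "yes" : "no");
        return 0;
    }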
diff --git a/queue-4.17/x86-mm-don-t-free-p4d-table-when-it-is-folded-at-runtime.patch b/queue-4.17/x86-mm-don-t-free-p4d-table-when-it-is-folded-at-runtime.patch
deleted file mode 100644 (file)
index b7ffa4e..0000000
+++ /dev/null
@@ -1,43 +0,0 @@
-From 0e311d237d7f3022b7dafb639b42541bfb42fe94 Mon Sep 17 00:00:00 2001
-From: Andrey Ryabinin <aryabinin@virtuozzo.com>
-Date: Mon, 25 Jun 2018 13:24:27 +0300
-Subject: x86/mm: Don't free P4D table when it is folded at runtime
-
-From: Andrey Ryabinin <aryabinin@virtuozzo.com>
-
-commit 0e311d237d7f3022b7dafb639b42541bfb42fe94 upstream.
-
-When the P4D page table layer is folded at runtime, the p4d_free()
-should do nothing, the same as in <asm-generic/pgtable-nop4d.h>.
-
-It seems this bug should cause double-free in efi_call_phys_epilog(),
-but I don't know how to trigger that code path, so I can't confirm that
-by testing.
-
-Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
-Reviewed-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: stable@vger.kernel.org # 4.17
-Fixes: 98219dda2ab5 ("x86/mm: Fold p4d page table layer at runtime")
-Link: http://lkml.kernel.org/r/20180625102427.15015-1-aryabinin@virtuozzo.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-
----
- arch/x86/include/asm/pgalloc.h |    3 +++
- 1 file changed, 3 insertions(+)
-
---- a/arch/x86/include/asm/pgalloc.h
-+++ b/arch/x86/include/asm/pgalloc.h
-@@ -184,6 +184,9 @@ static inline p4d_t *p4d_alloc_one(struc
- static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
- {
-+      if (!pgtable_l5_enabled())
-+              return;
-+
-       BUG_ON((unsigned long)p4d & (PAGE_SIZE-1));
-       free_page((unsigned long)p4d);
- }