--- /dev/null
+From 297ba57dcdec7ea37e702bcf1a577ac32a034e21 Mon Sep 17 00:00:00 2001
+From: Bart Van Assche <bart.vanassche@wdc.com>
+Date: Wed, 27 Jun 2018 12:55:18 -0700
+Subject: block: Fix cloning of requests with a special payload
+
+From: Bart Van Assche <bart.vanassche@wdc.com>
+
+commit 297ba57dcdec7ea37e702bcf1a577ac32a034e21 upstream.
+
+This patch avoids the following kernel bug, which is triggered by
+removing a path controlled by the dm-mpath driver while mkfs is running:
+
+ kernel BUG at block/blk-core.c:3347!
+ invalid opcode: 0000 [#1] PREEMPT SMP KASAN
+ CPU: 20 PID: 24369 Comm: mkfs.ext4 Not tainted 4.18.0-rc1-dbg+ #2
+ RIP: 0010:blk_end_request_all+0x68/0x70
+ Call Trace:
+ <IRQ>
+ dm_softirq_done+0x326/0x3d0 [dm_mod]
+ blk_done_softirq+0x19b/0x1e0
+ __do_softirq+0x128/0x60d
+ irq_exit+0x100/0x110
+ smp_call_function_single_interrupt+0x90/0x330
+ call_function_single_interrupt+0xf/0x20
+ </IRQ>
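+
+For background on why losing the flag breaks completion accounting: the
+payload-aware helpers key off RQF_SPECIAL_PAYLOAD. A hedged sketch of
+one of them (abridged from include/linux/blkdev.h of this era):
+
+  static inline unsigned int blk_rq_payload_bytes(struct request *rq)
+  {
+          if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
+                  return rq->special_vec.bv_len;
+          return blk_rq_bytes(rq);
+  }
+
+A clone that keeps __data_len but loses the flag and special_vec is
+therefore internally inconsistent, which the dm completion path trips
+over.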
+
+Fixes: f9d03f96b988 ("block: improve handling of the magic discard payload")
+Reviewed-by: Ming Lei <ming.lei@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Acked-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
+Cc: Hannes Reinecke <hare@suse.com>
+Cc: Johannes Thumshirn <jthumshirn@suse.de>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ block/blk-core.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/block/blk-core.c
++++ b/block/blk-core.c
+@@ -3487,6 +3487,10 @@ static void __blk_rq_prep_clone(struct r
+ dst->cpu = src->cpu;
+ dst->__sector = blk_rq_pos(src);
+ dst->__data_len = blk_rq_bytes(src);
++ if (src->rq_flags & RQF_SPECIAL_PAYLOAD) {
++ dst->rq_flags |= RQF_SPECIAL_PAYLOAD;
++ dst->special_vec = src->special_vec;
++ }
+ dst->nr_phys_segments = src->nr_phys_segments;
+ dst->ioprio = src->ioprio;
+ dst->extra_len = src->extra_len;
--- /dev/null
+From 15bfd21fbc5d35834b9ea383dc458a1f0c9e3434 Mon Sep 17 00:00:00 2001
+From: Keith Busch <keith.busch@intel.com>
+Date: Tue, 26 Jun 2018 09:14:58 -0600
+Subject: block: Fix transfer when chunk sectors exceeds max
+
+From: Keith Busch <keith.busch@intel.com>
+
+commit 15bfd21fbc5d35834b9ea383dc458a1f0c9e3434 upstream.
+
+A device may have boundary restrictions where the number of sectors
+between boundaries exceeds its max transfer size. In this case, we need
+to cap the max size to the smaller of the two limits.
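+
+A worked example with illustrative numbers:
+
+  /* chunk_sectors = 1024, max_sectors = 256, offset = 0:
+   * before: 1024 - (0 & 1023) = 1024 sectors -- exceeds max transfer
+   * after:  min(256, 1024)    = 256 sectors
+   */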
+
+Reported-by: Jitendra Bhivare <jitendra.bhivare@broadcom.com>
+Tested-by: Jitendra Bhivare <jitendra.bhivare@broadcom.com>
+Cc: <stable@vger.kernel.org>
+Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Keith Busch <keith.busch@intel.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/blkdev.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/include/linux/blkdev.h
++++ b/include/linux/blkdev.h
+@@ -1124,8 +1124,8 @@ static inline unsigned int blk_max_size_
+ if (!q->limits.chunk_sectors)
+ return q->limits.max_sectors;
+
+- return q->limits.chunk_sectors -
+- (offset & (q->limits.chunk_sectors - 1));
++ return min(q->limits.max_sectors, (unsigned int)(q->limits.chunk_sectors -
++ (offset & (q->limits.chunk_sectors - 1))));
+ }
+
+ static inline unsigned int blk_rq_get_max_sectors(struct request *rq,
--- /dev/null
+From dbc626597c39b24cefce09fbd8e9dea85869a801 Mon Sep 17 00:00:00 2001
+From: Ross Zwisler <ross.zwisler@linux.intel.com>
+Date: Tue, 26 Jun 2018 16:30:41 -0600
+Subject: dm: prevent DAX mounts if not supported
+
+From: Ross Zwisler <ross.zwisler@linux.intel.com>
+
+commit dbc626597c39b24cefce09fbd8e9dea85869a801 upstream.
+
+Currently device_supports_dax() just checks whether the QUEUE_FLAG_DAX
+flag is set on the device's request queue to decide whether or not the
+device supports filesystem DAX. We should instead use
+bdev_dax_supported(), as filesystems do at mount time; it performs
+additional tests, such as verifying that the dax_direct_access() path
+works.
+
+We also explicitly clear QUEUE_FLAG_DAX on the DM device's request queue if
+any of the underlying devices do not support DAX. This makes the handling
+of QUEUE_FLAG_DAX consistent with the setting/clearing of most other flags
+in dm_table_set_restrictions().
+
+Now that bdev_dax_supported() explicitly checks for QUEUE_FLAG_DAX, this
+will ensure that filesystems built upon DM devices will only be able to
+mount with DAX if all underlying devices also support DAX.
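+
+For comparison, roughly how a mount path uses it (a sketch, assuming the
+bool-returning bdev_dax_supported() of this series; wants_dax is a
+stand-in for the filesystem's own mount-option check):
+
+  /* e.g. at mount time, before honouring -o dax */
+  if (wants_dax && !bdev_dax_supported(sb->s_bdev, sb->s_blocksize))
+          return -EINVAL;   /* refuse the DAX mount */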
+
+Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
+Fixes: 545ed20e6df6 ("dm: add infrastructure for DAX support")
+Cc: stable@vger.kernel.org
+Acked-by: Dan Williams <dan.j.williams@intel.com>
+Reviewed-by: Toshi Kani <toshi.kani@hpe.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/dm-table.c | 7 ++++---
+ drivers/md/dm.c | 3 +--
+ 2 files changed, 5 insertions(+), 5 deletions(-)
+
+--- a/drivers/md/dm-table.c
++++ b/drivers/md/dm-table.c
+@@ -885,9 +885,7 @@ EXPORT_SYMBOL_GPL(dm_table_set_type);
+ static int device_supports_dax(struct dm_target *ti, struct dm_dev *dev,
+ sector_t start, sector_t len, void *data)
+ {
+- struct request_queue *q = bdev_get_queue(dev->bdev);
+-
+- return q && blk_queue_dax(q);
++ return bdev_dax_supported(dev->bdev, PAGE_SIZE);
+ }
+
+ static bool dm_table_supports_dax(struct dm_table *t)
+@@ -1907,6 +1905,9 @@ void dm_table_set_restrictions(struct dm
+
+ if (dm_table_supports_dax(t))
+ blk_queue_flag_set(QUEUE_FLAG_DAX, q);
++ else
++ blk_queue_flag_clear(QUEUE_FLAG_DAX, q);
++
+ if (dm_table_supports_dax_write_cache(t))
+ dax_write_cache(t->md->dax_dev, true);
+
+--- a/drivers/md/dm.c
++++ b/drivers/md/dm.c
+@@ -1056,8 +1056,7 @@ static long dm_dax_direct_access(struct
+ if (len < 1)
+ goto out;
+ nr_pages = min(len, nr_pages);
+- if (ti->type->direct_access)
+- ret = ti->type->direct_access(ti, pgoff, nr_pages, kaddr, pfn);
++ ret = ti->type->direct_access(ti, pgoff, nr_pages, kaddr, pfn);
+
+ out:
+ dm_put_live_table(md, srcu_idx);
--- /dev/null
+From a685557fbbc3122ed11e8ad3fa63a11ebc5de8c3 Mon Sep 17 00:00:00 2001
+From: Mike Snitzer <snitzer@redhat.com>
+Date: Tue, 26 Jun 2018 12:04:23 -0400
+Subject: dm thin: handle running out of data space vs concurrent discard
+
+From: Mike Snitzer <snitzer@redhat.com>
+
+commit a685557fbbc3122ed11e8ad3fa63a11ebc5de8c3 upstream.
+
+Discards issued to a DM thin device can complete to userspace (via
+fstrim) _before_ the metadata changes associated with the discards are
+reflected in the thinp superblock (e.g. free blocks). As such, if a
+user constructs a test that loops repeatedly over these steps, block
+allocation can fail due to discards not having completed yet:
+1) fill thin device via filesystem file
+2) remove file
+3) fstrim
+
+From the initial report, here:
+https://www.redhat.com/archives/dm-devel/2018-April/msg00022.html
+
+"The root cause of this issue is that dm-thin will first remove
+mapping and increase corresponding blocks' reference count to prevent
+them from being reused before DISCARD bios get processed by the
+underlying layers. However. increasing blocks' reference count could
+also increase the nr_allocated_this_transaction in struct sm_disk
+which makes smd->old_ll.nr_allocated +
+smd->nr_allocated_this_transaction bigger than smd->old_ll.nr_blocks.
+In this case, alloc_data_block() will never commit metadata to reset
+the begin pointer of struct sm_disk, because sm_disk_get_nr_free()
+always return an underflow value."
+
+While there is room for improvement in the space-map accounting that
+thinp makes use of, the reality is that this test is inherently racy:
+the previous iteration's fstrim discard(s) may still be completing
+while the next iteration's dd is concurrently allocating blocks.
+
+No amount of space-map accounting improvement will allow users to use
+a block before a discard of that block has completed.
+
+So the best we can really do is allow DM thinp to gracefully handle such
+aggressive use of all the pool's data by degrading the pool into
+out-of-data-space (OODS) mode.  We _should_ get that behaviour already
+(if space-map accounting didn't falsely cause alloc_data_block() to
+believe free space was available), but short of that we handle the
+current reality that dm_pool_alloc_data_block() can return -ENOSPC.
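+
+For reference, a hedged sketch of the free-space computation the quoted
+report refers to (abridged from
+drivers/md/persistent-data/dm-space-map-disk.c):
+
+  static int sm_disk_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
+  {
+          struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
+
+          /* unsigned arithmetic: if nr_allocated plus
+           * nr_allocated_this_transaction ever exceeds nr_blocks,
+           * this wraps to a huge bogus "free" count, so the commit
+           * that would reset the allocation cursor never happens */
+          *count = smd->old_ll.nr_blocks - smd->old_ll.nr_allocated
+                   - smd->nr_allocated_this_transaction;
+          return 0;
+  }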
+
+Reported-by: Dennis Yang <dennisyang@qnap.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/dm-thin.c | 11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+--- a/drivers/md/dm-thin.c
++++ b/drivers/md/dm-thin.c
+@@ -1385,6 +1385,8 @@ static void schedule_external_copy(struc
+
+ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode);
+
++static void requeue_bios(struct pool *pool);
++
+ static void check_for_space(struct pool *pool)
+ {
+ int r;
+@@ -1397,8 +1399,10 @@ static void check_for_space(struct pool
+ if (r)
+ return;
+
+- if (nr_free)
++ if (nr_free) {
+ set_pool_mode(pool, PM_WRITE);
++ requeue_bios(pool);
++ }
+ }
+
+ /*
+@@ -1475,7 +1479,10 @@ static int alloc_data_block(struct thin_
+
+ r = dm_pool_alloc_data_block(pool->pmd, result);
+ if (r) {
+- metadata_operation_failed(pool, "dm_pool_alloc_data_block", r);
++ if (r == -ENOSPC)
++ set_pool_mode(pool, PM_OUT_OF_DATA_SPACE);
++ else
++ metadata_operation_failed(pool, "dm_pool_alloc_data_block", r);
+ return r;
+ }
+
--- /dev/null
+From f21c601a2bb319ec19eb4562eadc7797d90fd90e Mon Sep 17 00:00:00 2001
+From: Mike Snitzer <snitzer@redhat.com>
+Date: Fri, 15 Jun 2018 09:35:33 -0400
+Subject: dm: use bio_split() when splitting out the already processed bio
+
+From: Mike Snitzer <snitzer@redhat.com>
+
+commit f21c601a2bb319ec19eb4562eadc7797d90fd90e upstream.
+
+Use of bio_clone_bioset() is inefficient if there is no need to clone
+the original bio's bio_vec array. Best to use the bio_clone_fast()
+variant. Also, just using bio_advance() is only part of what is needed
+to properly set up the clone -- it doesn't account for the various
+bio_integrity() related work that also needs to be performed (see
+bio_split).
+
+Address both of these issues by switching from bio_clone_bioset() to
+bio_split().
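+
+Roughly what bio_split() provides (abridged from block/bio.c; error
+handling and sanity checks omitted):
+
+  struct bio *bio_split(struct bio *bio, int sectors,
+                        gfp_t gfp, struct bio_set *bs)
+  {
+          struct bio *split;
+
+          split = bio_clone_fast(bio, gfp, bs); /* shares the bio_vec array */
+          split->bi_iter.bi_size = sectors << 9;
+
+          if (bio_integrity(split))
+                  bio_integrity_trim(split);    /* the part a bare bio_advance() missed */
+
+          bio_advance(bio, split->bi_iter.bi_size);
+          return split;
+  }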
+
+Fixes: 18a25da8 ("dm: ensure bio submission follows a depth-first tree walk")
+Cc: stable@vger.kernel.org # 4.15+, requires removal of '&' before md->queue->bio_split
+Reported-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: NeilBrown <neilb@suse.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/dm.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/drivers/md/dm.c
++++ b/drivers/md/dm.c
+@@ -1581,10 +1581,9 @@ static blk_qc_t __split_and_process_bio(
+ * the usage of io->orig_bio in dm_remap_zone_report()
+ * won't be affected by this reassignment.
+ */
+- struct bio *b = bio_clone_bioset(bio, GFP_NOIO,
+- md->queue->bio_split);
++ struct bio *b = bio_split(bio, bio_sectors(bio) - ci.sector_count,
++ GFP_NOIO, md->queue->bio_split);
+ ci.io->orig_bio = b;
+- bio_advance(bio, (bio_sectors(bio) - ci.sector_count) << 9);
+ bio_chain(b, bio);
+ ret = generic_make_request(bio);
+ break;
--- /dev/null
+From 2d0b2d64d325e22939d9db3ba784f1236459ed98 Mon Sep 17 00:00:00 2001
+From: Bart Van Assche <bart.vanassche@wdc.com>
+Date: Fri, 22 Jun 2018 08:09:11 -0700
+Subject: dm zoned: avoid triggering reclaim from inside dmz_map()
+
+From: Bart Van Assche <bart.vanassche@wdc.com>
+
+commit 2d0b2d64d325e22939d9db3ba784f1236459ed98 upstream.
+
+This patch avoids the following lockdep report:
+
+======================================================
+WARNING: possible circular locking dependency detected
+4.18.0-rc1 #62 Not tainted
+------------------------------------------------------
+kswapd0/84 is trying to acquire lock:
+00000000c313516d (&xfs_nondir_ilock_class){++++}, at: xfs_free_eofblocks+0xa2/0x1e0
+
+but task is already holding lock:
+00000000591c83ae (fs_reclaim){+.+.}, at: __fs_reclaim_acquire+0x5/0x30
+
+which lock already depends on the new lock.
+
+the existing dependency chain (in reverse order) is:
+
+-> #2 (fs_reclaim){+.+.}:
+ kmem_cache_alloc+0x2c/0x2b0
+ radix_tree_node_alloc.constprop.19+0x3d/0xc0
+ __radix_tree_create+0x161/0x1c0
+ __radix_tree_insert+0x45/0x210
+ dmz_map+0x245/0x2d0 [dm_zoned]
+ __map_bio+0x40/0x260
+ __split_and_process_non_flush+0x116/0x220
+ __split_and_process_bio+0x81/0x180
+ __dm_make_request.isra.32+0x5a/0x100
+ generic_make_request+0x36e/0x690
+ submit_bio+0x6c/0x140
+ mpage_readpages+0x19e/0x1f0
+ read_pages+0x6d/0x1b0
+ __do_page_cache_readahead+0x21b/0x2d0
+ force_page_cache_readahead+0xc4/0x100
+ generic_file_read_iter+0x7c6/0xd20
+ __vfs_read+0x102/0x180
+ vfs_read+0x9b/0x140
+ ksys_read+0x55/0xc0
+ do_syscall_64+0x5a/0x1f0
+ entry_SYSCALL_64_after_hwframe+0x49/0xbe
+
+-> #1 (&dmz->chunk_lock){+.+.}:
+ dmz_map+0x133/0x2d0 [dm_zoned]
+ __map_bio+0x40/0x260
+ __split_and_process_non_flush+0x116/0x220
+ __split_and_process_bio+0x81/0x180
+ __dm_make_request.isra.32+0x5a/0x100
+ generic_make_request+0x36e/0x690
+ submit_bio+0x6c/0x140
+ _xfs_buf_ioapply+0x31c/0x590
+ xfs_buf_submit_wait+0x73/0x520
+ xfs_buf_read_map+0x134/0x2f0
+ xfs_trans_read_buf_map+0xc3/0x580
+ xfs_read_agf+0xa5/0x1e0
+ xfs_alloc_read_agf+0x59/0x2b0
+ xfs_alloc_pagf_init+0x27/0x60
+ xfs_bmap_longest_free_extent+0x43/0xb0
+ xfs_bmap_btalloc_nullfb+0x7f/0xf0
+ xfs_bmap_btalloc+0x428/0x7c0
+ xfs_bmapi_write+0x598/0xcc0
+ xfs_iomap_write_allocate+0x15a/0x330
+ xfs_map_blocks+0x1cf/0x3f0
+ xfs_do_writepage+0x15f/0x7b0
+ write_cache_pages+0x1ca/0x540
+ xfs_vm_writepages+0x65/0xa0
+ do_writepages+0x48/0xf0
+ __writeback_single_inode+0x58/0x730
+ writeback_sb_inodes+0x249/0x5c0
+ wb_writeback+0x11e/0x550
+ wb_workfn+0xa3/0x670
+ process_one_work+0x228/0x670
+ worker_thread+0x3c/0x390
+ kthread+0x11c/0x140
+ ret_from_fork+0x3a/0x50
+
+-> #0 (&xfs_nondir_ilock_class){++++}:
+ down_read_nested+0x43/0x70
+ xfs_free_eofblocks+0xa2/0x1e0
+ xfs_fs_destroy_inode+0xac/0x270
+ dispose_list+0x51/0x80
+ prune_icache_sb+0x52/0x70
+ super_cache_scan+0x127/0x1a0
+ shrink_slab.part.47+0x1bd/0x590
+ shrink_node+0x3b5/0x470
+ balance_pgdat+0x158/0x3b0
+ kswapd+0x1ba/0x600
+ kthread+0x11c/0x140
+ ret_from_fork+0x3a/0x50
+
+other info that might help us debug this:
+
+Chain exists of:
+ &xfs_nondir_ilock_class --> &dmz->chunk_lock --> fs_reclaim
+
+Possible unsafe locking scenario:
+
+ CPU0 CPU1
+ ---- ----
+lock(fs_reclaim);
+ lock(&dmz->chunk_lock);
+ lock(fs_reclaim);
+lock(&xfs_nondir_ilock_class);
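+
+The gfp mask given to INIT_RADIX_TREE() is stored in the tree root and
+reused for the tree's internal node allocations, so switching it to
+GFP_NOIO keeps dmz_map()'s radix_tree_insert() from entering filesystem
+reclaim. Roughly:
+
+  INIT_RADIX_TREE(&dmz->chunk_rxtree, GFP_NOIO); /* saved in root->gfp_mask */
+
+  /* later, in the bio submission path: node allocations use GFP_NOIO */
+  radix_tree_insert(&dmz->chunk_rxtree, chunk, cw);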
+
+---
+ drivers/md/dm-zoned-target.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/md/dm-zoned-target.c
++++ b/drivers/md/dm-zoned-target.c
+@@ -788,7 +788,7 @@ static int dmz_ctr(struct dm_target *ti,
+
+ /* Chunk BIO work */
+ mutex_init(&dmz->chunk_lock);
+- INIT_RADIX_TREE(&dmz->chunk_rxtree, GFP_KERNEL);
++ INIT_RADIX_TREE(&dmz->chunk_rxtree, GFP_NOIO);
+ dmz->chunk_wq = alloc_workqueue("dmz_cwq_%s", WQ_MEM_RECLAIM | WQ_UNBOUND,
+ 0, dev->name);
+ if (!dmz->chunk_wq) {
--- /dev/null
+From 4557641b4c7046625c026fb809c47ef0d43ae595 Mon Sep 17 00:00:00 2001
+From: Ross Zwisler <ross.zwisler@linux.intel.com>
+Date: Tue, 26 Jun 2018 16:30:39 -0600
+Subject: pmem: only set QUEUE_FLAG_DAX for fsdax mode
+
+From: Ross Zwisler <ross.zwisler@linux.intel.com>
+
+commit 4557641b4c7046625c026fb809c47ef0d43ae595 upstream.
+
+QUEUE_FLAG_DAX is an indication that a given block device supports
+filesystem DAX and should not be set for PMEM namespaces which are in "raw"
+mode. These namespaces lack struct page and are prevented from
+participating in filesystem DAX as of commit 569d0365f571 ("dax: require
+'struct page' by default for filesystem dax").
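+
+For context, a hedged sketch of how pmem->pfn_flags is populated earlier
+in pmem_attach_disk() (abridged; see drivers/nvdimm/pmem.c):
+
+  pmem->pfn_flags = PFN_DEV;
+  if (is_nd_pfn(dev)) {                   /* fsdax namespace */
+          /* ... */
+          pmem->pfn_flags |= PFN_MAP;     /* struct pages available */
+  } else if (pmem_should_map_pages(dev)) {
+          /* ... */
+          pmem->pfn_flags |= PFN_MAP;
+  }
+
+Raw-mode namespaces never gain PFN_MAP, so the PFN_MAP test added below
+keeps QUEUE_FLAG_DAX off for them.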
+
+Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
+Suggested-by: Mike Snitzer <snitzer@redhat.com>
+Fixes: 569d0365f571 ("dax: require 'struct page' by default for filesystem dax")
+Cc: stable@vger.kernel.org
+Acked-by: Dan Williams <dan.j.williams@intel.com>
+Reviewed-by: Toshi Kani <toshi.kani@hpe.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/nvdimm/pmem.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/nvdimm/pmem.c
++++ b/drivers/nvdimm/pmem.c
+@@ -387,7 +387,8 @@ static int pmem_attach_disk(struct devic
+ blk_queue_logical_block_size(q, pmem_sector_size(ndns));
+ blk_queue_max_hw_sectors(q, UINT_MAX);
+ blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
+- blk_queue_flag_set(QUEUE_FLAG_DAX, q);
++ if (pmem->pfn_flags & PFN_MAP)
++ blk_queue_flag_set(QUEUE_FLAG_DAX, q);
+ q->queuedata = pmem;
+
+ disk = alloc_disk_node(0, nid);
--- /dev/null
+From 0da74120c5341389b97c4ee27487a97224999ee1 Mon Sep 17 00:00:00 2001
+From: Jann Horn <jannh@google.com>
+Date: Thu, 28 Jun 2018 20:39:54 -0400
+Subject: selinux: move user accesses in selinuxfs out of locked regions
+
+From: Jann Horn <jannh@google.com>
+
+commit 0da74120c5341389b97c4ee27487a97224999ee1 upstream.
+
+If a user is accessing a file in selinuxfs with a pointer to a userspace
+buffer that is backed by e.g. a userfaultfd, the userspace access can
+stall indefinitely, and fsi->mutex stays blocked for just as long if it
+is held across the access.
+
+For sel_read_policy(), remove the locking, since this method doesn't seem
+to access anything that requires locking.
+
+For sel_read_bool(), move the user access below the locked region.
+
+For sel_write_bool() and sel_commit_bools_write(), move the user access
+up above the locked region.
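+
+The resulting write-side pattern (a sketch; the hunks below show the
+real code):
+
+  page = memdup_user_nul(buf, count); /* may fault; no lock held here */
+  if (IS_ERR(page))
+          return PTR_ERR(page);
+
+  mutex_lock(&fsi->mutex);
+  /* ... validate state and apply the new value ... */
+  mutex_unlock(&fsi->mutex);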
+
+Cc: stable@vger.kernel.org
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Jann Horn <jannh@google.com>
+Acked-by: Stephen Smalley <sds@tycho.nsa.gov>
+[PM: removed an unused variable in sel_read_policy()]
+Signed-off-by: Paul Moore <paul@paul-moore.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ security/selinux/selinuxfs.c | 78 ++++++++++++++++++-------------------------
+ 1 file changed, 33 insertions(+), 45 deletions(-)
+
+--- a/security/selinux/selinuxfs.c
++++ b/security/selinux/selinuxfs.c
+@@ -435,22 +435,16 @@ static int sel_release_policy(struct ino
+ static ssize_t sel_read_policy(struct file *filp, char __user *buf,
+ size_t count, loff_t *ppos)
+ {
+- struct selinux_fs_info *fsi = file_inode(filp)->i_sb->s_fs_info;
+ struct policy_load_memory *plm = filp->private_data;
+ int ret;
+
+- mutex_lock(&fsi->mutex);
+-
+ ret = avc_has_perm(&selinux_state,
+ current_sid(), SECINITSID_SECURITY,
+ SECCLASS_SECURITY, SECURITY__READ_POLICY, NULL);
+ if (ret)
+- goto out;
++ return ret;
+
+- ret = simple_read_from_buffer(buf, count, ppos, plm->data, plm->len);
+-out:
+- mutex_unlock(&fsi->mutex);
+- return ret;
++ return simple_read_from_buffer(buf, count, ppos, plm->data, plm->len);
+ }
+
+ static int sel_mmap_policy_fault(struct vm_fault *vmf)
+@@ -1182,25 +1176,29 @@ static ssize_t sel_read_bool(struct file
+ ret = -EINVAL;
+ if (index >= fsi->bool_num || strcmp(name,
+ fsi->bool_pending_names[index]))
+- goto out;
++ goto out_unlock;
+
+ ret = -ENOMEM;
+ page = (char *)get_zeroed_page(GFP_KERNEL);
+ if (!page)
+- goto out;
++ goto out_unlock;
+
+ cur_enforcing = security_get_bool_value(fsi->state, index);
+ if (cur_enforcing < 0) {
+ ret = cur_enforcing;
+- goto out;
++ goto out_unlock;
+ }
+ length = scnprintf(page, PAGE_SIZE, "%d %d", cur_enforcing,
+ fsi->bool_pending_values[index]);
+- ret = simple_read_from_buffer(buf, count, ppos, page, length);
+-out:
+ mutex_unlock(&fsi->mutex);
++ ret = simple_read_from_buffer(buf, count, ppos, page, length);
++out_free:
+ free_page((unsigned long)page);
+ return ret;
++
++out_unlock:
++ mutex_unlock(&fsi->mutex);
++ goto out_free;
+ }
+
+ static ssize_t sel_write_bool(struct file *filep, const char __user *buf,
+@@ -1213,6 +1211,17 @@ static ssize_t sel_write_bool(struct fil
+ unsigned index = file_inode(filep)->i_ino & SEL_INO_MASK;
+ const char *name = filep->f_path.dentry->d_name.name;
+
++ if (count >= PAGE_SIZE)
++ return -ENOMEM;
++
++ /* No partial writes. */
++ if (*ppos != 0)
++ return -EINVAL;
++
++ page = memdup_user_nul(buf, count);
++ if (IS_ERR(page))
++ return PTR_ERR(page);
++
+ mutex_lock(&fsi->mutex);
+
+ length = avc_has_perm(&selinux_state,
+@@ -1227,22 +1236,6 @@ static ssize_t sel_write_bool(struct fil
+ fsi->bool_pending_names[index]))
+ goto out;
+
+- length = -ENOMEM;
+- if (count >= PAGE_SIZE)
+- goto out;
+-
+- /* No partial writes. */
+- length = -EINVAL;
+- if (*ppos != 0)
+- goto out;
+-
+- page = memdup_user_nul(buf, count);
+- if (IS_ERR(page)) {
+- length = PTR_ERR(page);
+- page = NULL;
+- goto out;
+- }
+-
+ length = -EINVAL;
+ if (sscanf(page, "%d", &new_value) != 1)
+ goto out;
+@@ -1274,6 +1267,17 @@ static ssize_t sel_commit_bools_write(st
+ ssize_t length;
+ int new_value;
+
++ if (count >= PAGE_SIZE)
++ return -ENOMEM;
++
++ /* No partial writes. */
++ if (*ppos != 0)
++ return -EINVAL;
++
++ page = memdup_user_nul(buf, count);
++ if (IS_ERR(page))
++ return PTR_ERR(page);
++
+ mutex_lock(&fsi->mutex);
+
+ length = avc_has_perm(&selinux_state,
+@@ -1283,22 +1287,6 @@ static ssize_t sel_commit_bools_write(st
+ if (length)
+ goto out;
+
+- length = -ENOMEM;
+- if (count >= PAGE_SIZE)
+- goto out;
+-
+- /* No partial writes. */
+- length = -EINVAL;
+- if (*ppos != 0)
+- goto out;
+-
+- page = memdup_user_nul(buf, count);
+- if (IS_ERR(page)) {
+- length = PTR_ERR(page);
+- page = NULL;
+- goto out;
+- }
+-
+ length = -EINVAL;
+ if (sscanf(page, "%d", &new_value) != 1)
+ goto out;
rbd-flush-rbd_dev-watch_dwork-after-watch-is-unregistered.patch
mm-ksm.c-ignore-stable_flag-of-rmap_item-address-in-rmap_walk_ksm.patch
mm-fix-devmem_is_allowed-for-sub-page-system-ram-intersections.patch
-x86-mm-don-t-free-p4d-table-when-it-is-folded-at-runtime.patch
tracing-check-for-no-filter-when-processing-event-filters.patch
xen-remove-unnecessary-bug_on-from-__unbind_from_irq.patch
net-ethernet-fix-suspend-resume-in-davinci_emac.patch
i2c-gpio-initialize-scl-to-high-again.patch
slub-fix-failure-when-we-delete-and-create-a-slab-cache.patch
kasan-depend-on-config_slub_debug.patch
+dm-prevent-dax-mounts-if-not-supported.patch
+dm-use-bio_split-when-splitting-out-the-already-processed-bio.patch
+pmem-only-set-queue_flag_dax-for-fsdax-mode.patch
+block-fix-transfer-when-chunk-sectors-exceeds-max.patch
+block-fix-cloning-of-requests-with-a-special-payload.patch
+x86-e820-put-e820_type_ram-regions-into-memblock.reserved.patch
+selinux-move-user-accesses-in-selinuxfs-out-of-locked-regions.patch
+x86-entry-64-compat-fix-x86-entry-64-compat-preserve-r8-r11-in-int-0x80.patch
+x86-efi-fix-efi_call_phys_epilog-with-config_x86_5level-y.patch
+dm-zoned-avoid-triggering-reclaim-from-inside-dmz_map.patch
+dm-thin-handle-running-out-of-data-space-vs-concurrent-discard.patch
--- /dev/null
+From 124049decbb121ec32742c94fb5d9d6bed8f24d8 Mon Sep 17 00:00:00 2001
+From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Date: Wed, 27 Jun 2018 23:26:13 -0700
+Subject: x86/e820: put !E820_TYPE_RAM regions into memblock.reserved
+
+From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+
+commit 124049decbb121ec32742c94fb5d9d6bed8f24d8 upstream.
+
+There is a kernel panic that is triggered when reading /proc/kpageflags
+on the kernel booted with kernel parameter 'memmap=nn[KMG]!ss[KMG]':
+
+ BUG: unable to handle kernel paging request at fffffffffffffffe
+ PGD 9b20e067 P4D 9b20e067 PUD 9b210067 PMD 0
+ Oops: 0000 [#1] SMP PTI
+ CPU: 2 PID: 1728 Comm: page-types Not tainted 4.17.0-rc6-mm1-v4.17-rc6-180605-0816-00236-g2dfb086ef02c+ #160
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.fc28 04/01/2014
+ RIP: 0010:stable_page_flags+0x27/0x3c0
+ Code: 00 00 00 0f 1f 44 00 00 48 85 ff 0f 84 a0 03 00 00 41 54 55 49 89 fc 53 48 8b 57 08 48 8b 2f 48 8d 42 ff 83 e2 01 48 0f 44 c7 <48> 8b 00 f6 c4 01 0f 84 10 03 00 00 31 db 49 8b 54 24 08 4c 89 e7
+ RSP: 0018:ffffbbd44111fde0 EFLAGS: 00010202
+ RAX: fffffffffffffffe RBX: 00007fffffffeff9 RCX: 0000000000000000
+ RDX: 0000000000000001 RSI: 0000000000000202 RDI: ffffed1182fff5c0
+ RBP: ffffffffffffffff R08: 0000000000000001 R09: 0000000000000001
+ R10: ffffbbd44111fed8 R11: 0000000000000000 R12: ffffed1182fff5c0
+ R13: 00000000000bffd7 R14: 0000000002fff5c0 R15: ffffbbd44111ff10
+ FS: 00007efc4335a500(0000) GS:ffff93a5bfc00000(0000) knlGS:0000000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: fffffffffffffffe CR3: 00000000b2a58000 CR4: 00000000001406e0
+ Call Trace:
+ kpageflags_read+0xc7/0x120
+ proc_reg_read+0x3c/0x60
+ __vfs_read+0x36/0x170
+ vfs_read+0x89/0x130
+ ksys_pread64+0x71/0x90
+ do_syscall_64+0x5b/0x160
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+ RIP: 0033:0x7efc42e75e23
+ Code: 09 00 ba 9f 01 00 00 e8 ab 81 f4 ff 66 2e 0f 1f 84 00 00 00 00 00 90 83 3d 29 0a 2d 00 00 75 13 49 89 ca b8 11 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 34 c3 48 83 ec 08 e8 db d3 01 00 48 89 04 24
+
+According to kernel bisection, this problem became visible due to commit
+f7f99100d8d9 ("mm: stop zeroing memory during allocation in vmemmap")
+which changes how struct pages are initialized.
+
+Memblock layout affects the pfn ranges covered by node/zone. Consider
+that we have a VM with 2 NUMA nodes and each node has 4GB memory, and
+the default (no memmap= given) memblock layout is as follows:
+
+ MEMBLOCK configuration:
+ memory size = 0x00000001fff75c00 reserved size = 0x000000000300c000
+ memory.cnt = 0x4
+ memory[0x0] [0x0000000000001000-0x000000000009efff], 0x000000000009e000 bytes on node 0 flags: 0x0
+ memory[0x1] [0x0000000000100000-0x00000000bffd6fff], 0x00000000bfed7000 bytes on node 0 flags: 0x0
+ memory[0x2] [0x0000000100000000-0x000000013fffffff], 0x0000000040000000 bytes on node 0 flags: 0x0
+ memory[0x3] [0x0000000140000000-0x000000023fffffff], 0x0000000100000000 bytes on node 1 flags: 0x0
+ ...
+
+If you give memmap=1G!4G (so it just covers memory[0x2]),
+the range [0x100000000-0x13fffffff] is gone:
+
+ MEMBLOCK configuration:
+ memory size = 0x00000001bff75c00 reserved size = 0x000000000300c000
+ memory.cnt = 0x3
+ memory[0x0] [0x0000000000001000-0x000000000009efff], 0x000000000009e000 bytes on node 0 flags: 0x0
+ memory[0x1] [0x0000000000100000-0x00000000bffd6fff], 0x00000000bfed7000 bytes on node 0 flags: 0x0
+ memory[0x2] [0x0000000140000000-0x000000023fffffff], 0x0000000100000000 bytes on node 1 flags: 0x0
+ ...
+
+This shrinks node 0's pfn range, because that range is calculated from
+the address ranges in memblock.memory.  As a result, some of the struct
+pages in the gap range are left uninitialized.
+
+We have a function, zero_resv_unavail(), which zeroes the struct pages
+within the reserved-but-unavailable ranges (i.e. memblock.reserved &&
+!memblock.memory).  This patch utilizes it to cover all unavailable
+ranges by putting them into memblock.reserved.
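+
+For reference, a hedged sketch of zero_resv_unavail() (abridged from
+mm/page_alloc.c of this era):
+
+  void __paginginit zero_resv_unavail(void)
+  {
+          phys_addr_t start, end;
+          unsigned long pfn;
+          u64 i;
+
+          /* ranges in memblock.reserved without memblock.memory backing */
+          for_each_resv_unavail_range(i, &start, &end)
+                  for (pfn = PFN_DOWN(start); pfn < PFN_UP(end); pfn++)
+                          mm_zero_struct_page(pfn_to_page(pfn));
+  }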
+
+Link: http://lkml.kernel.org/r/20180615072947.GB23273@hori1.linux.bs1.fc.nec.co.jp
+Fixes: f7f99100d8d9 ("mm: stop zeroing memory during allocation in vmemmap")
+Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Tested-by: Oscar Salvador <osalvador@suse.de>
+Tested-by: "Herton R. Krzesinski" <herton@redhat.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Reviewed-by: Pavel Tatashin <pasha.tatashin@oracle.com>
+Cc: Steven Sistare <steven.sistare@oracle.com>
+Cc: Daniel Jordan <daniel.m.jordan@oracle.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/e820.c | 15 ++++++++++++---
+ 1 file changed, 12 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/kernel/e820.c
++++ b/arch/x86/kernel/e820.c
+@@ -1246,6 +1246,7 @@ void __init e820__memblock_setup(void)
+ {
+ int i;
+ u64 end;
++ u64 addr = 0;
+
+ /*
+ * The bootstrap memblock region count maximum is 128 entries
+@@ -1262,13 +1263,21 @@ void __init e820__memblock_setup(void)
+ struct e820_entry *entry = &e820_table->entries[i];
+
+ end = entry->addr + entry->size;
++ if (addr < entry->addr)
++ memblock_reserve(addr, entry->addr - addr);
++ addr = end;
+ if (end != (resource_size_t)end)
+ continue;
+
++ /*
++ * all !E820_TYPE_RAM ranges (including gap ranges) are put
++ * into memblock.reserved to make sure that struct pages in
++ * such regions are not left uninitialized after bootup.
++ */
+ if (entry->type != E820_TYPE_RAM && entry->type != E820_TYPE_RESERVED_KERN)
+- continue;
+-
+- memblock_add(entry->addr, entry->size);
++ memblock_reserve(entry->addr, entry->size);
++ else
++ memblock_add(entry->addr, entry->size);
+ }
+
+ /* Throw away partial pages: */
--- /dev/null
+From cfe19577047e74cdac5826adbdc2337d8437f8fb Mon Sep 17 00:00:00 2001
+From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
+Date: Mon, 25 Jun 2018 15:08:52 +0300
+Subject: x86/efi: Fix efi_call_phys_epilog() with CONFIG_X86_5LEVEL=y
+
+From: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+
+commit cfe19577047e74cdac5826adbdc2337d8437f8fb upstream.
+
+Open-coded page table entry checks don't work correctly when we fold a
+page table level at runtime.
+
+pgd_present() on a 4-level paging machine always returns true, but the
+open-coded version of the check may return a false-negative result,
+causing us to silently skip the rest of the loop body in
+efi_call_phys_epilog().
+
+Replace open-coded checks with proper helpers.
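+
+For context, the proper helpers already account for runtime folding; a
+hedged sketch of pgd_present() (roughly as in
+arch/x86/include/asm/pgtable.h; the l5 predicate's exact spelling varies
+across versions):
+
+  static inline int pgd_present(pgd_t pgd)
+  {
+          if (!pgtable_l5_enabled)
+                  return 1; /* p4d folded into pgd: always "present" */
+          return pgd_flags(pgd) & _PAGE_PRESENT;
+  }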
+
+Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Cc: Baoquan He <bhe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Matt Fleming <matt@codeblueprint.co.uk>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org # v4.12+
+Fixes: 94133e46a0f5 ("x86/efi: Correct EFI identity mapping under 'efi=old_map' when KASLR is enabled")
+Link: http://lkml.kernel.org/r/20180625120852.18300-1-kirill.shutemov@linux.intel.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/platform/efi/efi_64.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/platform/efi/efi_64.c
++++ b/arch/x86/platform/efi/efi_64.c
+@@ -166,14 +166,14 @@ void __init efi_call_phys_epilog(pgd_t *
+ pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE);
+ set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]);
+
+- if (!(pgd_val(*pgd) & _PAGE_PRESENT))
++ if (!pgd_present(*pgd))
+ continue;
+
+ for (i = 0; i < PTRS_PER_P4D; i++) {
+ p4d = p4d_offset(pgd,
+ pgd_idx * PGDIR_SIZE + i * P4D_SIZE);
+
+- if (!(p4d_val(*p4d) & _PAGE_PRESENT))
++ if (!p4d_present(*p4d))
+ continue;
+
+ pud = (pud_t *)p4d_page_vaddr(*p4d);
--- /dev/null
+From 22cd978e598618e82c3c3348d2069184f6884182 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Tue, 26 Jun 2018 22:45:52 -0700
+Subject: x86/entry/64/compat: Fix "x86/entry/64/compat: Preserve r8-r11 in int $0x80"
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 22cd978e598618e82c3c3348d2069184f6884182 upstream.
+
+Commit:
+
+ 8bb2610bc496 ("x86/entry/64/compat: Preserve r8-r11 in int $0x80")
+
+was busted: my original patch had a minor conflict with
+some of the nospec changes, but "git apply" is very clever
+and silently accepted the patch by making the same changes
+to a different function in the same file. There was obviously
+a huge offset, but "git apply" for some reason doesn't feel
+any need to say so.
+
+Move the changes to the correct function. Now the
+test_syscall_vdso_32 selftest passes.
+
+If anyone cares to observe the original problem, try applying the
+patch at:
+
+ https://lore.kernel.org/lkml/d4c4d9985fbe64f8c9e19291886453914b48caee.1523975710.git.luto@kernel.org/raw
+
+to the kernel at 316d097c4cd4e7f2ef50c40cff2db266593c4ec4:
+
+ - "git am" and "git apply" accept the patch without any complaints at all
+ - "patch -p1" at least prints out a message about the huge offset.
+
+Reported-by: zhijianx.li@intel.com
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Arjan van de Ven <arjan@linux.intel.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Woodhouse <dwmw2@infradead.org>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org #v4.17+
+Fixes: 8bb2610bc496 ("x86/entry/64/compat: Preserve r8-r11 in int $0x80")
+Link: http://lkml.kernel.org/r/6012b922485401bc42676e804171ded262fc2ef2.1530078306.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/entry/entry_64_compat.S | 16 ++++++++--------
+ 1 file changed, 8 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -84,13 +84,13 @@ ENTRY(entry_SYSENTER_compat)
+ pushq %rdx /* pt_regs->dx */
+ pushq %rcx /* pt_regs->cx */
+ pushq $-ENOSYS /* pt_regs->ax */
+- pushq %r8 /* pt_regs->r8 */
++ pushq $0 /* pt_regs->r8 = 0 */
+ xorl %r8d, %r8d /* nospec r8 */
+- pushq %r9 /* pt_regs->r9 */
++ pushq $0 /* pt_regs->r9 = 0 */
+ xorl %r9d, %r9d /* nospec r9 */
+- pushq %r10 /* pt_regs->r10 */
++ pushq $0 /* pt_regs->r10 = 0 */
+ xorl %r10d, %r10d /* nospec r10 */
+- pushq %r11 /* pt_regs->r11 */
++ pushq $0 /* pt_regs->r11 = 0 */
+ xorl %r11d, %r11d /* nospec r11 */
+ pushq %rbx /* pt_regs->rbx */
+ xorl %ebx, %ebx /* nospec rbx */
+@@ -374,13 +374,13 @@ ENTRY(entry_INT80_compat)
+ pushq %rcx /* pt_regs->cx */
+ xorl %ecx, %ecx /* nospec cx */
+ pushq $-ENOSYS /* pt_regs->ax */
+- pushq $0 /* pt_regs->r8 = 0 */
++ pushq %r8 /* pt_regs->r8 */
+ xorl %r8d, %r8d /* nospec r8 */
+- pushq $0 /* pt_regs->r9 = 0 */
++ pushq %r9 /* pt_regs->r9 */
+ xorl %r9d, %r9d /* nospec r9 */
+- pushq $0 /* pt_regs->r10 = 0 */
++ pushq %r10 /* pt_regs->r10*/
+ xorl %r10d, %r10d /* nospec r10 */
+- pushq $0 /* pt_regs->r11 = 0 */
++ pushq %r11 /* pt_regs->r11 */
+ xorl %r11d, %r11d /* nospec r11 */
+ pushq %rbx /* pt_regs->rbx */
+ xorl %ebx, %ebx /* nospec rbx */
+++ /dev/null
-From 0e311d237d7f3022b7dafb639b42541bfb42fe94 Mon Sep 17 00:00:00 2001
-From: Andrey Ryabinin <aryabinin@virtuozzo.com>
-Date: Mon, 25 Jun 2018 13:24:27 +0300
-Subject: x86/mm: Don't free P4D table when it is folded at runtime
-
-From: Andrey Ryabinin <aryabinin@virtuozzo.com>
-
-commit 0e311d237d7f3022b7dafb639b42541bfb42fe94 upstream.
-
-When the P4D page table layer is folded at runtime, the p4d_free()
-should do nothing, the same as in <asm-generic/pgtable-nop4d.h>.
-
-It seems this bug should cause double-free in efi_call_phys_epilog(),
-but I don't know how to trigger that code path, so I can't confirm that
-by testing.
-
-Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
-Reviewed-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: stable@vger.kernel.org # 4.17
-Fixes: 98219dda2ab5 ("x86/mm: Fold p4d page table layer at runtime")
-Link: http://lkml.kernel.org/r/20180625102427.15015-1-aryabinin@virtuozzo.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-
----
- arch/x86/include/asm/pgalloc.h | 3 +++
- 1 file changed, 3 insertions(+)
-
---- a/arch/x86/include/asm/pgalloc.h
-+++ b/arch/x86/include/asm/pgalloc.h
-@@ -184,6 +184,9 @@ static inline p4d_t *p4d_alloc_one(struc
-
- static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
- {
-+ if (!pgtable_l5_enabled())
-+ return;
-+
- BUG_ON((unsigned long)p4d & (PAGE_SIZE-1));
- free_page((unsigned long)p4d);
- }