5.1-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 27 May 2019 12:13:16 +0000 (14:13 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 27 May 2019 12:13:16 +0000 (14:13 +0200)
added patches:
bio-fix-improper-use-of-smp_mb__before_atomic.patch
crypto-hash-fix-incorrect-hash_max_descsize.patch
crypto-vmx-ctr-always-increment-iv-as-quadword.patch
dax-arrange-for-dax_supported-check-to-span-multiple-devices.patch
kvm-check-irqchip-mode-before-assign-irqfd.patch
kvm-nvmx-fix-using-__this_cpu_read-in-preemptible-context.patch
kvm-svm-avic-fix-off-by-one-in-checking-host-apic-id.patch
kvm-x86-fix-return-value-for-reserved-efer.patch
libnvdimm-pmem-bypass-config_hardened_usercopy-overhead.patch
mmc-sdhci-iproc-cygnus-set-no_hispd-bit-to-fix-hs50-data-hold-time-problem.patch
mmc-sdhci-iproc-set-no_hispd-bit-to-fix-hs50-data-hold-time-problem.patch
revert-scsi-sd-keep-disk-read-only-when-re-reading-partition.patch
sbitmap-fix-improper-use-of-smp_mb__before_atomic.patch
tracing-add-a-check_val-check-before-updating-cond_snapshot-track_val.patch
x86-kvm-pmu-set-amd-s-virt-pmu-version-to-1.patch

16 files changed:
queue-5.1/bio-fix-improper-use-of-smp_mb__before_atomic.patch [new file with mode: 0644]
queue-5.1/crypto-hash-fix-incorrect-hash_max_descsize.patch [new file with mode: 0644]
queue-5.1/crypto-vmx-ctr-always-increment-iv-as-quadword.patch [new file with mode: 0644]
queue-5.1/dax-arrange-for-dax_supported-check-to-span-multiple-devices.patch [new file with mode: 0644]
queue-5.1/kvm-check-irqchip-mode-before-assign-irqfd.patch [new file with mode: 0644]
queue-5.1/kvm-nvmx-fix-using-__this_cpu_read-in-preemptible-context.patch [new file with mode: 0644]
queue-5.1/kvm-svm-avic-fix-off-by-one-in-checking-host-apic-id.patch [new file with mode: 0644]
queue-5.1/kvm-x86-fix-return-value-for-reserved-efer.patch [new file with mode: 0644]
queue-5.1/libnvdimm-pmem-bypass-config_hardened_usercopy-overhead.patch [new file with mode: 0644]
queue-5.1/mmc-sdhci-iproc-cygnus-set-no_hispd-bit-to-fix-hs50-data-hold-time-problem.patch [new file with mode: 0644]
queue-5.1/mmc-sdhci-iproc-set-no_hispd-bit-to-fix-hs50-data-hold-time-problem.patch [new file with mode: 0644]
queue-5.1/revert-scsi-sd-keep-disk-read-only-when-re-reading-partition.patch [new file with mode: 0644]
queue-5.1/sbitmap-fix-improper-use-of-smp_mb__before_atomic.patch [new file with mode: 0644]
queue-5.1/series
queue-5.1/tracing-add-a-check_val-check-before-updating-cond_snapshot-track_val.patch [new file with mode: 0644]
queue-5.1/x86-kvm-pmu-set-amd-s-virt-pmu-version-to-1.patch [new file with mode: 0644]

diff --git a/queue-5.1/bio-fix-improper-use-of-smp_mb__before_atomic.patch b/queue-5.1/bio-fix-improper-use-of-smp_mb__before_atomic.patch
new file mode 100644 (file)
index 0000000..d536eb1
--- /dev/null
@@ -0,0 +1,43 @@
+From f381c6a4bd0ae0fde2d6340f1b9bb0f58d915de6 Mon Sep 17 00:00:00 2001
+From: Andrea Parri <andrea.parri@amarulasolutions.com>
+Date: Mon, 20 May 2019 19:23:56 +0200
+Subject: bio: fix improper use of smp_mb__before_atomic()
+
+From: Andrea Parri <andrea.parri@amarulasolutions.com>
+
+commit f381c6a4bd0ae0fde2d6340f1b9bb0f58d915de6 upstream.
+
+This barrier only applies to the read-modify-write operations; in
+particular, it does not apply to the atomic_set() primitive.
+
+Replace the barrier with an smp_mb().
+
+Fixes: dac56212e8127 ("bio: skip atomic inc/dec of ->bi_cnt for most use cases")
+Cc: stable@vger.kernel.org
+Reported-by: "Paul E. McKenney" <paulmck@linux.ibm.com>
+Reported-by: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Andrea Parri <andrea.parri@amarulasolutions.com>
+Reviewed-by: Ming Lei <ming.lei@redhat.com>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: Ming Lei <ming.lei@redhat.com>
+Cc: linux-block@vger.kernel.org
+Cc: "Paul E. McKenney" <paulmck@linux.ibm.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/bio.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/include/linux/bio.h
++++ b/include/linux/bio.h
+@@ -224,7 +224,7 @@ static inline void bio_cnt_set(struct bi
+ {
+       if (count != 1) {
+               bio->bi_flags |= (1 << BIO_REFFED);
+-              smp_mb__before_atomic();
++              smp_mb();
+       }
+       atomic_set(&bio->__bi_cnt, count);
+ }
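
The rule behind this patch (and the sbitmap patch below), as a minimal
sketch that is not part of the patch itself; 'ref' is a hypothetical
counter. smp_mb__before_atomic() orders prior accesses only against
atomic read-modify-write operations such as atomic_inc(); atomic_set()
compiles to a plain store, so it needs a full smp_mb():

    #include <linux/atomic.h>

    static atomic_t ref;

    static void rmw_case(void)
    {
            smp_mb__before_atomic();  /* valid: atomic_inc() is RMW */
            atomic_inc(&ref);
    }

    static void plain_store_case(void)
    {
            smp_mb();                 /* atomic_set() is a plain store,
                                       * so a full barrier is required */
            atomic_set(&ref, 1);
    }
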
diff --git a/queue-5.1/crypto-hash-fix-incorrect-hash_max_descsize.patch b/queue-5.1/crypto-hash-fix-incorrect-hash_max_descsize.patch
new file mode 100644 (file)
index 0000000..d05b14e
--- /dev/null
@@ -0,0 +1,102 @@
+From e1354400b25da645c4764ed6844d12f1582c3b66 Mon Sep 17 00:00:00 2001
+From: Eric Biggers <ebiggers@google.com>
+Date: Tue, 14 May 2019 16:13:15 -0700
+Subject: crypto: hash - fix incorrect HASH_MAX_DESCSIZE
+
+From: Eric Biggers <ebiggers@google.com>
+
+commit e1354400b25da645c4764ed6844d12f1582c3b66 upstream.
+
+The "hmac(sha3-224-generic)" algorithm has a descsize of 368 bytes,
+which is greater than HASH_MAX_DESCSIZE (360) which is only enough for
+sha3-224-generic.  The check in shash_prepare_alg() doesn't catch this
+because the HMAC template doesn't set descsize on the algorithms, but
+rather sets it on each individual HMAC transform.
+
+This causes a stack buffer overflow when SHASH_DESC_ON_STACK() is used
+with hmac(sha3-224-generic).
+
+Fix it by increasing HASH_MAX_DESCSIZE to the real maximum.  Also add a
+sanity check to hmac_init().
+
+This was detected by the improved crypto self-tests in v5.2, by loading
+the tcrypt module with CONFIG_CRYPTO_MANAGER_EXTRA_TESTS=y enabled.  I
+didn't notice this bug when I ran the self-tests by requesting the
+algorithms via AF_ALG (i.e., not using tcrypt), probably because the
+stack layout differs in the two cases and that made a difference here.
+
+KASAN report:
+
+    BUG: KASAN: stack-out-of-bounds in memcpy include/linux/string.h:359 [inline]
+    BUG: KASAN: stack-out-of-bounds in shash_default_import+0x52/0x80 crypto/shash.c:223
+    Write of size 360 at addr ffff8880651defc8 by task insmod/3689
+
+    CPU: 2 PID: 3689 Comm: insmod Tainted: G            E     5.1.0-10741-g35c99ffa20edd #11
+    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014
+    Call Trace:
+     __dump_stack lib/dump_stack.c:77 [inline]
+     dump_stack+0x86/0xc5 lib/dump_stack.c:113
+     print_address_description+0x7f/0x260 mm/kasan/report.c:188
+     __kasan_report+0x144/0x187 mm/kasan/report.c:317
+     kasan_report+0x12/0x20 mm/kasan/common.c:614
+     check_memory_region_inline mm/kasan/generic.c:185 [inline]
+     check_memory_region+0x137/0x190 mm/kasan/generic.c:191
+     memcpy+0x37/0x50 mm/kasan/common.c:125
+     memcpy include/linux/string.h:359 [inline]
+     shash_default_import+0x52/0x80 crypto/shash.c:223
+     crypto_shash_import include/crypto/hash.h:880 [inline]
+     hmac_import+0x184/0x240 crypto/hmac.c:102
+     hmac_init+0x96/0xc0 crypto/hmac.c:107
+     crypto_shash_init include/crypto/hash.h:902 [inline]
+     shash_digest_unaligned+0x9f/0xf0 crypto/shash.c:194
+     crypto_shash_digest+0xe9/0x1b0 crypto/shash.c:211
+     generate_random_hash_testvec.constprop.11+0x1ec/0x5b0 crypto/testmgr.c:1331
+     test_hash_vs_generic_impl+0x3f7/0x5c0 crypto/testmgr.c:1420
+     __alg_test_hash+0x26d/0x340 crypto/testmgr.c:1502
+     alg_test_hash+0x22e/0x330 crypto/testmgr.c:1552
+     alg_test.part.7+0x132/0x610 crypto/testmgr.c:4931
+     alg_test+0x1f/0x40 crypto/testmgr.c:4952
+
+Fixes: b68a7ec1e9a3 ("crypto: hash - Remove VLA usage")
+Reported-by: Corentin Labbe <clabbe.montjoie@gmail.com>
+Cc: <stable@vger.kernel.org> # v4.20+
+Cc: Kees Cook <keescook@chromium.org>
+Signed-off-by: Eric Biggers <ebiggers@google.com>
+Reviewed-by: Kees Cook <keescook@chromium.org>
+Tested-by: Corentin Labbe <clabbe.montjoie@gmail.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ crypto/hmac.c         |    2 ++
+ include/crypto/hash.h |    8 +++++++-
+ 2 files changed, 9 insertions(+), 1 deletion(-)
+
+--- a/crypto/hmac.c
++++ b/crypto/hmac.c
+@@ -168,6 +168,8 @@ static int hmac_init_tfm(struct crypto_t
+       parent->descsize = sizeof(struct shash_desc) +
+                          crypto_shash_descsize(hash);
++      if (WARN_ON(parent->descsize > HASH_MAX_DESCSIZE))
++              return -EINVAL;
+       ctx->hash = hash;
+       return 0;
+--- a/include/crypto/hash.h
++++ b/include/crypto/hash.h
+@@ -152,7 +152,13 @@ struct shash_desc {
+ };
+ #define HASH_MAX_DIGESTSIZE    64
+-#define HASH_MAX_DESCSIZE     360
++
++/*
++ * Worst case is hmac(sha3-224-generic).  Its context is a nested 'shash_desc'
++ * containing a 'struct sha3_state'.
++ */
++#define HASH_MAX_DESCSIZE     (sizeof(struct shash_desc) + 360)
++
+ #define HASH_MAX_STATESIZE    512
+ #define SHASH_DESC_ON_STACK(shash, ctx)                                 \
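
A sketch of the sizing rule the fix enforces; desc_fits_on_stack() is a
hypothetical helper, not kernel API. SHASH_DESC_ON_STACK() reserves
sizeof(struct shash_desc) + HASH_MAX_DESCSIZE bytes, so a transform
whose descsize exceeds HASH_MAX_DESCSIZE (368 versus the old 360 for
hmac(sha3-224-generic)) overflows the buffer:

    #include <crypto/hash.h>

    /* mirrors the WARN_ON added to hmac_init_tfm() above */
    static bool desc_fits_on_stack(struct crypto_shash *tfm)
    {
            return crypto_shash_descsize(tfm) <= HASH_MAX_DESCSIZE;
    }
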
diff --git a/queue-5.1/crypto-vmx-ctr-always-increment-iv-as-quadword.patch b/queue-5.1/crypto-vmx-ctr-always-increment-iv-as-quadword.patch
new file mode 100644 (file)
index 0000000..07bcccb
--- /dev/null
@@ -0,0 +1,57 @@
+From 009b30ac7444c17fae34c4f435ebce8e8e2b3250 Mon Sep 17 00:00:00 2001
+From: Daniel Axtens <dja@axtens.net>
+Date: Wed, 15 May 2019 20:24:50 +1000
+Subject: crypto: vmx - CTR: always increment IV as quadword
+
+From: Daniel Axtens <dja@axtens.net>
+
+commit 009b30ac7444c17fae34c4f435ebce8e8e2b3250 upstream.
+
+The kernel self-tests picked up an issue with CTR mode:
+alg: skcipher: p8_aes_ctr encryption test failed (wrong result) on test vector 3, cfg="uneven misaligned splits, may sleep"
+
+Test vector 3 has an IV of FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFD, so
+after 3 increments it should wrap around to 0.
+
+In the aesp8-ppc code from OpenSSL, there are two paths that
+increment IVs: the bulk (8 at a time) path, and the individual
+path which is used when there are fewer than 8 AES blocks to
+process.
+
+In the bulk path, the IV is incremented with vadduqm: "Vector
+Add Unsigned Quadword Modulo", which does 128-bit addition.
+
+In the individual path, however, the IV is incremented with
+vadduwm: "Vector Add Unsigned Word Modulo", which instead
+does 4 32-bit additions. Thus the IV would instead become
+FFFFFFFFFFFFFFFFFFFFFFFF00000000, throwing off the result.
+
+Use vadduqm.
+
+This was probably a typo originally, what with q and w being
+adjacent. It is a pretty narrow edge case: I am really
+impressed by the quality of the kernel self-tests!
+
+Fixes: 5c380d623ed3 ("crypto: vmx - Add support for VMS instructions by ASM")
+Cc: stable@vger.kernel.org
+Signed-off-by: Daniel Axtens <dja@axtens.net>
+Acked-by: Nayna Jain <nayna@linux.ibm.com>
+Tested-by: Nayna Jain <nayna@linux.ibm.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/crypto/vmx/aesp8-ppc.pl |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/crypto/vmx/aesp8-ppc.pl
++++ b/drivers/crypto/vmx/aesp8-ppc.pl
+@@ -1357,7 +1357,7 @@ Loop_ctr32_enc:
+       addi            $idx,$idx,16
+       bdnz            Loop_ctr32_enc
+-      vadduwm         $ivec,$ivec,$one
++      vadduqm         $ivec,$ivec,$one
+        vmr            $dat,$inptail
+        lvx            $inptail,0,$inp
+        addi           $inp,$inp,16
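
An illustrative C model of the two increment styles (a simplification,
not the PPC assembly; function names are invented, and the big-endian
lane order of the vector register is assumed):

    #include <stdint.h>

    /* vadduqm-style: one 128-bit addition, carry propagates end to end */
    static void inc_quadword(uint64_t iv[2])
    {
            if (++iv[1] == 0)       /* low half wrapped to zero... */
                    ++iv[0];        /* ...so carry into the high half */
    }

    /* vadduwm-style: four independent 32-bit adds, no cross-word carry,
     * so FFFF...FFFD + 3 yields FFFFFFFFFFFFFFFFFFFFFFFF00000000 */
    static void inc_words(uint32_t iv[4])
    {
            iv[3] += 1;             /* a wrap here never reaches iv[2] */
    }
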
diff --git a/queue-5.1/dax-arrange-for-dax_supported-check-to-span-multiple-devices.patch b/queue-5.1/dax-arrange-for-dax_supported-check-to-span-multiple-devices.patch
new file mode 100644 (file)
index 0000000..49e1dc4
--- /dev/null
@@ -0,0 +1,363 @@
+From 7bf7eac8d648057519adb6fce1e31458c902212c Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Thu, 16 May 2019 13:26:29 -0700
+Subject: dax: Arrange for dax_supported check to span multiple devices
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+commit 7bf7eac8d648057519adb6fce1e31458c902212c upstream.
+
+Pankaj reports that starting with commit ad428cdb525a "dax: Check the
+end of the block-device capacity with dax_direct_access()" device-mapper
+no longer allows dax operation. This results from the stricter checks in
+__bdev_dax_supported() that validate that the start and end of a
+block-device map to the same 'pagemap' instance.
+
+Teach the dax-core and device-mapper to validate the 'pagemap' on a
+per-target basis. This is accomplished by refactoring the
+bdev_dax_supported() internals into generic_fsdax_supported() which
+takes a sector range to validate. Consequently generic_fsdax_supported()
+is suitable to be used in a device-mapper ->iterate_devices() callback.
+A new ->dax_supported() operation is added to allow composite devices to
+split and route upper-level bdev_dax_supported() requests.
+
+Fixes: ad428cdb525a ("dax: Check the end of the block-device...")
+Cc: <stable@vger.kernel.org>
+Cc: Ira Weiny <ira.weiny@intel.com>
+Cc: Dave Jiang <dave.jiang@intel.com>
+Cc: Keith Busch <keith.busch@intel.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Vishal Verma <vishal.l.verma@intel.com>
+Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
+Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Reported-by: Pankaj Gupta <pagupta@redhat.com>
+Reviewed-by: Pankaj Gupta <pagupta@redhat.com>
+Tested-by: Pankaj Gupta <pagupta@redhat.com>
+Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
+Reviewed-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/dax/super.c          |   88 +++++++++++++++++++++++++++----------------
+ drivers/md/dm-table.c        |   17 +++++---
+ drivers/md/dm.c              |   20 +++++++++
+ drivers/md/dm.h              |    1 
+ drivers/nvdimm/pmem.c        |    1 
+ drivers/s390/block/dcssblk.c |    1 
+ include/linux/dax.h          |   26 ++++++++++++
+ 7 files changed, 117 insertions(+), 37 deletions(-)
+
+--- a/drivers/dax/super.c
++++ b/drivers/dax/super.c
+@@ -73,22 +73,12 @@ struct dax_device *fs_dax_get_by_bdev(st
+ EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev);
+ #endif
+-/**
+- * __bdev_dax_supported() - Check if the device supports dax for filesystem
+- * @bdev: block device to check
+- * @blocksize: The block size of the device
+- *
+- * This is a library function for filesystems to check if the block device
+- * can be mounted with dax option.
+- *
+- * Return: true if supported, false if unsupported
+- */
+-bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
++bool __generic_fsdax_supported(struct dax_device *dax_dev,
++              struct block_device *bdev, int blocksize, sector_t start,
++              sector_t sectors)
+ {
+-      struct dax_device *dax_dev;
+       bool dax_enabled = false;
+       pgoff_t pgoff, pgoff_end;
+-      struct request_queue *q;
+       char buf[BDEVNAME_SIZE];
+       void *kaddr, *end_kaddr;
+       pfn_t pfn, end_pfn;
+@@ -102,21 +92,14 @@ bool __bdev_dax_supported(struct block_d
+               return false;
+       }
+-      q = bdev_get_queue(bdev);
+-      if (!q || !blk_queue_dax(q)) {
+-              pr_debug("%s: error: request queue doesn't support dax\n",
+-                              bdevname(bdev, buf));
+-              return false;
+-      }
+-
+-      err = bdev_dax_pgoff(bdev, 0, PAGE_SIZE, &pgoff);
++      err = bdev_dax_pgoff(bdev, start, PAGE_SIZE, &pgoff);
+       if (err) {
+               pr_debug("%s: error: unaligned partition for dax\n",
+                               bdevname(bdev, buf));
+               return false;
+       }
+-      last_page = PFN_DOWN(i_size_read(bdev->bd_inode) - 1) * 8;
++      last_page = PFN_DOWN((start + sectors - 1) * 512) * PAGE_SIZE / 512;
+       err = bdev_dax_pgoff(bdev, last_page, PAGE_SIZE, &pgoff_end);
+       if (err) {
+               pr_debug("%s: error: unaligned partition for dax\n",
+@@ -124,20 +107,11 @@ bool __bdev_dax_supported(struct block_d
+               return false;
+       }
+-      dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
+-      if (!dax_dev) {
+-              pr_debug("%s: error: device does not support dax\n",
+-                              bdevname(bdev, buf));
+-              return false;
+-      }
+-
+       id = dax_read_lock();
+       len = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn);
+       len2 = dax_direct_access(dax_dev, pgoff_end, 1, &end_kaddr, &end_pfn);
+       dax_read_unlock(id);
+-      put_dax(dax_dev);
+-
+       if (len < 1 || len2 < 1) {
+               pr_debug("%s: error: dax access failed (%ld)\n",
+                               bdevname(bdev, buf), len < 1 ? len : len2);
+@@ -178,6 +152,49 @@ bool __bdev_dax_supported(struct block_d
+       }
+       return true;
+ }
++EXPORT_SYMBOL_GPL(__generic_fsdax_supported);
++
++/**
++ * __bdev_dax_supported() - Check if the device supports dax for filesystem
++ * @bdev: block device to check
++ * @blocksize: The block size of the device
++ *
++ * This is a library function for filesystems to check if the block device
++ * can be mounted with dax option.
++ *
++ * Return: true if supported, false if unsupported
++ */
++bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
++{
++      struct dax_device *dax_dev;
++      struct request_queue *q;
++      char buf[BDEVNAME_SIZE];
++      bool ret;
++      int id;
++
++      q = bdev_get_queue(bdev);
++      if (!q || !blk_queue_dax(q)) {
++              pr_debug("%s: error: request queue doesn't support dax\n",
++                              bdevname(bdev, buf));
++              return false;
++      }
++
++      dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
++      if (!dax_dev) {
++              pr_debug("%s: error: device does not support dax\n",
++                              bdevname(bdev, buf));
++              return false;
++      }
++
++      id = dax_read_lock();
++      ret = dax_supported(dax_dev, bdev, blocksize, 0,
++                      i_size_read(bdev->bd_inode) / 512);
++      dax_read_unlock(id);
++
++      put_dax(dax_dev);
++
++      return ret;
++}
+ EXPORT_SYMBOL_GPL(__bdev_dax_supported);
+ #endif
+@@ -303,6 +320,15 @@ long dax_direct_access(struct dax_device
+ }
+ EXPORT_SYMBOL_GPL(dax_direct_access);
++bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
++              int blocksize, sector_t start, sector_t len)
++{
++      if (!dax_alive(dax_dev))
++              return false;
++
++      return dax_dev->ops->dax_supported(dax_dev, bdev, blocksize, start, len);
++}
++
+ size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
+               size_t bytes, struct iov_iter *i)
+ {
+--- a/drivers/md/dm-table.c
++++ b/drivers/md/dm-table.c
+@@ -880,13 +880,17 @@ void dm_table_set_type(struct dm_table *
+ }
+ EXPORT_SYMBOL_GPL(dm_table_set_type);
++/* validate the dax capability of the target device span */
+ static int device_supports_dax(struct dm_target *ti, struct dm_dev *dev,
+-                             sector_t start, sector_t len, void *data)
++                                     sector_t start, sector_t len, void *data)
+ {
+-      return bdev_dax_supported(dev->bdev, PAGE_SIZE);
++      int blocksize = *(int *) data;
++
++      return generic_fsdax_supported(dev->dax_dev, dev->bdev, blocksize,
++                      start, len);
+ }
+-static bool dm_table_supports_dax(struct dm_table *t)
++bool dm_table_supports_dax(struct dm_table *t, int blocksize)
+ {
+       struct dm_target *ti;
+       unsigned i;
+@@ -899,7 +903,8 @@ static bool dm_table_supports_dax(struct
+                       return false;
+               if (!ti->type->iterate_devices ||
+-                  !ti->type->iterate_devices(ti, device_supports_dax, NULL))
++                  !ti->type->iterate_devices(ti, device_supports_dax,
++                          &blocksize))
+                       return false;
+       }
+@@ -979,7 +984,7 @@ static int dm_table_determine_type(struc
+ verify_bio_based:
+               /* We must use this table as bio-based */
+               t->type = DM_TYPE_BIO_BASED;
+-              if (dm_table_supports_dax(t) ||
++              if (dm_table_supports_dax(t, PAGE_SIZE) ||
+                   (list_empty(devices) && live_md_type == DM_TYPE_DAX_BIO_BASED)) {
+                       t->type = DM_TYPE_DAX_BIO_BASED;
+               } else {
+@@ -1905,7 +1910,7 @@ void dm_table_set_restrictions(struct dm
+       }
+       blk_queue_write_cache(q, wc, fua);
+-      if (dm_table_supports_dax(t))
++      if (dm_table_supports_dax(t, PAGE_SIZE))
+               blk_queue_flag_set(QUEUE_FLAG_DAX, q);
+       else
+               blk_queue_flag_clear(QUEUE_FLAG_DAX, q);
+--- a/drivers/md/dm.c
++++ b/drivers/md/dm.c
+@@ -1105,6 +1105,25 @@ static long dm_dax_direct_access(struct
+       return ret;
+ }
++static bool dm_dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
++              int blocksize, sector_t start, sector_t len)
++{
++      struct mapped_device *md = dax_get_private(dax_dev);
++      struct dm_table *map;
++      int srcu_idx;
++      bool ret;
++
++      map = dm_get_live_table(md, &srcu_idx);
++      if (!map)
++              return false;
++
++      ret = dm_table_supports_dax(map, blocksize);
++
++      dm_put_live_table(md, srcu_idx);
++
++      return ret;
++}
++
+ static size_t dm_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
+                                   void *addr, size_t bytes, struct iov_iter *i)
+ {
+@@ -3194,6 +3213,7 @@ static const struct block_device_operati
+ static const struct dax_operations dm_dax_ops = {
+       .direct_access = dm_dax_direct_access,
++      .dax_supported = dm_dax_supported,
+       .copy_from_iter = dm_dax_copy_from_iter,
+       .copy_to_iter = dm_dax_copy_to_iter,
+ };
+--- a/drivers/md/dm.h
++++ b/drivers/md/dm.h
+@@ -72,6 +72,7 @@ bool dm_table_bio_based(struct dm_table
+ bool dm_table_request_based(struct dm_table *t);
+ void dm_table_free_md_mempools(struct dm_table *t);
+ struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);
++bool dm_table_supports_dax(struct dm_table *t, int blocksize);
+ void dm_lock_md_type(struct mapped_device *md);
+ void dm_unlock_md_type(struct mapped_device *md);
+--- a/drivers/nvdimm/pmem.c
++++ b/drivers/nvdimm/pmem.c
+@@ -295,6 +295,7 @@ static size_t pmem_copy_to_iter(struct d
+ static const struct dax_operations pmem_dax_ops = {
+       .direct_access = pmem_dax_direct_access,
++      .dax_supported = generic_fsdax_supported,
+       .copy_from_iter = pmem_copy_from_iter,
+       .copy_to_iter = pmem_copy_to_iter,
+ };
+--- a/drivers/s390/block/dcssblk.c
++++ b/drivers/s390/block/dcssblk.c
+@@ -59,6 +59,7 @@ static size_t dcssblk_dax_copy_to_iter(s
+ static const struct dax_operations dcssblk_dax_ops = {
+       .direct_access = dcssblk_dax_direct_access,
++      .dax_supported = generic_fsdax_supported,
+       .copy_from_iter = dcssblk_dax_copy_from_iter,
+       .copy_to_iter = dcssblk_dax_copy_to_iter,
+ };
+--- a/include/linux/dax.h
++++ b/include/linux/dax.h
+@@ -19,6 +19,12 @@ struct dax_operations {
+        */
+       long (*direct_access)(struct dax_device *, pgoff_t, long,
+                       void **, pfn_t *);
++      /*
++       * Validate whether this device is usable as an fsdax backing
++       * device.
++       */
++      bool (*dax_supported)(struct dax_device *, struct block_device *, int,
++                      sector_t, sector_t);
+       /* copy_from_iter: required operation for fs-dax direct-i/o */
+       size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t,
+                       struct iov_iter *);
+@@ -75,6 +81,17 @@ static inline bool bdev_dax_supported(st
+       return __bdev_dax_supported(bdev, blocksize);
+ }
++bool __generic_fsdax_supported(struct dax_device *dax_dev,
++              struct block_device *bdev, int blocksize, sector_t start,
++              sector_t sectors);
++static inline bool generic_fsdax_supported(struct dax_device *dax_dev,
++              struct block_device *bdev, int blocksize, sector_t start,
++              sector_t sectors)
++{
++      return __generic_fsdax_supported(dax_dev, bdev, blocksize, start,
++                      sectors);
++}
++
+ static inline struct dax_device *fs_dax_get_by_host(const char *host)
+ {
+       return dax_get_by_host(host);
+@@ -99,6 +116,13 @@ static inline bool bdev_dax_supported(st
+       return false;
+ }
++static inline bool generic_fsdax_supported(struct dax_device *dax_dev,
++              struct block_device *bdev, int blocksize, sector_t start,
++              sector_t sectors)
++{
++      return false;
++}
++
+ static inline struct dax_device *fs_dax_get_by_host(const char *host)
+ {
+       return NULL;
+@@ -142,6 +166,8 @@ bool dax_alive(struct dax_device *dax_de
+ void *dax_get_private(struct dax_device *dax_dev);
+ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
+               void **kaddr, pfn_t *pfn);
++bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
++              int blocksize, sector_t start, sector_t len);
+ size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
+               size_t bytes, struct iov_iter *i);
+ size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
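
A hypothetical sketch (struct stacked_dev and all of its fields are
invented) of how another composite driver could implement the new
->dax_supported() operation in the same shape as dm_dax_supported()
above: route the check to the underlying device over the sector range
that device actually maps:

    static bool stacked_dax_supported(struct dax_device *dax_dev,
                    struct block_device *bdev, int blocksize,
                    sector_t start, sector_t len)
    {
            struct stacked_dev *s = dax_get_private(dax_dev);

            /* assumes a simple linear remap onto one lower device */
            return generic_fsdax_supported(s->lower_dax, s->lower_bdev,
                            blocksize, s->lower_start + start, len);
    }
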
diff --git a/queue-5.1/kvm-check-irqchip-mode-before-assign-irqfd.patch b/queue-5.1/kvm-check-irqchip-mode-before-assign-irqfd.patch
new file mode 100644 (file)
index 0000000..a8192d6
--- /dev/null
@@ -0,0 +1,91 @@
+From 654f1f13ea56b92bacade8ce2725aea0457f91c0 Mon Sep 17 00:00:00 2001
+From: Peter Xu <peterx@redhat.com>
+Date: Sun, 5 May 2019 16:56:42 +0800
+Subject: kvm: Check irqchip mode before assign irqfd
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Peter Xu <peterx@redhat.com>
+
+commit 654f1f13ea56b92bacade8ce2725aea0457f91c0 upstream.
+
+When assigning a kvm irqfd we didn't check the irqchip mode but allowed
+KVM_IRQFD to succeed with all the irqchip modes.  However it does not
+make much sense to create an irqfd even without the kernel chips.  Let's
+provide an arch-dependent helper to check whether a specific irqfd is
+allowed by the arch.  At least for x86, it should make sense to check:
+
+- when irqchip mode is NONE, all irqfds should be disallowed, and,
+
+- when irqchip mode is SPLIT, irqfds that are with resamplefd should
+  be disallowed.
+
+In either case, we would previously silently ignore the irq or the irq
+ack event if the irqchip mode was incorrect.  However that can cause
+mysterious guest behaviors and can be hard to triage.  Let's fail
+KVM_IRQFD even earlier to detect these incorrect configurations.
+
+CC: Paolo Bonzini <pbonzini@redhat.com>
+CC: Radim Krčmář <rkrcmar@redhat.com>
+CC: Alex Williamson <alex.williamson@redhat.com>
+CC: Eduardo Habkost <ehabkost@redhat.com>
+Signed-off-by: Peter Xu <peterx@redhat.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/irq.c |    7 +++++++
+ arch/x86/kvm/irq.h |    1 +
+ virt/kvm/eventfd.c |    9 +++++++++
+ 3 files changed, 17 insertions(+)
+
+--- a/arch/x86/kvm/irq.c
++++ b/arch/x86/kvm/irq.c
+@@ -172,3 +172,10 @@ void __kvm_migrate_timers(struct kvm_vcp
+       __kvm_migrate_apic_timer(vcpu);
+       __kvm_migrate_pit_timer(vcpu);
+ }
++
++bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args)
++{
++      bool resample = args->flags & KVM_IRQFD_FLAG_RESAMPLE;
++
++      return resample ? irqchip_kernel(kvm) : irqchip_in_kernel(kvm);
++}
+--- a/arch/x86/kvm/irq.h
++++ b/arch/x86/kvm/irq.h
+@@ -114,6 +114,7 @@ static inline int irqchip_in_kernel(stru
+       return mode != KVM_IRQCHIP_NONE;
+ }
++bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args);
+ void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
+ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
+ void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu);
+--- a/virt/kvm/eventfd.c
++++ b/virt/kvm/eventfd.c
+@@ -44,6 +44,12 @@
+ static struct workqueue_struct *irqfd_cleanup_wq;
++bool __attribute__((weak))
++kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args)
++{
++      return true;
++}
++
+ static void
+ irqfd_inject(struct work_struct *work)
+ {
+@@ -297,6 +303,9 @@ kvm_irqfd_assign(struct kvm *kvm, struct
+       if (!kvm_arch_intc_initialized(kvm))
+               return -EAGAIN;
++      if (!kvm_arch_irqfd_allowed(kvm, args))
++              return -EINVAL;
++
+       irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL_ACCOUNT);
+       if (!irqfd)
+               return -ENOMEM;
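
The userspace-visible effect, sketched under the assumption that vm_fd
is an open KVM VM descriptor: a misconfigured irqfd now fails at attach
time instead of silently dropping interrupts later:

    #include <linux/kvm.h>
    #include <stdio.h>
    #include <sys/eventfd.h>
    #include <sys/ioctl.h>

    static int attach_irqfd(int vm_fd, unsigned int gsi)
    {
            struct kvm_irqfd irqfd = {
                    .fd  = eventfd(0, 0),
                    .gsi = gsi,
            };

            /* with no in-kernel irqchip (or with a resamplefd on a
             * split irqchip) this now fails with errno == EINVAL */
            if (ioctl(vm_fd, KVM_IRQFD, &irqfd) < 0) {
                    perror("KVM_IRQFD");
                    return -1;
            }
            return 0;
    }
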
diff --git a/queue-5.1/kvm-nvmx-fix-using-__this_cpu_read-in-preemptible-context.patch b/queue-5.1/kvm-nvmx-fix-using-__this_cpu_read-in-preemptible-context.patch
new file mode 100644 (file)
index 0000000..c027863
--- /dev/null
@@ -0,0 +1,72 @@
+From 541e886f7972cc647804dbb4909189e67987a945 Mon Sep 17 00:00:00 2001
+From: Wanpeng Li <wanpengli@tencent.com>
+Date: Fri, 17 May 2019 16:49:50 +0800
+Subject: KVM: nVMX: Fix using __this_cpu_read() in preemptible context
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Wanpeng Li <wanpengli@tencent.com>
+
+commit 541e886f7972cc647804dbb4909189e67987a945 upstream.
+
+ BUG: using __this_cpu_read() in preemptible [00000000] code: qemu-system-x86/4590
+  caller is nested_vmx_enter_non_root_mode+0xebd/0x1790 [kvm_intel]
+  CPU: 4 PID: 4590 Comm: qemu-system-x86 Tainted: G           OE     5.1.0-rc4+ #1
+  Call Trace:
+   dump_stack+0x67/0x95
+   __this_cpu_preempt_check+0xd2/0xe0
+   nested_vmx_enter_non_root_mode+0xebd/0x1790 [kvm_intel]
+   nested_vmx_run+0xda/0x2b0 [kvm_intel]
+   handle_vmlaunch+0x13/0x20 [kvm_intel]
+   vmx_handle_exit+0xbd/0x660 [kvm_intel]
+   kvm_arch_vcpu_ioctl_run+0xa2c/0x1e50 [kvm]
+   kvm_vcpu_ioctl+0x3ad/0x6d0 [kvm]
+   do_vfs_ioctl+0xa5/0x6e0
+   ksys_ioctl+0x6d/0x80
+   __x64_sys_ioctl+0x1a/0x20
+   do_syscall_64+0x6f/0x6c0
+   entry_SYSCALL_64_after_hwframe+0x49/0xbe
+
+Accessing a per-cpu variable requires preemption to be disabled; this
+patch extends the preemption-disabled region to cover __this_cpu_read().
+
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Radim Krčmář <rkrcmar@redhat.com>
+Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
+Fixes: 52017608da33 ("KVM: nVMX: add option to perform early consistency checks via H/W")
+Cc: stable@vger.kernel.org
+Reviewed-by: Sean Christopherson <sean.j.christopherson@intel.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/vmx/nested.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -2792,14 +2792,13 @@ static int nested_vmx_check_vmentry_hw(s
+             : "cc", "memory"
+       );
+-      preempt_enable();
+-
+       if (vmx->msr_autoload.host.nr)
+               vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
+       if (vmx->msr_autoload.guest.nr)
+               vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
+       if (vm_fail) {
++              preempt_enable();
+               WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) !=
+                            VMXERR_ENTRY_INVALID_CONTROL_FIELD);
+               return 1;
+@@ -2811,6 +2810,7 @@ static int nested_vmx_check_vmentry_hw(s
+       local_irq_enable();
+       if (hw_breakpoint_active())
+               set_debugreg(__this_cpu_read(cpu_dr7), 7);
++      preempt_enable();
+       /*
+        * A non-failing VMEntry means we somehow entered guest mode with
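
The rule the fix restores, as a sketch (the function wrapper is
invented and the VMEntry details are elided): every __this_cpu_read()
must execute inside the preemption-disabled region, otherwise the task
can migrate and read another CPU's variable:

    static void vmentry_check_tail(void)
    {
            preempt_disable();
            /* ... the early hardware VMEntry check runs on this CPU ... */
            if (hw_breakpoint_active())
                    set_debugreg(__this_cpu_read(cpu_dr7), 7);
            preempt_enable();   /* only after the last per-cpu access */
    }
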
diff --git a/queue-5.1/kvm-svm-avic-fix-off-by-one-in-checking-host-apic-id.patch b/queue-5.1/kvm-svm-avic-fix-off-by-one-in-checking-host-apic-id.patch
new file mode 100644 (file)
index 0000000..3b5f67b
--- /dev/null
@@ -0,0 +1,43 @@
+From c9bcd3e3335d0a29d89fabd2c385e1b989e6f1b0 Mon Sep 17 00:00:00 2001
+From: "Suthikulpanit, Suravee" <Suravee.Suthikulpanit@amd.com>
+Date: Tue, 14 May 2019 15:49:52 +0000
+Subject: kvm: svm/avic: fix off-by-one in checking host APIC ID
+
+From: Suthikulpanit, Suravee <Suravee.Suthikulpanit@amd.com>
+
+commit c9bcd3e3335d0a29d89fabd2c385e1b989e6f1b0 upstream.
+
+The current logic does not allow a VCPU to be loaded onto a CPU with
+APIC ID 255. This should be allowed since the host physical APIC ID
+field in the AVIC Physical APIC table entry is an 8-bit value,
+and APIC ID 255 is valid in systems with x2APIC enabled.
+Instead, do not allow VCPU load if the host APIC ID cannot be
+represented by an 8-bit value.
+
+Also, use the more appropriate AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK
+instead of AVIC_MAX_PHYSICAL_ID_COUNT.
+
+Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/svm.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -2024,7 +2024,11 @@ static void avic_vcpu_load(struct kvm_vc
+       if (!kvm_vcpu_apicv_active(vcpu))
+               return;
+-      if (WARN_ON(h_physical_id >= AVIC_MAX_PHYSICAL_ID_COUNT))
++      /*
++       * Since the host physical APIC id is 8 bits,
++       * we can support host APIC ID upto 255.
++       */
++      if (WARN_ON(h_physical_id > AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
+               return;
+       entry = READ_ONCE(*(svm->avic_physical_id_cache));
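
The boundary condition, reduced to a sketch in which 0xff stands in for
AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK; the old test rejected
exactly the largest value the 8-bit table field can hold:

    /* old: id >= AVIC_MAX_PHYSICAL_ID_COUNT (256) -> rejects id == 255
     * new: id >  0xff                             -> accepts id == 255 */
    static bool host_apic_id_fits(u32 id)
    {
            return id <= 0xff;  /* representable in the 8-bit field */
    }
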
diff --git a/queue-5.1/kvm-x86-fix-return-value-for-reserved-efer.patch b/queue-5.1/kvm-x86-fix-return-value-for-reserved-efer.patch
new file mode 100644 (file)
index 0000000..fdc8b5c
--- /dev/null
@@ -0,0 +1,36 @@
+From 66f61c92889ff3ca365161fb29dd36d6354682ba Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Fri, 24 May 2019 21:52:46 +0200
+Subject: KVM: x86: fix return value for reserved EFER
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit 66f61c92889ff3ca365161fb29dd36d6354682ba upstream.
+
+Commit 11988499e62b ("KVM: x86: Skip EFER vs. guest CPUID checks for
+host-initiated writes", 2019-04-02) introduced a "return false" in a
+function returning int, and anyway set_efer has a "nonzero on error"
+convention so it should be returning 1.
+
+Reported-by: Pavel Machek <pavel@denx.de>
+Fixes: 11988499e62b ("KVM: x86: Skip EFER vs. guest CPUID checks for host-initiated writes")
+Cc: Sean Christopherson <sean.j.christopherson@intel.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/x86.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -1288,7 +1288,7 @@ static int set_efer(struct kvm_vcpu *vcp
+       u64 efer = msr_info->data;
+       if (efer & efer_reserved_bits)
+-              return false;
++              return 1;
+       if (!msr_info->host_initiated) {
+               if (!__kvm_valid_efer(vcpu, efer))
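
The convention at issue, sketched with a hypothetical caller (the real
call chain runs through kvm_set_msr_common()): MSR setters return
nonzero on error so the caller can inject #GP, and "return false"
(i.e. 0) defeated that by reporting success:

    /* hypothetical caller illustrating the "nonzero on error" contract */
    if (set_efer(vcpu, msr_info))   /* 1 -> reject the write */
            kvm_inject_gp(vcpu, 0);
    /* "return false" == 0 meant "accepted", silently letting the
     * reserved-bit write through */
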
diff --git a/queue-5.1/libnvdimm-pmem-bypass-config_hardened_usercopy-overhead.patch b/queue-5.1/libnvdimm-pmem-bypass-config_hardened_usercopy-overhead.patch
new file mode 100644 (file)
index 0000000..055a47c
--- /dev/null
@@ -0,0 +1,79 @@
+From 52f476a323f9efc959be1c890d0cdcf12e1582e0 Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Thu, 16 May 2019 17:05:21 -0700
+Subject: libnvdimm/pmem: Bypass CONFIG_HARDENED_USERCOPY overhead
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+commit 52f476a323f9efc959be1c890d0cdcf12e1582e0 upstream.
+
+Jeff discovered that performance improves from ~375K iops to ~519K iops
+on a simple psync-write fio workload when moving the location of 'struct
+page' from the default PMEM location to DRAM. This result is surprising
+because the expectation is that 'struct page' for dax is only needed for
+third party references to dax mappings. For example, a dax-mapped buffer
+passed to another system call for direct-I/O requires 'struct page' for
+sending the request down the driver stack and pinning the page. There is
+no usage of 'struct page' for first party access to a file via
+read(2)/write(2) and friends.
+
+However, this "no page needed" expectation is violated by
+CONFIG_HARDENED_USERCOPY and the check_copy_size() performed in
+copy_from_iter_full_nocache() and copy_to_iter_mcsafe(). The
+check_heap_object() helper routine assumes the buffer is backed by a
+slab allocator (DRAM) page and applies some checks.  Those checks are
+invalid, dax pages do not originate from the slab, and redundant,
+dax_iomap_actor() has already validated that the I/O is within bounds.
+Specifically that routine validates that the logical file offset is
+within bounds of the file, then it does a sector-to-pfn translation
+which validates that the physical mapping is within bounds of the block
+device.
+
+Bypass additional hardened usercopy overhead and call the 'no check'
+versions of the copy_{to,from}_iter operations directly.
+
+Fixes: 0aed55af8834 ("x86, uaccess: introduce copy_from_iter_flushcache...")
+Cc: <stable@vger.kernel.org>
+Cc: Jeff Moyer <jmoyer@redhat.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Matthew Wilcox <willy@infradead.org>
+Reported-and-tested-by: Jeff Smits <jeff.smits@intel.com>
+Acked-by: Kees Cook <keescook@chromium.org>
+Acked-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/nvdimm/pmem.c |   10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+--- a/drivers/nvdimm/pmem.c
++++ b/drivers/nvdimm/pmem.c
+@@ -281,16 +281,22 @@ static long pmem_dax_direct_access(struc
+       return __pmem_direct_access(pmem, pgoff, nr_pages, kaddr, pfn);
+ }
++/*
++ * Use the 'no check' versions of copy_from_iter_flushcache() and
++ * copy_to_iter_mcsafe() to bypass HARDENED_USERCOPY overhead. Bounds
++ * checking, both file offset and device offset, is handled by
++ * dax_iomap_actor()
++ */
+ static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
+               void *addr, size_t bytes, struct iov_iter *i)
+ {
+-      return copy_from_iter_flushcache(addr, bytes, i);
++      return _copy_from_iter_flushcache(addr, bytes, i);
+ }
+ static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff,
+               void *addr, size_t bytes, struct iov_iter *i)
+ {
+-      return copy_to_iter_mcsafe(addr, bytes, i);
++      return _copy_to_iter_mcsafe(addr, bytes, i);
+ }
+ static const struct dax_operations pmem_dax_ops = {
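
For reference, a sketch of what the plain (checked) wrapper adds around
the underscore variant, following the linux/uio.h convention the patch
relies on; the function name here is invented:

    static __always_inline size_t
    checked_copy_from_iter_flushcache(void *addr, size_t bytes,
                                      struct iov_iter *i)
    {
            /* roughly what copy_from_iter_flushcache() does before
             * delegating to the 'no check' variant */
            if (unlikely(!check_copy_size(addr, bytes, false)))
                    return 0;
            return _copy_from_iter_flushcache(addr, bytes, i);
    }
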
diff --git a/queue-5.1/mmc-sdhci-iproc-cygnus-set-no_hispd-bit-to-fix-hs50-data-hold-time-problem.patch b/queue-5.1/mmc-sdhci-iproc-cygnus-set-no_hispd-bit-to-fix-hs50-data-hold-time-problem.patch
new file mode 100644 (file)
index 0000000..2a135fb
--- /dev/null
@@ -0,0 +1,46 @@
+From b7dfa695afc40d5396ed84b9f25aa3754de23e39 Mon Sep 17 00:00:00 2001
+From: Trac Hoang <trac.hoang@broadcom.com>
+Date: Thu, 9 May 2019 10:24:26 -0700
+Subject: mmc: sdhci-iproc: cygnus: Set NO_HISPD bit to fix HS50 data hold time problem
+
+From: Trac Hoang <trac.hoang@broadcom.com>
+
+commit b7dfa695afc40d5396ed84b9f25aa3754de23e39 upstream.
+
+The iproc host eMMC/SD controller hold time does not meet the
+specification in the HS50 mode. This problem can be mitigated
+by disabling the HISPD bit; thus forcing the controller output
+data to be driven on the falling clock edges rather than the
+rising clock edges.
+
+This change applies only to the Cygnus platform.
+
+Stable tag (v4.12+) chosen to assist stable kernel maintainers so that
+the change does not produce merge conflicts backporting to older kernel
+versions. In reality, the timing bug has existed since the driver was first
+introduced but there is no need for this driver to be supported in kernel
+versions that old.
+
+Cc: stable@vger.kernel.org # v4.12+
+Signed-off-by: Trac Hoang <trac.hoang@broadcom.com>
+Signed-off-by: Scott Branden <scott.branden@broadcom.com>
+Acked-by: Adrian Hunter <adrian.hunter@intel.com>
+Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/mmc/host/sdhci-iproc.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/mmc/host/sdhci-iproc.c
++++ b/drivers/mmc/host/sdhci-iproc.c
+@@ -196,7 +196,8 @@ static const struct sdhci_ops sdhci_ipro
+ };
+ static const struct sdhci_pltfm_data sdhci_iproc_cygnus_pltfm_data = {
+-      .quirks = SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK,
++      .quirks = SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK |
++                SDHCI_QUIRK_NO_HISPD_BIT,
+       .quirks2 = SDHCI_QUIRK2_ACMD23_BROKEN | SDHCI_QUIRK2_HOST_OFF_CARD_ON,
+       .ops = &sdhci_iproc_32only_ops,
+ };
diff --git a/queue-5.1/mmc-sdhci-iproc-set-no_hispd-bit-to-fix-hs50-data-hold-time-problem.patch b/queue-5.1/mmc-sdhci-iproc-set-no_hispd-bit-to-fix-hs50-data-hold-time-problem.patch
new file mode 100644 (file)
index 0000000..674c810
--- /dev/null
@@ -0,0 +1,44 @@
+From ec0970e0a1b2c807c908d459641a9f9a1be3e130 Mon Sep 17 00:00:00 2001
+From: Trac Hoang <trac.hoang@broadcom.com>
+Date: Thu, 9 May 2019 10:24:27 -0700
+Subject: mmc: sdhci-iproc: Set NO_HISPD bit to fix HS50 data hold time problem
+
+From: Trac Hoang <trac.hoang@broadcom.com>
+
+commit ec0970e0a1b2c807c908d459641a9f9a1be3e130 upstream.
+
+The iproc host eMMC/SD controller hold time does not meet the
+specification in the HS50 mode.  This problem can be mitigated
+by disabling the HISPD bit; thus forcing the controller output
+data to be driven on the falling clock edges rather than the
+rising clock edges.
+
+Stable tag (v4.12+) chosen to assist stable kernel maintainers so that
+the change does not produce merge conflicts backporting to older kernel
+versions. In reality, the timing bug has existed since the driver was first
+introduced but there is no need for this driver to be supported in kernel
+versions that old.
+
+Cc: stable@vger.kernel.org # v4.12+
+Signed-off-by: Trac Hoang <trac.hoang@broadcom.com>
+Signed-off-by: Scott Branden <scott.branden@broadcom.com>
+Acked-by: Adrian Hunter <adrian.hunter@intel.com>
+Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/mmc/host/sdhci-iproc.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/mmc/host/sdhci-iproc.c
++++ b/drivers/mmc/host/sdhci-iproc.c
+@@ -220,7 +220,8 @@ static const struct sdhci_iproc_data ipr
+ static const struct sdhci_pltfm_data sdhci_iproc_pltfm_data = {
+       .quirks = SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK |
+-                SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12,
++                SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12 |
++                SDHCI_QUIRK_NO_HISPD_BIT,
+       .quirks2 = SDHCI_QUIRK2_ACMD23_BROKEN,
+       .ops = &sdhci_iproc_ops,
+ };
diff --git a/queue-5.1/revert-scsi-sd-keep-disk-read-only-when-re-reading-partition.patch b/queue-5.1/revert-scsi-sd-keep-disk-read-only-when-re-reading-partition.patch
new file mode 100644 (file)
index 0000000..9b54b4e
--- /dev/null
@@ -0,0 +1,51 @@
+From 8acf608e602f6ec38b7cc37b04c80f1ce9a1a6cc Mon Sep 17 00:00:00 2001
+From: "Martin K. Petersen" <martin.petersen@oracle.com>
+Date: Mon, 20 May 2019 10:57:18 -0400
+Subject: Revert "scsi: sd: Keep disk read-only when re-reading partition"
+
+From: Martin K. Petersen <martin.petersen@oracle.com>
+
+commit 8acf608e602f6ec38b7cc37b04c80f1ce9a1a6cc upstream.
+
+This reverts commit 20bd1d026aacc5399464f8328f305985c493cde3.
+
+This patch introduced regressions for devices that come online in
+read-only state and subsequently switch to read-write.
+
+Given how the partition code is currently implemented it is not
+possible to persist the read-only flag across a device revalidate
+call. This may need to get addressed in the future since it is common
+for user applications to proactively call BLKRRPART.
+
+Reverting this commit will re-introduce a regression where a
+device-initiated revalidate event will cause the admin state to be
+forgotten. A separate patch will address this issue.
+
+Fixes: 20bd1d026aac ("scsi: sd: Keep disk read-only when re-reading partition")
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/scsi/sd.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/drivers/scsi/sd.c
++++ b/drivers/scsi/sd.c
+@@ -2603,7 +2603,6 @@ sd_read_write_protect_flag(struct scsi_d
+       int res;
+       struct scsi_device *sdp = sdkp->device;
+       struct scsi_mode_data data;
+-      int disk_ro = get_disk_ro(sdkp->disk);
+       int old_wp = sdkp->write_prot;
+       set_disk_ro(sdkp->disk, 0);
+@@ -2644,7 +2643,7 @@ sd_read_write_protect_flag(struct scsi_d
+                         "Test WP failed, assume Write Enabled\n");
+       } else {
+               sdkp->write_prot = ((data.device_specific & 0x80) != 0);
+-              set_disk_ro(sdkp->disk, sdkp->write_prot || disk_ro);
++              set_disk_ro(sdkp->disk, sdkp->write_prot);
+               if (sdkp->first_scan || old_wp != sdkp->write_prot) {
+                       sd_printk(KERN_NOTICE, sdkp, "Write Protect is %s\n",
+                                 sdkp->write_prot ? "on" : "off");
diff --git a/queue-5.1/sbitmap-fix-improper-use-of-smp_mb__before_atomic.patch b/queue-5.1/sbitmap-fix-improper-use-of-smp_mb__before_atomic.patch
new file mode 100644 (file)
index 0000000..cdc8d70
--- /dev/null
@@ -0,0 +1,44 @@
+From a0934fd2b1208458e55fc4b48f55889809fce666 Mon Sep 17 00:00:00 2001
+From: Andrea Parri <andrea.parri@amarulasolutions.com>
+Date: Mon, 20 May 2019 19:23:57 +0200
+Subject: sbitmap: fix improper use of smp_mb__before_atomic()
+
+From: Andrea Parri <andrea.parri@amarulasolutions.com>
+
+commit a0934fd2b1208458e55fc4b48f55889809fce666 upstream.
+
+This barrier only applies to the read-modify-write operations; in
+particular, it does not apply to the atomic_set() primitive.
+
+Replace the barrier with an smp_mb().
+
+Fixes: 6c0ca7ae292ad ("sbitmap: fix wakeup hang after sbq resize")
+Cc: stable@vger.kernel.org
+Reported-by: "Paul E. McKenney" <paulmck@linux.ibm.com>
+Reported-by: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Andrea Parri <andrea.parri@amarulasolutions.com>
+Reviewed-by: Ming Lei <ming.lei@redhat.com>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: Omar Sandoval <osandov@fb.com>
+Cc: Ming Lei <ming.lei@redhat.com>
+Cc: linux-block@vger.kernel.org
+Cc: "Paul E. McKenney" <paulmck@linux.ibm.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ lib/sbitmap.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/lib/sbitmap.c
++++ b/lib/sbitmap.c
+@@ -435,7 +435,7 @@ static void sbitmap_queue_update_wake_ba
+                * to ensure that the batch size is updated before the wait
+                * counts.
+                */
+-              smp_mb__before_atomic();
++              smp_mb();
+               for (i = 0; i < SBQ_WAIT_QUEUES; i++)
+                       atomic_set(&sbq->ws[i].wait_cnt, 1);
+       }
diff --git a/queue-5.1/series b/queue-5.1/series
index 00ad7fd0f9eea055a1c3f34eec896d16c88b294b..ba68d9ec4b068764be523d9db8f0258b5f84fe7d 100644 (file)
--- a/queue-5.1/series
@@ -1,3 +1,18 @@
 x86-hide-the-int3_emulate_call-jmp-functions-from-uml.patch
 ext4-do-not-delete-unlinked-inode-from-orphan-list-on-failed-truncate.patch
 ext4-wait-for-outstanding-dio-during-truncate-in-nojournal-mode.patch
+kvm-x86-fix-return-value-for-reserved-efer.patch
+x86-kvm-pmu-set-amd-s-virt-pmu-version-to-1.patch
+bio-fix-improper-use-of-smp_mb__before_atomic.patch
+sbitmap-fix-improper-use-of-smp_mb__before_atomic.patch
+revert-scsi-sd-keep-disk-read-only-when-re-reading-partition.patch
+crypto-hash-fix-incorrect-hash_max_descsize.patch
+crypto-vmx-ctr-always-increment-iv-as-quadword.patch
+mmc-sdhci-iproc-cygnus-set-no_hispd-bit-to-fix-hs50-data-hold-time-problem.patch
+mmc-sdhci-iproc-set-no_hispd-bit-to-fix-hs50-data-hold-time-problem.patch
+tracing-add-a-check_val-check-before-updating-cond_snapshot-track_val.patch
+dax-arrange-for-dax_supported-check-to-span-multiple-devices.patch
+kvm-check-irqchip-mode-before-assign-irqfd.patch
+kvm-svm-avic-fix-off-by-one-in-checking-host-apic-id.patch
+kvm-nvmx-fix-using-__this_cpu_read-in-preemptible-context.patch
+libnvdimm-pmem-bypass-config_hardened_usercopy-overhead.patch
diff --git a/queue-5.1/tracing-add-a-check_val-check-before-updating-cond_snapshot-track_val.patch b/queue-5.1/tracing-add-a-check_val-check-before-updating-cond_snapshot-track_val.patch
new file mode 100644 (file)
index 0000000..afe1b72
--- /dev/null
@@ -0,0 +1,175 @@
+From 9b2ca371b1505a547217b244f903ad3fb86fa5b4 Mon Sep 17 00:00:00 2001
+From: Tom Zanussi <tom.zanussi@linux.intel.com>
+Date: Thu, 18 Apr 2019 10:18:52 -0500
+Subject: tracing: Add a check_val() check before updating cond_snapshot() track_val
+
+From: Tom Zanussi <tom.zanussi@linux.intel.com>
+
+commit 9b2ca371b1505a547217b244f903ad3fb86fa5b4 upstream.
+
+Without this check a snapshot is taken whenever a bucket's max is hit,
+rather than only when the global max is hit, as it should be.
+
+Before:
+
+  In this example, we do a first run of the workload (cyclictest),
+  examine the output, note the max ('triggering value') (347), then do
+  a second run and note the max again.
+
+  In this case, the max in the second run (39) is below the max in the
+  first run, but since we haven't cleared the histogram, the first max
+  is still in the histogram and is higher than any other max, so it
+  should still be the max for the snapshot.  It isn't however - the
+  value should still be 347 after the second run.
+
+  # echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="cyclictest"' >> /sys/kernel/debug/tracing/events/sched/sched_waking/trigger
+  # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmax($wakeup_lat).save(next_prio,next_comm,prev_pid,prev_prio,prev_comm):onmax($wakeup_lat).snapshot() if next_comm=="cyclictest"' >> /sys/kernel/debug/tracing/events/sched/sched_switch/trigger
+
+  # cyclictest -p 80 -n -s -t 2 -D 2
+
+  # cat /sys/kernel/debug/tracing/events/sched/sched_switch/hist
+
+  { next_pid:       2143 } hitcount:        199
+    max:         44  next_prio:        120  next_comm: cyclictest
+    prev_pid:          0  prev_prio:        120  prev_comm: swapper/4
+
+  { next_pid:       2145 } hitcount:       1325
+    max:         38  next_prio:         19  next_comm: cyclictest
+    prev_pid:          0  prev_prio:        120  prev_comm: swapper/2
+
+  { next_pid:       2144 } hitcount:       1982
+    max:        347  next_prio:         19  next_comm: cyclictest
+    prev_pid:          0  prev_prio:        120  prev_comm: swapper/6
+
+  Snapshot taken (see tracing/snapshot).  Details:
+      triggering value { onmax($wakeup_lat) }:        347
+      triggered by event with key: { next_pid:       2144 }
+
+  # cyclictest -p 80 -n -s -t 2 -D 2
+
+  # cat /sys/kernel/debug/tracing/events/sched/sched_switch/hist
+
+  { next_pid:       2143 } hitcount:        199
+    max:         44  next_prio:        120  next_comm: cyclictest
+    prev_pid:          0  prev_prio:        120  prev_comm: swapper/4
+
+  { next_pid:       2148 } hitcount:        199
+    max:         16  next_prio:        120  next_comm: cyclictest
+    prev_pid:          0  prev_prio:        120  prev_comm: swapper/1
+
+  { next_pid:       2145 } hitcount:       1325
+    max:         38  next_prio:         19  next_comm: cyclictest
+    prev_pid:          0  prev_prio:        120  prev_comm: swapper/2
+
+  { next_pid:       2150 } hitcount:       1326
+    max:         39  next_prio:         19  next_comm: cyclictest
+    prev_pid:          0  prev_prio:        120  prev_comm: swapper/4
+
+  { next_pid:       2144 } hitcount:       1982
+    max:        347  next_prio:         19  next_comm: cyclictest
+    prev_pid:          0  prev_prio:        120  prev_comm: swapper/6
+
+  { next_pid:       2149 } hitcount:       1983
+    max:        130  next_prio:         19  next_comm: cyclictest
+    prev_pid:          0  prev_prio:        120  prev_comm: swapper/0
+
+  Snapshot taken (see tracing/snapshot).  Details:
+    triggering value { onmax($wakeup_lat) }:    39
+    triggered by event with key: { next_pid:       2150 }
+
+After:
+
+  In this example, we do a first run of the workload (cyclictest),
+  examine the output, note the max ('triggering value') (375), then do
+  a second run and note the max again.
+
+  In this case, the max in the second run is still 375, the highest in
+  any bucket, as it should be.
+
+  # cyclictest -p 80 -n -s -t 2 -D 2
+
+  # cat /sys/kernel/debug/tracing/events/sched/sched_switch/hist
+
+  { next_pid:       2072 } hitcount:        200
+    max:         28  next_prio:        120  next_comm: cyclictest
+    prev_pid:          0  prev_prio:        120  prev_comm: swapper/5
+
+  { next_pid:       2074 } hitcount:       1323
+    max:        375  next_prio:         19  next_comm: cyclictest
+    prev_pid:          0  prev_prio:        120  prev_comm: swapper/2
+
+  { next_pid:       2073 } hitcount:       1980
+    max:        153  next_prio:         19  next_comm: cyclictest
+    prev_pid:          0  prev_prio:        120  prev_comm: swapper/6
+
+  Snapshot taken (see tracing/snapshot).  Details:
+    triggering value { onmax($wakeup_lat) }:        375
+    triggered by event with key: { next_pid:       2074 }
+
+  # cyclictest -p 80 -n -s -t 2 -D 2
+
+  # cat /sys/kernel/debug/tracing/events/sched/sched_switch/hist
+
+  { next_pid:       2101 } hitcount:        199
+    max:         49  next_prio:        120  next_comm: cyclictest
+    prev_pid:          0  prev_prio:        120  prev_comm: swapper/6
+
+  { next_pid:       2072 } hitcount:        200
+    max:         28  next_prio:        120  next_comm: cyclictest
+    prev_pid:          0  prev_prio:        120  prev_comm: swapper/5
+
+  { next_pid:       2074 } hitcount:       1323
+    max:        375  next_prio:         19  next_comm: cyclictest
+    prev_pid:          0  prev_prio:        120  prev_comm: swapper/2
+
+  { next_pid:       2103 } hitcount:       1325
+    max:         74  next_prio:         19  next_comm: cyclictest
+    prev_pid:          0  prev_prio:        120  prev_comm: swapper/4
+
+  { next_pid:       2073 } hitcount:       1980
+    max:        153  next_prio:         19  next_comm: cyclictest
+    prev_pid:          0  prev_prio:        120  prev_comm: swapper/6
+
+  { next_pid:       2102 } hitcount:       1981
+    max:         84  next_prio:         19  next_comm: cyclictest
+    prev_pid:         12  prev_prio:        120  prev_comm: kworker/0:1
+
+  Snapshot taken (see tracing/snapshot).  Details:
+    triggering value { onmax($wakeup_lat) }:        375
+    triggered by event with key: { next_pid:       2074 }
+
+Link: http://lkml.kernel.org/r/95958351329f129c07504b4d1769c47a97b70d65.1555597045.git.tom.zanussi@linux.intel.com
+
+Cc: stable@vger.kernel.org
+Fixes: a3785b7eca8fd ("tracing: Add hist trigger snapshot() action")
+Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/trace/trace_events_hist.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/kernel/trace/trace_events_hist.c
++++ b/kernel/trace/trace_events_hist.c
+@@ -3543,14 +3543,20 @@ static bool cond_snapshot_update(struct
+       struct track_data *track_data = tr->cond_snapshot->cond_data;
+       struct hist_elt_data *elt_data, *track_elt_data;
+       struct snapshot_context *context = cond_data;
++      struct action_data *action;
+       u64 track_val;
+       if (!track_data)
+               return false;
++      action = track_data->action_data;
++
+       track_val = get_track_val(track_data->hist_data, context->elt,
+                                 track_data->action_data);
++      if (!action->track_data.check_val(track_data->track_val, track_val))
++              return false;
++
+       track_data->track_val = track_val;
+       memcpy(track_data->key, context->key, track_data->key_len);
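
The decision the added check_val() call implements, reduced to a sketch
with invented names: a hit only updates the tracked value, and thus
triggers a snapshot, when it beats the current global maximum rather
than merely its own bucket's maximum:

    static bool should_snapshot(u64 *global_max, u64 bucket_val)
    {
            if (bucket_val <= *global_max)
                    return false;           /* per-bucket max only */
            *global_max = bucket_val;       /* new global max */
            return true;                    /* take the snapshot */
    }
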
diff --git a/queue-5.1/x86-kvm-pmu-set-amd-s-virt-pmu-version-to-1.patch b/queue-5.1/x86-kvm-pmu-set-amd-s-virt-pmu-version-to-1.patch
new file mode 100644 (file)
index 0000000..4deec76
--- /dev/null
@@ -0,0 +1,72 @@
+From a80c4ec10ed9632c44c829452dc40a0443ff4e85 Mon Sep 17 00:00:00 2001
+From: Borislav Petkov <bp@suse.de>
+Date: Wed, 8 May 2019 19:02:48 +0200
+Subject: x86/kvm/pmu: Set AMD's virt PMU version to 1
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Borislav Petkov <bp@suse.de>
+
+commit a80c4ec10ed9632c44c829452dc40a0443ff4e85 upstream.
+
+After commit:
+
+  672ff6cff80c ("KVM: x86: Raise #GP when guest vCPU do not support PMU")
+
+my AMD guests started #GPing like this:
+
+  general protection fault: 0000 [#1] PREEMPT SMP
+  CPU: 1 PID: 4355 Comm: bash Not tainted 5.1.0-rc6+ #3
+  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014
+  RIP: 0010:x86_perf_event_update+0x3b/0xa0
+
+with Code: pointing to RDPMC. It is RDPMC because the guest has the
+hardware watchdog CONFIG_HARDLOCKUP_DETECTOR_PERF enabled which uses
+perf. Instrumenting kvm_pmu_rdpmc() some, showed that it fails due to:
+
+  if (!pmu->version)
+       return 1;
+
+which the above commit added. Since AMD's PMU leaves the version at 0,
+that causes the #GP injection into the guest.
+
+Set pmu->version arbitrarily to 1 and move it above the non-applicable
+struct kvm_pmu members.
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Cc: "H. Peter Anvin" <hpa@zytor.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Janakarajan Natarajan <Janakarajan.Natarajan@amd.com>
+Cc: kvm@vger.kernel.org
+Cc: Liran Alon <liran.alon@oracle.com>
+Cc: Mihai Carabas <mihai.carabas@oracle.com>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: "Radim Krčmář" <rkrcmar@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Tom Lendacky <thomas.lendacky@amd.com>
+Cc: x86@kernel.org
+Cc: stable@vger.kernel.org
+Fixes: 672ff6cff80c ("KVM: x86: Raise #GP when guest vCPU do not support PMU")
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/pmu_amd.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/pmu_amd.c
++++ b/arch/x86/kvm/pmu_amd.c
+@@ -269,10 +269,10 @@ static void amd_pmu_refresh(struct kvm_v
+       pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << 48) - 1;
+       pmu->reserved_bits = 0xffffffff00200000ull;
++      pmu->version = 1;
+       /* not applicable to AMD; but clean them to prevent any fall out */
+       pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
+       pmu->nr_arch_fixed_counters = 0;
+-      pmu->version = 0;
+       pmu->global_status = 0;
+ }
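
The guest-visible failure path, as a sketch: the in-guest perf watchdog
eventually executes RDPMC, which KVM turned into an injected #GP while
pmu->version was 0 (rdpmc_counter() is an invented illustration of what
x86_perf_event_update() boils down to):

    #include <stdint.h>

    static inline uint64_t rdpmc_counter(uint32_t idx)
    {
            uint32_t lo, hi;

            /* took an unexpected #GP in AMD guests before this fix */
            asm volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (idx));
            return ((uint64_t)hi << 32) | lo;
    }
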