git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.13-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 13 Sep 2021 11:36:21 +0000 (13:36 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 13 Sep 2021 11:36:21 +0000 (13:36 +0200)
added patches:
arm-dts-at91-add-pinctrl-names-0-for-all-gpios.patch
char-tpm-kconfig-remove-bad-i2c-cr50-select.patch
fuse-flush-extending-writes.patch
fuse-truncate-pagecache-on-atomic_o_trunc.patch
fuse-wait-for-writepages-in-syncfs.patch
ima-remove-the-dependency-on-crypto_md5.patch
ima-remove-wmissing-prototypes-warning.patch
io-wq-check-max_worker-limits-if-a-worker-transitions-bound-state.patch
kvm-arm64-unregister-hyp-sections-from-kmemleak-in-protected-mode.patch
kvm-nvmx-unconditionally-clear-nested.pi_pending-on-nested-vm-enter.patch
kvm-s390-index-kvm-arch.idle_mask-by-vcpu_idx.patch
kvm-vmx-avoid-running-vmx_handle_exit_irqoff-in-case-of-emulation.patch
kvm-x86-clamp-host-mapping-level-to-max_level-in-kvm_mmu_max_mapping_level.patch
kvm-x86-mmu-avoid-collision-with-present-sptes-in-tdp-mmu-lpage-stats.patch
kvm-x86-update-vcpu-s-hv_clock-before-back-to-guest-when-tsc_offset-is-adjusted.patch
md-raid10-remove-unnecessary-rcu_dereference-in-raid10_handle_discard.patch
perf-x86-amd-ibs-extend-perf_pmu_cap_no_exclude-to-ibs-op.patch
revert-kvm-x86-mmu-add-guest-physical-address-check-in-translate_gpa.patch
x86-efi-restore-firmware-idt-before-calling-exitbootservices.patch
x86-resctrl-fix-a-maybe-uninitialized-build-warning-treated-as-error.patch

21 files changed:
queue-5.13/arm-dts-at91-add-pinctrl-names-0-for-all-gpios.patch [new file with mode: 0644]
queue-5.13/char-tpm-kconfig-remove-bad-i2c-cr50-select.patch [new file with mode: 0644]
queue-5.13/fuse-flush-extending-writes.patch [new file with mode: 0644]
queue-5.13/fuse-truncate-pagecache-on-atomic_o_trunc.patch [new file with mode: 0644]
queue-5.13/fuse-wait-for-writepages-in-syncfs.patch [new file with mode: 0644]
queue-5.13/ima-remove-the-dependency-on-crypto_md5.patch [new file with mode: 0644]
queue-5.13/ima-remove-wmissing-prototypes-warning.patch [new file with mode: 0644]
queue-5.13/io-wq-check-max_worker-limits-if-a-worker-transitions-bound-state.patch [new file with mode: 0644]
queue-5.13/kvm-arm64-unregister-hyp-sections-from-kmemleak-in-protected-mode.patch [new file with mode: 0644]
queue-5.13/kvm-nvmx-unconditionally-clear-nested.pi_pending-on-nested-vm-enter.patch [new file with mode: 0644]
queue-5.13/kvm-s390-index-kvm-arch.idle_mask-by-vcpu_idx.patch [new file with mode: 0644]
queue-5.13/kvm-vmx-avoid-running-vmx_handle_exit_irqoff-in-case-of-emulation.patch [new file with mode: 0644]
queue-5.13/kvm-x86-clamp-host-mapping-level-to-max_level-in-kvm_mmu_max_mapping_level.patch [new file with mode: 0644]
queue-5.13/kvm-x86-mmu-avoid-collision-with-present-sptes-in-tdp-mmu-lpage-stats.patch [new file with mode: 0644]
queue-5.13/kvm-x86-update-vcpu-s-hv_clock-before-back-to-guest-when-tsc_offset-is-adjusted.patch [new file with mode: 0644]
queue-5.13/md-raid10-remove-unnecessary-rcu_dereference-in-raid10_handle_discard.patch [new file with mode: 0644]
queue-5.13/perf-x86-amd-ibs-extend-perf_pmu_cap_no_exclude-to-ibs-op.patch [new file with mode: 0644]
queue-5.13/revert-kvm-x86-mmu-add-guest-physical-address-check-in-translate_gpa.patch [new file with mode: 0644]
queue-5.13/series
queue-5.13/x86-efi-restore-firmware-idt-before-calling-exitbootservices.patch [new file with mode: 0644]
queue-5.13/x86-resctrl-fix-a-maybe-uninitialized-build-warning-treated-as-error.patch [new file with mode: 0644]

diff --git a/queue-5.13/arm-dts-at91-add-pinctrl-names-0-for-all-gpios.patch b/queue-5.13/arm-dts-at91-add-pinctrl-names-0-for-all-gpios.patch
new file mode 100644 (file)
index 0000000..3acb65d
--- /dev/null
@@ -0,0 +1,189 @@
+From bf781869e5cf3e4ec1a47dad69b6f0df97629cbd Mon Sep 17 00:00:00 2001
+From: Claudiu Beznea <claudiu.beznea@microchip.com>
+Date: Tue, 27 Jul 2021 10:40:05 +0300
+Subject: ARM: dts: at91: add pinctrl-{names, 0} for all gpios
+
+From: Claudiu Beznea <claudiu.beznea@microchip.com>
+
+commit bf781869e5cf3e4ec1a47dad69b6f0df97629cbd upstream.
+
+Add pinctrl-names and pinctrl-0 properties on controllers that claims to
+use pins to avoid failures due to
+commit 2ab73c6d8323 ("gpio: Support GPIO controllers without pin-ranges")
+and also to avoid using pins that may be claimed my other IPs.
+
+Fixes: b7c2b6157079 ("ARM: at91: add Atmel's SAMA5D3 Xplained board")
+Fixes: 1e5f532c2737 ("ARM: dts: at91: sam9x60: add device tree for soc and board")
+Fixes: 38153a017896 ("ARM: at91/dt: sama5d4: add dts for sama5d4 xplained board")
+Signed-off-by: Claudiu Beznea <claudiu.beznea@microchip.com>
+Signed-off-by: Nicolas Ferre <nicolas.ferre@microchip.com>
+Link: https://lore.kernel.org/r/20210727074006.1609989-1-claudiu.beznea@microchip.com
+Cc: <stable@vger.kernel.org> # v5.7+
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm/boot/dts/at91-sam9x60ek.dts        |   16 ++++++++++++++-
+ arch/arm/boot/dts/at91-sama5d3_xplained.dts |   29 ++++++++++++++++++++++++++++
+ arch/arm/boot/dts/at91-sama5d4_xplained.dts |   19 ++++++++++++++++++
+ 3 files changed, 63 insertions(+), 1 deletion(-)
+
+--- a/arch/arm/boot/dts/at91-sam9x60ek.dts
++++ b/arch/arm/boot/dts/at91-sam9x60ek.dts
+@@ -92,6 +92,8 @@
+       leds {
+               compatible = "gpio-leds";
++              pinctrl-names = "default";
++              pinctrl-0 = <&pinctrl_gpio_leds>;
+               status = "okay"; /* Conflict with pwm0. */
+               red {
+@@ -537,6 +539,10 @@
+                                AT91_PIOA 19 AT91_PERIPH_A (AT91_PINCTRL_PULL_UP | AT91_PINCTRL_DRIVE_STRENGTH_HI)     /* PA19 DAT2 periph A with pullup */
+                                AT91_PIOA 20 AT91_PERIPH_A (AT91_PINCTRL_PULL_UP | AT91_PINCTRL_DRIVE_STRENGTH_HI)>;   /* PA20 DAT3 periph A with pullup */
+               };
++              pinctrl_sdmmc0_cd: sdmmc0_cd {
++                      atmel,pins =
++                              <AT91_PIOA 23 AT91_PERIPH_GPIO AT91_PINCTRL_NONE>;
++              };
+       };
+       sdmmc1 {
+@@ -569,6 +575,14 @@
+                                     AT91_PIOD 16 AT91_PERIPH_GPIO AT91_PINCTRL_NONE>;
+               };
+       };
++
++      leds {
++              pinctrl_gpio_leds: gpio_leds {
++                      atmel,pins = <AT91_PIOB 11 AT91_PERIPH_GPIO AT91_PINCTRL_NONE
++                                    AT91_PIOB 12 AT91_PERIPH_GPIO AT91_PINCTRL_NONE
++                                    AT91_PIOB 13 AT91_PERIPH_GPIO AT91_PINCTRL_NONE>;
++              };
++      };
+ }; /* pinctrl */
+ &pwm0 {
+@@ -580,7 +594,7 @@
+ &sdmmc0 {
+       bus-width = <4>;
+       pinctrl-names = "default";
+-      pinctrl-0 = <&pinctrl_sdmmc0_default>;
++      pinctrl-0 = <&pinctrl_sdmmc0_default &pinctrl_sdmmc0_cd>;
+       status = "okay";
+       cd-gpios = <&pioA 23 GPIO_ACTIVE_LOW>;
+       disable-wp;
+--- a/arch/arm/boot/dts/at91-sama5d3_xplained.dts
++++ b/arch/arm/boot/dts/at91-sama5d3_xplained.dts
+@@ -57,6 +57,8 @@
+                       };
+                       spi0: spi@f0004000 {
++                              pinctrl-names = "default";
++                              pinctrl-0 = <&pinctrl_spi0_cs>;
+                               cs-gpios = <&pioD 13 0>, <0>, <0>, <&pioD 16 0>;
+                               status = "okay";
+                       };
+@@ -169,6 +171,8 @@
+                       };
+                       spi1: spi@f8008000 {
++                              pinctrl-names = "default";
++                              pinctrl-0 = <&pinctrl_spi1_cs>;
+                               cs-gpios = <&pioC 25 0>;
+                               status = "okay";
+                       };
+@@ -248,6 +252,26 @@
+                                                       <AT91_PIOE 3 AT91_PERIPH_GPIO AT91_PINCTRL_NONE
+                                                        AT91_PIOE 4 AT91_PERIPH_GPIO AT91_PINCTRL_NONE>;
+                                       };
++
++                                      pinctrl_gpio_leds: gpio_leds_default {
++                                              atmel,pins =
++                                                      <AT91_PIOE 23 AT91_PERIPH_GPIO AT91_PINCTRL_NONE
++                                                       AT91_PIOE 24 AT91_PERIPH_GPIO AT91_PINCTRL_NONE>;
++                                      };
++
++                                      pinctrl_spi0_cs: spi0_cs_default {
++                                              atmel,pins =
++                                                      <AT91_PIOD 13 AT91_PERIPH_GPIO AT91_PINCTRL_NONE
++                                                       AT91_PIOD 16 AT91_PERIPH_GPIO AT91_PINCTRL_NONE>;
++                                      };
++
++                                      pinctrl_spi1_cs: spi1_cs_default {
++                                              atmel,pins = <AT91_PIOC 25 AT91_PERIPH_GPIO AT91_PINCTRL_NONE>;
++                                      };
++
++                                      pinctrl_vcc_mmc0_reg_gpio: vcc_mmc0_reg_gpio_default {
++                                              atmel,pins = <AT91_PIOE 2 AT91_PERIPH_GPIO AT91_PINCTRL_NONE>;
++                                      };
+                               };
+                       };
+               };
+@@ -339,6 +363,8 @@
+       vcc_mmc0_reg: fixedregulator_mmc0 {
+               compatible = "regulator-fixed";
++              pinctrl-names = "default";
++              pinctrl-0 = <&pinctrl_vcc_mmc0_reg_gpio>;
+               gpio = <&pioE 2 GPIO_ACTIVE_LOW>;
+               regulator-name = "mmc0-card-supply";
+               regulator-min-microvolt = <3300000>;
+@@ -362,6 +388,9 @@
+       leds {
+               compatible = "gpio-leds";
++              pinctrl-names = "default";
++              pinctrl-0 = <&pinctrl_gpio_leds>;
++              status = "okay";
+               d2 {
+                       label = "d2";
+--- a/arch/arm/boot/dts/at91-sama5d4_xplained.dts
++++ b/arch/arm/boot/dts/at91-sama5d4_xplained.dts
+@@ -90,6 +90,8 @@
+                       };
+                       spi1: spi@fc018000 {
++                              pinctrl-names = "default";
++                              pinctrl-0 = <&pinctrl_spi0_cs>;
+                               cs-gpios = <&pioB 21 0>;
+                               status = "okay";
+                       };
+@@ -147,6 +149,19 @@
+                                               atmel,pins =
+                                                       <AT91_PIOE 1 AT91_PERIPH_GPIO AT91_PINCTRL_PULL_UP_DEGLITCH>;
+                                       };
++                                      pinctrl_spi0_cs: spi0_cs_default {
++                                              atmel,pins =
++                                                      <AT91_PIOB 21 AT91_PERIPH_GPIO AT91_PINCTRL_NONE>;
++                                      };
++                                      pinctrl_gpio_leds: gpio_leds_default {
++                                              atmel,pins =
++                                                      <AT91_PIOD 30 AT91_PERIPH_GPIO AT91_PINCTRL_NONE
++                                                       AT91_PIOE 15 AT91_PERIPH_GPIO AT91_PINCTRL_NONE>;
++                                      };
++                                      pinctrl_vcc_mmc1_reg: vcc_mmc1_reg {
++                                              atmel,pins =
++                                                      <AT91_PIOE 4 AT91_PERIPH_GPIO AT91_PINCTRL_NONE>;
++                                      };
+                               };
+                       };
+               };
+@@ -252,6 +267,8 @@
+       leds {
+               compatible = "gpio-leds";
++              pinctrl-names = "default";
++              pinctrl-0 = <&pinctrl_gpio_leds>;
+               status = "okay";
+               d8 {
+@@ -278,6 +295,8 @@
+       vcc_mmc1_reg: fixedregulator_mmc1 {
+               compatible = "regulator-fixed";
++              pinctrl-names = "default";
++              pinctrl-0 = <&pinctrl_vcc_mmc1_reg>;
+               gpio = <&pioE 4 GPIO_ACTIVE_LOW>;
+               regulator-name = "VDD MCI1";
+               regulator-min-microvolt = <3300000>;
diff --git a/queue-5.13/char-tpm-kconfig-remove-bad-i2c-cr50-select.patch b/queue-5.13/char-tpm-kconfig-remove-bad-i2c-cr50-select.patch
new file mode 100644 (file)
index 0000000..18cafc4
--- /dev/null
@@ -0,0 +1,33 @@
+From 847fdae1579f4ee930b01f24a7847b8043bf468c Mon Sep 17 00:00:00 2001
+From: Adrian Ratiu <adrian.ratiu@collabora.com>
+Date: Tue, 27 Jul 2021 20:13:12 +0300
+Subject: char: tpm: Kconfig: remove bad i2c cr50 select
+
+From: Adrian Ratiu <adrian.ratiu@collabora.com>
+
+commit 847fdae1579f4ee930b01f24a7847b8043bf468c upstream.
+
+This fixes a minor bug which went unnoticed during the initial
+driver upstreaming review: TCG_CR50 does not exist in mainline
+kernels, so remove it.
+
+Fixes: 3a253caaad11 ("char: tpm: add i2c driver for cr50")
+Cc: stable@vger.kernel.org
+Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Adrian Ratiu <adrian.ratiu@collabora.com>
+Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/char/tpm/Kconfig |    1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/drivers/char/tpm/Kconfig
++++ b/drivers/char/tpm/Kconfig
+@@ -89,7 +89,6 @@ config TCG_TIS_SYNQUACER
+ config TCG_TIS_I2C_CR50
+       tristate "TPM Interface Specification 2.0 Interface (I2C - CR50)"
+       depends on I2C
+-      select TCG_CR50
+       help
+         This is a driver for the Google cr50 I2C TPM interface which is a
+         custom microcontroller and requires a custom i2c protocol interface
diff --git a/queue-5.13/fuse-flush-extending-writes.patch b/queue-5.13/fuse-flush-extending-writes.patch
new file mode 100644 (file)
index 0000000..b18abd3
--- /dev/null
@@ -0,0 +1,49 @@
+From 59bda8ecee2ffc6a602b7bf2b9e43ca669cdbdcd Mon Sep 17 00:00:00 2001
+From: Miklos Szeredi <mszeredi@redhat.com>
+Date: Tue, 31 Aug 2021 14:18:08 +0200
+Subject: fuse: flush extending writes
+
+From: Miklos Szeredi <mszeredi@redhat.com>
+
+commit 59bda8ecee2ffc6a602b7bf2b9e43ca669cdbdcd upstream.
+
+Callers of fuse_writeback_range() assume that the file is ready for
+modification by the server in the supplied byte range after the call
+returns.
+
+If there's a write that extends the file beyond the end of the supplied
+range, then the file needs to be extended to at least the end of the range,
+but currently that's not done.
+
+There are at least two cases where this can cause problems:
+
+ - copy_file_range() will return short count if the file is not extended
+   up to end of the source range.
+
+ - FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE will not extend the file,
+   hence the region may not be fully allocated.
+
+Fix by flushing writes from the start of the range up to the end of the
+file.  This could be optimized if the writes are non-extending, etc, but
+it's probably not worth the trouble.
+
+Fixes: a2bc92362941 ("fuse: fix copy_file_range() in the writeback case")
+Fixes: 6b1bdb56b17c ("fuse: allow fallocate(FALLOC_FL_ZERO_RANGE)")
+Cc: <stable@vger.kernel.org>  # v5.2
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/fuse/file.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/fuse/file.c
++++ b/fs/fuse/file.c
+@@ -2886,7 +2886,7 @@ fuse_direct_IO(struct kiocb *iocb, struc
+ static int fuse_writeback_range(struct inode *inode, loff_t start, loff_t end)
+ {
+-      int err = filemap_write_and_wait_range(inode->i_mapping, start, end);
++      int err = filemap_write_and_wait_range(inode->i_mapping, start, -1);
+       if (!err)
+               fuse_sync_writes(inode);
diff --git a/queue-5.13/fuse-truncate-pagecache-on-atomic_o_trunc.patch b/queue-5.13/fuse-truncate-pagecache-on-atomic_o_trunc.patch
new file mode 100644 (file)
index 0000000..04fd413
--- /dev/null
@@ -0,0 +1,58 @@
+From 76224355db7570cbe6b6f75c8929a1558828dd55 Mon Sep 17 00:00:00 2001
+From: Miklos Szeredi <mszeredi@redhat.com>
+Date: Tue, 17 Aug 2021 21:05:16 +0200
+Subject: fuse: truncate pagecache on atomic_o_trunc
+
+From: Miklos Szeredi <mszeredi@redhat.com>
+
+commit 76224355db7570cbe6b6f75c8929a1558828dd55 upstream.
+
+fuse_finish_open() will be called with FUSE_NOWRITE in case of atomic
+O_TRUNC.  This can deadlock with fuse_wait_on_page_writeback() in
+fuse_launder_page() triggered by invalidate_inode_pages2().
+
+Fix by replacing invalidate_inode_pages2() in fuse_finish_open() with a
+truncate_pagecache() call.  This makes sense regardless of FOPEN_KEEP_CACHE
+or fc->writeback cache, so do it unconditionally.
+
+Reported-by: Xie Yongji <xieyongji@bytedance.com>
+Reported-and-tested-by: syzbot+bea44a5189836d956894@syzkaller.appspotmail.com
+Fixes: e4648309b85a ("fuse: truncate pending writes on O_TRUNC")
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/fuse/file.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/fs/fuse/file.c
++++ b/fs/fuse/file.c
+@@ -198,12 +198,11 @@ void fuse_finish_open(struct inode *inod
+       struct fuse_file *ff = file->private_data;
+       struct fuse_conn *fc = get_fuse_conn(inode);
+-      if (!(ff->open_flags & FOPEN_KEEP_CACHE))
+-              invalidate_inode_pages2(inode->i_mapping);
+       if (ff->open_flags & FOPEN_STREAM)
+               stream_open(inode, file);
+       else if (ff->open_flags & FOPEN_NONSEEKABLE)
+               nonseekable_open(inode, file);
++
+       if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC)) {
+               struct fuse_inode *fi = get_fuse_inode(inode);
+@@ -211,10 +210,14 @@ void fuse_finish_open(struct inode *inod
+               fi->attr_version = atomic64_inc_return(&fc->attr_version);
+               i_size_write(inode, 0);
+               spin_unlock(&fi->lock);
++              truncate_pagecache(inode, 0);
+               fuse_invalidate_attr(inode);
+               if (fc->writeback_cache)
+                       file_update_time(file);
++      } else if (!(ff->open_flags & FOPEN_KEEP_CACHE)) {
++              invalidate_inode_pages2(inode->i_mapping);
+       }
++
+       if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache)
+               fuse_link_write_file(file);
+ }
diff --git a/queue-5.13/fuse-wait-for-writepages-in-syncfs.patch b/queue-5.13/fuse-wait-for-writepages-in-syncfs.patch
new file mode 100644 (file)
index 0000000..15f0979
--- /dev/null
@@ -0,0 +1,242 @@
+From 660585b56e63ca034ad506ea53c807c5cdca3196 Mon Sep 17 00:00:00 2001
+From: Miklos Szeredi <mszeredi@redhat.com>
+Date: Wed, 1 Sep 2021 12:39:02 +0200
+Subject: fuse: wait for writepages in syncfs
+
+From: Miklos Szeredi <mszeredi@redhat.com>
+
+commit 660585b56e63ca034ad506ea53c807c5cdca3196 upstream.
+
+In case of fuse the MM subsystem doesn't guarantee that page writeback
+completes by the time ->sync_fs() is called.  This is because fuse
+completes page writeback immediately to prevent DoS of memory reclaim by
+the userspace file server.
+
+This means that fuse itself must ensure that writes are synced before
+sending the SYNCFS request to the server.
+
+Introduce sync buckets, that hold a counter for the number of outstanding
+write requests.  On syncfs replace the current bucket with a new one and
+wait until the old bucket's counter goes down to zero.
+
+It is possible to have multiple syncfs calls in parallel, in which case
+there could be more than one waited-on buckets.  Descendant buckets must
+not complete until the parent completes.  Add a count to the child (new)
+bucket until the (parent) old bucket completes.
+
+Use RCU protection to dereference the current bucket and to wake up an
+emptied bucket.  Use fc->lock to protect against parallel assignments to
+the current bucket.
+
+This leaves just the counter to be a possible scalability issue.  The
+fc->num_waiting counter has a similar issue, so both should be addressed at
+the same time.
+
+Reported-by: Amir Goldstein <amir73il@gmail.com>
+Fixes: 2d82ab251ef0 ("virtiofs: propagate sync() to file server")
+Cc: <stable@vger.kernel.org> # v5.14
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/fuse/file.c   |   21 +++++++++++++++++++
+ fs/fuse/fuse_i.h |   19 +++++++++++++++++
+ fs/fuse/inode.c  |   60 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ 3 files changed, 100 insertions(+)
+
+--- a/fs/fuse/file.c
++++ b/fs/fuse/file.c
+@@ -392,6 +392,7 @@ struct fuse_writepage_args {
+       struct list_head queue_entry;
+       struct fuse_writepage_args *next;
+       struct inode *inode;
++      struct fuse_sync_bucket *bucket;
+ };
+ static struct fuse_writepage_args *fuse_find_writeback(struct fuse_inode *fi,
+@@ -1613,6 +1614,9 @@ static void fuse_writepage_free(struct f
+       struct fuse_args_pages *ap = &wpa->ia.ap;
+       int i;
++      if (wpa->bucket)
++              fuse_sync_bucket_dec(wpa->bucket);
++
+       for (i = 0; i < ap->num_pages; i++)
+               __free_page(ap->pages[i]);
+@@ -1876,6 +1880,20 @@ static struct fuse_writepage_args *fuse_
+ }
++static void fuse_writepage_add_to_bucket(struct fuse_conn *fc,
++                                       struct fuse_writepage_args *wpa)
++{
++      if (!fc->sync_fs)
++              return;
++
++      rcu_read_lock();
++      /* Prevent resurrection of dead bucket in unlikely race with syncfs */
++      do {
++              wpa->bucket = rcu_dereference(fc->curr_bucket);
++      } while (unlikely(!atomic_inc_not_zero(&wpa->bucket->count)));
++      rcu_read_unlock();
++}
++
+ static int fuse_writepage_locked(struct page *page)
+ {
+       struct address_space *mapping = page->mapping;
+@@ -1903,6 +1921,7 @@ static int fuse_writepage_locked(struct
+       if (!wpa->ia.ff)
+               goto err_nofile;
++      fuse_writepage_add_to_bucket(fc, wpa);
+       fuse_write_args_fill(&wpa->ia, wpa->ia.ff, page_offset(page), 0);
+       copy_highpage(tmp_page, page);
+@@ -2153,6 +2172,8 @@ static int fuse_writepages_fill(struct p
+                       __free_page(tmp_page);
+                       goto out_unlock;
+               }
++              fuse_writepage_add_to_bucket(fc, wpa);
++
+               data->max_pages = 1;
+               ap = &wpa->ia.ap;
+--- a/fs/fuse/fuse_i.h
++++ b/fs/fuse/fuse_i.h
+@@ -515,6 +515,13 @@ struct fuse_fs_context {
+       void **fudptr;
+ };
++struct fuse_sync_bucket {
++      /* count is a possible scalability bottleneck */
++      atomic_t count;
++      wait_queue_head_t waitq;
++      struct rcu_head rcu;
++};
++
+ /**
+  * A Fuse connection.
+  *
+@@ -807,6 +814,9 @@ struct fuse_conn {
+       /** List of filesystems using this connection */
+       struct list_head mounts;
++
++      /* New writepages go into this bucket */
++      struct fuse_sync_bucket __rcu *curr_bucket;
+ };
+ /*
+@@ -910,6 +920,15 @@ static inline void fuse_page_descs_lengt
+               descs[i].length = PAGE_SIZE - descs[i].offset;
+ }
++static inline void fuse_sync_bucket_dec(struct fuse_sync_bucket *bucket)
++{
++      /* Need RCU protection to prevent use after free after the decrement */
++      rcu_read_lock();
++      if (atomic_dec_and_test(&bucket->count))
++              wake_up(&bucket->waitq);
++      rcu_read_unlock();
++}
++
+ /** Device operations */
+ extern const struct file_operations fuse_dev_operations;
+--- a/fs/fuse/inode.c
++++ b/fs/fuse/inode.c
+@@ -506,6 +506,57 @@ static int fuse_statfs(struct dentry *de
+       return err;
+ }
++static struct fuse_sync_bucket *fuse_sync_bucket_alloc(void)
++{
++      struct fuse_sync_bucket *bucket;
++
++      bucket = kzalloc(sizeof(*bucket), GFP_KERNEL | __GFP_NOFAIL);
++      if (bucket) {
++              init_waitqueue_head(&bucket->waitq);
++              /* Initial active count */
++              atomic_set(&bucket->count, 1);
++      }
++      return bucket;
++}
++
++static void fuse_sync_fs_writes(struct fuse_conn *fc)
++{
++      struct fuse_sync_bucket *bucket, *new_bucket;
++      int count;
++
++      new_bucket = fuse_sync_bucket_alloc();
++      spin_lock(&fc->lock);
++      bucket = rcu_dereference_protected(fc->curr_bucket, 1);
++      count = atomic_read(&bucket->count);
++      WARN_ON(count < 1);
++      /* No outstanding writes? */
++      if (count == 1) {
++              spin_unlock(&fc->lock);
++              kfree(new_bucket);
++              return;
++      }
++
++      /*
++       * Completion of new bucket depends on completion of this bucket, so add
++       * one more count.
++       */
++      atomic_inc(&new_bucket->count);
++      rcu_assign_pointer(fc->curr_bucket, new_bucket);
++      spin_unlock(&fc->lock);
++      /*
++       * Drop initial active count.  At this point if all writes in this and
++       * ancestor buckets complete, the count will go to zero and this task
++       * will be woken up.
++       */
++      atomic_dec(&bucket->count);
++
++      wait_event(bucket->waitq, atomic_read(&bucket->count) == 0);
++
++      /* Drop temp count on descendant bucket */
++      fuse_sync_bucket_dec(new_bucket);
++      kfree_rcu(bucket, rcu);
++}
++
+ static int fuse_sync_fs(struct super_block *sb, int wait)
+ {
+       struct fuse_mount *fm = get_fuse_mount_super(sb);
+@@ -528,6 +579,8 @@ static int fuse_sync_fs(struct super_blo
+       if (!fc->sync_fs)
+               return 0;
++      fuse_sync_fs_writes(fc);
++
+       memset(&inarg, 0, sizeof(inarg));
+       args.in_numargs = 1;
+       args.in_args[0].size = sizeof(inarg);
+@@ -763,6 +816,7 @@ void fuse_conn_put(struct fuse_conn *fc)
+ {
+       if (refcount_dec_and_test(&fc->count)) {
+               struct fuse_iqueue *fiq = &fc->iq;
++              struct fuse_sync_bucket *bucket;
+               if (IS_ENABLED(CONFIG_FUSE_DAX))
+                       fuse_dax_conn_free(fc);
+@@ -770,6 +824,11 @@ void fuse_conn_put(struct fuse_conn *fc)
+                       fiq->ops->release(fiq);
+               put_pid_ns(fc->pid_ns);
+               put_user_ns(fc->user_ns);
++              bucket = rcu_dereference_protected(fc->curr_bucket, 1);
++              if (bucket) {
++                      WARN_ON(atomic_read(&bucket->count) != 1);
++                      kfree(bucket);
++              }
+               fc->release(fc);
+       }
+ }
+@@ -1366,6 +1425,7 @@ int fuse_fill_super_common(struct super_
+       if (sb->s_flags & SB_MANDLOCK)
+               goto err;
++      rcu_assign_pointer(fc->curr_bucket, fuse_sync_bucket_alloc());
+       fuse_sb_defaults(sb);
+       if (ctx->is_bdev) {
diff --git a/queue-5.13/ima-remove-the-dependency-on-crypto_md5.patch b/queue-5.13/ima-remove-the-dependency-on-crypto_md5.patch
new file mode 100644 (file)
index 0000000..d8acf2e
--- /dev/null
@@ -0,0 +1,45 @@
+From 8510505d55e194d3f6c9644c9f9d12c4f6b0395a Mon Sep 17 00:00:00 2001
+From: THOBY Simon <Simon.THOBY@viveris.fr>
+Date: Mon, 16 Aug 2021 08:10:59 +0000
+Subject: IMA: remove the dependency on CRYPTO_MD5
+
+From: THOBY Simon <Simon.THOBY@viveris.fr>
+
+commit 8510505d55e194d3f6c9644c9f9d12c4f6b0395a upstream.
+
+MD5 is a weak digest algorithm that shouldn't be used for cryptographic
+operation. It hinders the efficiency of a patch set that aims to limit
+the digests allowed for the extended file attribute namely security.ima.
+MD5 is no longer a requirement for IMA, nor should it be used there.
+
+The sole place where we still use the MD5 algorithm inside IMA is setting
+the ima_hash algorithm to MD5, if the user supplies 'ima_hash=md5'
+parameter on the command line.  With commit ab60368ab6a4 ("ima: Fallback
+to the builtin hash algorithm"), setting "ima_hash=md5" fails gracefully
+when CRYPTO_MD5 is not set:
+       ima: Can not allocate md5 (reason: -2)
+       ima: Allocating md5 failed, going to use default hash algorithm sha256
+
+Remove the CRYPTO_MD5 dependency for IMA.
+
+Signed-off-by: THOBY Simon <Simon.THOBY@viveris.fr>
+Reviewed-by: Lakshmi Ramasubramanian <nramas@linux.microsoft.com>
+[zohar@linux.ibm.com: include commit number in patch description for
+stable.]
+Cc: stable@vger.kernel.org # 4.17
+Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ security/integrity/ima/Kconfig |    1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/security/integrity/ima/Kconfig
++++ b/security/integrity/ima/Kconfig
+@@ -6,7 +6,6 @@ config IMA
+       select SECURITYFS
+       select CRYPTO
+       select CRYPTO_HMAC
+-      select CRYPTO_MD5
+       select CRYPTO_SHA1
+       select CRYPTO_HASH_INFO
+       select TCG_TPM if HAS_IOMEM && !UML
diff --git a/queue-5.13/ima-remove-wmissing-prototypes-warning.patch b/queue-5.13/ima-remove-wmissing-prototypes-warning.patch
new file mode 100644 (file)
index 0000000..4464b56
--- /dev/null
@@ -0,0 +1,40 @@
+From a32ad90426a9c8eb3915eed26e08ce133bd9e0da Mon Sep 17 00:00:00 2001
+From: Austin Kim <austin.kim@lge.com>
+Date: Tue, 29 Jun 2021 14:50:50 +0100
+Subject: IMA: remove -Wmissing-prototypes warning
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Austin Kim <austin.kim@lge.com>
+
+commit a32ad90426a9c8eb3915eed26e08ce133bd9e0da upstream.
+
+With W=1 build, the compiler throws warning message as below:
+
+   security/integrity/ima/ima_mok.c:24:12: warning:
+   no previous prototype for ‘ima_mok_init’ [-Wmissing-prototypes]
+       __init int ima_mok_init(void)
+
+Silence the warning by adding static keyword to ima_mok_init().
+
+Signed-off-by: Austin Kim <austin.kim@lge.com>
+Fixes: 41c89b64d718 ("IMA: create machine owner and blacklist keyrings")
+Cc: stable@vger.kernel.org
+Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ security/integrity/ima/ima_mok.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/security/integrity/ima/ima_mok.c
++++ b/security/integrity/ima/ima_mok.c
+@@ -21,7 +21,7 @@ struct key *ima_blacklist_keyring;
+ /*
+  * Allocate the IMA blacklist keyring
+  */
+-__init int ima_mok_init(void)
++static __init int ima_mok_init(void)
+ {
+       struct key_restriction *restriction;
diff --git a/queue-5.13/io-wq-check-max_worker-limits-if-a-worker-transitions-bound-state.patch b/queue-5.13/io-wq-check-max_worker-limits-if-a-worker-transitions-bound-state.patch
new file mode 100644 (file)
index 0000000..feb447f
--- /dev/null
@@ -0,0 +1,104 @@
+From ecc53c48c13d995e6fe5559e30ffee48d92784fd Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Sun, 29 Aug 2021 16:13:03 -0600
+Subject: io-wq: check max_worker limits if a worker transitions bound state
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit ecc53c48c13d995e6fe5559e30ffee48d92784fd upstream.
+
+For the two places where new workers are created, we diligently check if
+we are allowed to create a new worker. If we're currently at the limit
+of how many workers of a given type we can have, then we don't create
+any new ones.
+
+If you have a mixed workload with various types of bound and unbounded
+work, then it can happen that a worker finishes one type of work and
+is then transitioned to the other type. For this case, we don't check
+if we are actually allowed to do so. This can cause io-wq to temporarily
+exceed the allowed number of workers for a given type.
+
+When retrieving work, check that the types match. If they don't, check
+if we are allowed to transition to the other type. If not, then don't
+handle the new work.
+
+Cc: stable@vger.kernel.org
+Reported-by: Johannes Lundberg <johalun0@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io-wq.c |   33 ++++++++++++++++++++++++++++++---
+ 1 file changed, 30 insertions(+), 3 deletions(-)
+
+--- a/fs/io-wq.c
++++ b/fs/io-wq.c
+@@ -424,7 +424,28 @@ static void io_wait_on_hash(struct io_wq
+       spin_unlock(&wq->hash->wait.lock);
+ }
+-static struct io_wq_work *io_get_next_work(struct io_wqe *wqe)
++/*
++ * We can always run the work if the worker is currently the same type as
++ * the work (eg both are bound, or both are unbound). If they are not the
++ * same, only allow it if incrementing the worker count would be allowed.
++ */
++static bool io_worker_can_run_work(struct io_worker *worker,
++                                 struct io_wq_work *work)
++{
++      struct io_wqe_acct *acct;
++
++      if (!(worker->flags & IO_WORKER_F_BOUND) !=
++          !(work->flags & IO_WQ_WORK_UNBOUND))
++              return true;
++
++      /* not the same type, check if we'd go over the limit */
++      acct = io_work_get_acct(worker->wqe, work);
++      return acct->nr_workers < acct->max_workers;
++}
++
++static struct io_wq_work *io_get_next_work(struct io_wqe *wqe,
++                                         struct io_worker *worker,
++                                         bool *stalled)
+       __must_hold(wqe->lock)
+ {
+       struct io_wq_work_node *node, *prev;
+@@ -436,6 +457,9 @@ static struct io_wq_work *io_get_next_wo
+               work = container_of(node, struct io_wq_work, list);
++              if (!io_worker_can_run_work(worker, work))
++                      break;
++
+               /* not hashed, can run anytime */
+               if (!io_wq_is_hashed(work)) {
+                       wq_list_del(&wqe->work_list, node, prev);
+@@ -462,6 +486,7 @@ static struct io_wq_work *io_get_next_wo
+               raw_spin_unlock(&wqe->lock);
+               io_wait_on_hash(wqe, stall_hash);
+               raw_spin_lock(&wqe->lock);
++              *stalled = true;
+       }
+       return NULL;
+@@ -501,6 +526,7 @@ static void io_worker_handle_work(struct
+       do {
+               struct io_wq_work *work;
++              bool stalled;
+ get_next:
+               /*
+                * If we got some work, mark us as busy. If we didn't, but
+@@ -509,10 +535,11 @@ get_next:
+                * can't make progress, any work completion or insertion will
+                * clear the stalled flag.
+                */
+-              work = io_get_next_work(wqe);
++              stalled = false;
++              work = io_get_next_work(wqe, worker, &stalled);
+               if (work)
+                       __io_worker_busy(wqe, worker, work);
+-              else if (!wq_list_empty(&wqe->work_list))
++              else if (stalled)
+                       wqe->flags |= IO_WQE_FLAG_STALLED;
+               raw_spin_unlock_irq(&wqe->lock);
diff --git a/queue-5.13/kvm-arm64-unregister-hyp-sections-from-kmemleak-in-protected-mode.patch b/queue-5.13/kvm-arm64-unregister-hyp-sections-from-kmemleak-in-protected-mode.patch
new file mode 100644 (file)
index 0000000..2324c2f
--- /dev/null
@@ -0,0 +1,54 @@
+From 47e6223c841e029bfc23c3ce594dac5525cebaf8 Mon Sep 17 00:00:00 2001
+From: Marc Zyngier <maz@kernel.org>
+Date: Mon, 2 Aug 2021 13:38:30 +0100
+Subject: KVM: arm64: Unregister HYP sections from kmemleak in protected mode
+
+From: Marc Zyngier <maz@kernel.org>
+
+commit 47e6223c841e029bfc23c3ce594dac5525cebaf8 upstream.
+
+Booting a KVM host in protected mode with kmemleak quickly results
+in a pretty bad crash, as kmemleak doesn't know that the HYP sections
+have been taken away. This is specially true for the BSS section,
+which is part of the kernel BSS section and registered at boot time
+by kmemleak itself.
+
+Unregister the HYP part of the BSS before making that section
+HYP-private. The rest of the HYP-specific data is obtained via
+the page allocator or lives in other sections, none of which is
+subjected to kmemleak.
+
+Fixes: 90134ac9cabb ("KVM: arm64: Protect the .hyp sections from the host")
+Reviewed-by: Quentin Perret <qperret@google.com>
+Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Cc: stable@vger.kernel.org # 5.13
+Link: https://lore.kernel.org/r/20210802123830.2195174-3-maz@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/kvm/arm.c |    7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/arch/arm64/kvm/arm.c
++++ b/arch/arm64/kvm/arm.c
+@@ -15,6 +15,7 @@
+ #include <linux/fs.h>
+ #include <linux/mman.h>
+ #include <linux/sched.h>
++#include <linux/kmemleak.h>
+ #include <linux/kvm.h>
+ #include <linux/kvm_irqfd.h>
+ #include <linux/irqbypass.h>
+@@ -1957,6 +1958,12 @@ static int finalize_hyp_mode(void)
+       if (ret)
+               return ret;
++      /*
++       * Exclude HYP BSS from kmemleak so that it doesn't get peeked
++       * at, which would end badly once the section is inaccessible.
++       * None of other sections should ever be introspected.
++       */
++      kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
+       ret = pkvm_mark_hyp_section(__hyp_bss);
+       if (ret)
+               return ret;
diff --git a/queue-5.13/kvm-nvmx-unconditionally-clear-nested.pi_pending-on-nested-vm-enter.patch b/queue-5.13/kvm-nvmx-unconditionally-clear-nested.pi_pending-on-nested-vm-enter.patch
new file mode 100644 (file)
index 0000000..0855571
--- /dev/null
@@ -0,0 +1,60 @@
+From f7782bb8d818d8f47c26b22079db10599922787a Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Tue, 10 Aug 2021 07:45:26 -0700
+Subject: KVM: nVMX: Unconditionally clear nested.pi_pending on nested VM-Enter
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit f7782bb8d818d8f47c26b22079db10599922787a upstream.
+
+Clear nested.pi_pending on nested VM-Enter even if L2 will run without
+posted interrupts enabled.  If nested.pi_pending is left set from a
+previous L2, vmx_complete_nested_posted_interrupt() will pick up the
+stale flag and exit to userspace with an "internal emulation error" due
+the new L2 not having a valid nested.pi_desc.
+
+Arguably, vmx_complete_nested_posted_interrupt() should first check for
+posted interrupts being enabled, but it's also completely reasonable that
+KVM wouldn't screw up a fundamental flag.  Not to mention that the mere
+existence of nested.pi_pending is a long-standing bug as KVM shouldn't
+move the posted interrupt out of the IRR until it's actually processed,
+e.g. KVM effectively drops an interrupt when it performs a nested VM-Exit
+with a "pending" posted interrupt.  Fixing the mess is a future problem.
+
+Prior to vmx_complete_nested_posted_interrupt() interpreting a null PI
+descriptor as an error, this was a benign bug as the null PI descriptor
+effectively served as a check on PI not being enabled.  Even then, the
+new flow did not become problematic until KVM started checking the result
+of kvm_check_nested_events().
+
+Fixes: 705699a13994 ("KVM: nVMX: Enable nested posted interrupt processing")
+Fixes: 966eefb89657 ("KVM: nVMX: Disable vmcs02 posted interrupts if vmcs12 PID isn't mappable")
+Fixes: 47d3530f86c0 ("KVM: x86: Exit to userspace when kvm_check_nested_events fails")
+Cc: stable@vger.kernel.org
+Cc: Jim Mattson <jmattson@google.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20210810144526.2662272-1-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx/nested.c |    7 +++----
+ 1 file changed, 3 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -2226,12 +2226,11 @@ static void prepare_vmcs02_early(struct
+                        ~PIN_BASED_VMX_PREEMPTION_TIMER);
+       /* Posted interrupts setting is only taken from vmcs12.  */
+-      if (nested_cpu_has_posted_intr(vmcs12)) {
++      vmx->nested.pi_pending = false;
++      if (nested_cpu_has_posted_intr(vmcs12))
+               vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv;
+-              vmx->nested.pi_pending = false;
+-      } else {
++      else
+               exec_control &= ~PIN_BASED_POSTED_INTR;
+-      }
+       pin_controls_set(vmx, exec_control);
+       /*
diff --git a/queue-5.13/kvm-s390-index-kvm-arch.idle_mask-by-vcpu_idx.patch b/queue-5.13/kvm-s390-index-kvm-arch.idle_mask-by-vcpu_idx.patch
new file mode 100644 (file)
index 0000000..ae66001
--- /dev/null
@@ -0,0 +1,122 @@
+From a3e03bc1368c1bc16e19b001fc96dc7430573cc8 Mon Sep 17 00:00:00 2001
+From: Halil Pasic <pasic@linux.ibm.com>
+Date: Fri, 27 Aug 2021 14:54:29 +0200
+Subject: KVM: s390: index kvm->arch.idle_mask by vcpu_idx
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Halil Pasic <pasic@linux.ibm.com>
+
+commit a3e03bc1368c1bc16e19b001fc96dc7430573cc8 upstream.
+
+While in practice vcpu->vcpu_idx ==  vcpu->vcp_id is often true, it may
+not always be, and we must not rely on this. Reason is that KVM decides
+the vcpu_idx, userspace decides the vcpu_id, thus the two might not
+match.
+
+Currently kvm->arch.idle_mask is indexed by vcpu_id, which implies
+that code like
+for_each_set_bit(vcpu_id, kvm->arch.idle_mask, online_vcpus) {
+                vcpu = kvm_get_vcpu(kvm, vcpu_id);
+               do_stuff(vcpu);
+}
+is not legit. Reason is that kvm_get_vcpu expects an vcpu_idx, not an
+vcpu_id.  The trouble is, we do actually use kvm->arch.idle_mask like
+this. To fix this problem we have two options. Either use
+kvm_get_vcpu_by_id(vcpu_id), which would loop to find the right vcpu_id,
+or switch to indexing via vcpu_idx. The latter is preferable for obvious
+reasons.
+
+Let us make switch from indexing kvm->arch.idle_mask by vcpu_id to
+indexing it by vcpu_idx.  To keep gisa_int.kicked_mask indexed by the
+same index as idle_mask lets make the same change for it as well.
+
+Fixes: 1ee0bc559dc3 ("KVM: s390: get rid of local_int array")
+Signed-off-by: Halil Pasic <pasic@linux.ibm.com>
+Reviewed-by: Christian Bornträger <borntraeger@de.ibm.com>
+Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
+Cc: <stable@vger.kernel.org> # 3.15+
+Link: https://lore.kernel.org/r/20210827125429.1912577-1-pasic@linux.ibm.com
+Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/include/asm/kvm_host.h |    1 +
+ arch/s390/kvm/interrupt.c        |   12 ++++++------
+ arch/s390/kvm/kvm-s390.c         |    2 +-
+ arch/s390/kvm/kvm-s390.h         |    2 +-
+ 4 files changed, 9 insertions(+), 8 deletions(-)
+
+--- a/arch/s390/include/asm/kvm_host.h
++++ b/arch/s390/include/asm/kvm_host.h
+@@ -962,6 +962,7 @@ struct kvm_arch{
+       atomic64_t cmma_dirty_pages;
+       /* subset of available cpu features enabled by user space */
+       DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
++      /* indexed by vcpu_idx */
+       DECLARE_BITMAP(idle_mask, KVM_MAX_VCPUS);
+       struct kvm_s390_gisa_interrupt gisa_int;
+       struct kvm_s390_pv pv;
+--- a/arch/s390/kvm/interrupt.c
++++ b/arch/s390/kvm/interrupt.c
+@@ -419,13 +419,13 @@ static unsigned long deliverable_irqs(st
+ static void __set_cpu_idle(struct kvm_vcpu *vcpu)
+ {
+       kvm_s390_set_cpuflags(vcpu, CPUSTAT_WAIT);
+-      set_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask);
++      set_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask);
+ }
+ static void __unset_cpu_idle(struct kvm_vcpu *vcpu)
+ {
+       kvm_s390_clear_cpuflags(vcpu, CPUSTAT_WAIT);
+-      clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask);
++      clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask);
+ }
+ static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
+@@ -3050,18 +3050,18 @@ int kvm_s390_get_irq_state(struct kvm_vc
+ static void __airqs_kick_single_vcpu(struct kvm *kvm, u8 deliverable_mask)
+ {
+-      int vcpu_id, online_vcpus = atomic_read(&kvm->online_vcpus);
++      int vcpu_idx, online_vcpus = atomic_read(&kvm->online_vcpus);
+       struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int;
+       struct kvm_vcpu *vcpu;
+-      for_each_set_bit(vcpu_id, kvm->arch.idle_mask, online_vcpus) {
+-              vcpu = kvm_get_vcpu(kvm, vcpu_id);
++      for_each_set_bit(vcpu_idx, kvm->arch.idle_mask, online_vcpus) {
++              vcpu = kvm_get_vcpu(kvm, vcpu_idx);
+               if (psw_ioint_disabled(vcpu))
+                       continue;
+               deliverable_mask &= (u8)(vcpu->arch.sie_block->gcr[6] >> 24);
+               if (deliverable_mask) {
+                       /* lately kicked but not yet running */
+-                      if (test_and_set_bit(vcpu_id, gi->kicked_mask))
++                      if (test_and_set_bit(vcpu_idx, gi->kicked_mask))
+                               return;
+                       kvm_s390_vcpu_wakeup(vcpu);
+                       return;
+--- a/arch/s390/kvm/kvm-s390.c
++++ b/arch/s390/kvm/kvm-s390.c
+@@ -4020,7 +4020,7 @@ static int vcpu_pre_run(struct kvm_vcpu
+               kvm_s390_patch_guest_per_regs(vcpu);
+       }
+-      clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
++      clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.gisa_int.kicked_mask);
+       vcpu->arch.sie_block->icptcode = 0;
+       cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
+--- a/arch/s390/kvm/kvm-s390.h
++++ b/arch/s390/kvm/kvm-s390.h
+@@ -79,7 +79,7 @@ static inline int is_vcpu_stopped(struct
+ static inline int is_vcpu_idle(struct kvm_vcpu *vcpu)
+ {
+-      return test_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask);
++      return test_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask);
+ }
+ static inline int kvm_is_ucontrol(struct kvm *kvm)
diff --git a/queue-5.13/kvm-vmx-avoid-running-vmx_handle_exit_irqoff-in-case-of-emulation.patch b/queue-5.13/kvm-vmx-avoid-running-vmx_handle_exit_irqoff-in-case-of-emulation.patch
new file mode 100644 (file)
index 0000000..9ccd5eb
--- /dev/null
@@ -0,0 +1,34 @@
+From 81b4b56d4f8130bbb99cf4e2b48082e5b4cfccb9 Mon Sep 17 00:00:00 2001
+From: Maxim Levitsky <mlevitsk@redhat.com>
+Date: Thu, 26 Aug 2021 12:57:49 +0300
+Subject: KVM: VMX: avoid running vmx_handle_exit_irqoff in case of emulation
+
+From: Maxim Levitsky <mlevitsk@redhat.com>
+
+commit 81b4b56d4f8130bbb99cf4e2b48082e5b4cfccb9 upstream.
+
+If we are emulating an invalid guest state, we don't have a correct
+exit reason, and thus we shouldn't do anything in this function.
+
+Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
+Message-Id: <20210826095750.1650467-2-mlevitsk@redhat.com>
+Cc: stable@vger.kernel.org
+Fixes: 95b5a48c4f2b ("KVM: VMX: Handle NMIs, #MCs and async #PFs in common irqs-disabled fn", 2019-06-18)
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx/vmx.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -6452,6 +6452,9 @@ static void vmx_handle_exit_irqoff(struc
+ {
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
++      if (vmx->emulation_required)
++              return;
++
+       if (vmx->exit_reason.basic == EXIT_REASON_EXTERNAL_INTERRUPT)
+               handle_external_interrupt_irqoff(vcpu);
+       else if (vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI)
diff --git a/queue-5.13/kvm-x86-clamp-host-mapping-level-to-max_level-in-kvm_mmu_max_mapping_level.patch b/queue-5.13/kvm-x86-clamp-host-mapping-level-to-max_level-in-kvm_mmu_max_mapping_level.patch
new file mode 100644 (file)
index 0000000..1e7b967
--- /dev/null
@@ -0,0 +1,74 @@
+From ec607a564f70519b340f7eb4cfc0f4a6b55285ac Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Fri, 6 Aug 2021 07:05:58 -0400
+Subject: KVM: x86: clamp host mapping level to max_level in kvm_mmu_max_mapping_level
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit ec607a564f70519b340f7eb4cfc0f4a6b55285ac upstream.
+
+This change started as a way to make kvm_mmu_hugepage_adjust a bit simpler,
+but it does fix two bugs as well.
+
+One bug is in zapping collapsible PTEs.  If a large page size is
+disallowed but not all of them, kvm_mmu_max_mapping_level will return the
+host mapping level and the small PTEs will be zapped up to that level.
+However, if e.g. 1GB are prohibited, we can still zap 4KB mapping and
+preserve the 2MB ones. This can happen for example when NX huge pages
+are in use.
+
+The second would happen when userspace backs guest memory
+with a 1gb hugepage but only assign a subset of the page to
+the guest.  1gb pages would be disallowed by the memslot, but
+not 2mb.  kvm_mmu_max_mapping_level() would fall through to the
+host_pfn_mapping_level() logic, see the 1gb hugepage, and map the whole
+thing into the guest.
+
+Fixes: 2f57b7051fe8 ("KVM: x86/mmu: Persist gfn_lpage_is_disallowed() to max_level")
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/mmu/mmu.c |   13 +++++--------
+ 1 file changed, 5 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -2754,6 +2754,7 @@ int kvm_mmu_max_mapping_level(struct kvm
+                             kvm_pfn_t pfn, int max_level)
+ {
+       struct kvm_lpage_info *linfo;
++      int host_level;
+       max_level = min(max_level, max_huge_page_level);
+       for ( ; max_level > PG_LEVEL_4K; max_level--) {
+@@ -2765,7 +2766,8 @@ int kvm_mmu_max_mapping_level(struct kvm
+       if (max_level == PG_LEVEL_4K)
+               return PG_LEVEL_4K;
+-      return host_pfn_mapping_level(kvm, gfn, pfn, slot);
++      host_level = host_pfn_mapping_level(kvm, gfn, pfn, slot);
++      return min(host_level, max_level);
+ }
+ int kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, gfn_t gfn,
+@@ -2789,17 +2791,12 @@ int kvm_mmu_hugepage_adjust(struct kvm_v
+       if (!slot)
+               return PG_LEVEL_4K;
+-      level = kvm_mmu_max_mapping_level(vcpu->kvm, slot, gfn, pfn, max_level);
+-      if (level == PG_LEVEL_4K)
+-              return level;
+-
+-      *req_level = level = min(level, max_level);
+-
+       /*
+        * Enforce the iTLB multihit workaround after capturing the requested
+        * level, which will be used to do precise, accurate accounting.
+        */
+-      if (huge_page_disallowed)
++      *req_level = level = kvm_mmu_max_mapping_level(vcpu->kvm, slot, gfn, pfn, max_level);
++      if (level == PG_LEVEL_4K || huge_page_disallowed)
+               return PG_LEVEL_4K;
+       /*
diff --git a/queue-5.13/kvm-x86-mmu-avoid-collision-with-present-sptes-in-tdp-mmu-lpage-stats.patch b/queue-5.13/kvm-x86-mmu-avoid-collision-with-present-sptes-in-tdp-mmu-lpage-stats.patch
new file mode 100644 (file)
index 0000000..9bea6ae
--- /dev/null
@@ -0,0 +1,82 @@
+From 088acd23526647844aec1c39db4ad02552c86c7b Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Mon, 2 Aug 2021 21:46:06 -0700
+Subject: KVM: x86/mmu: Avoid collision with !PRESENT SPTEs in TDP MMU lpage stats
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 088acd23526647844aec1c39db4ad02552c86c7b upstream.
+
+Factor in whether or not the old/new SPTEs are shadow-present when
+adjusting the large page stats in the TDP MMU.  A modified MMIO SPTE can
+toggle the page size bit, as bit 7 is used to store the MMIO generation,
+i.e. is_large_pte() can get a false positive when called on a MMIO SPTE.
+Ditto for nuking SPTEs with REMOVED_SPTE, which sets bit 7 in its magic
+value.
+
+Opportunistically move the logic below the check to verify at least one
+of the old/new SPTEs is shadow present.
+
+Use is/was_leaf even though is/was_present would suffice.  The code
+generation is roughly equivalent since all flags need to be computed
+prior to the code in question, and using the *_leaf flags will minimize
+the diff in a future enhancement to account all pages, i.e. will change
+the check to "is_leaf != was_leaf".
+
+Reviewed-by: David Matlack <dmatlack@google.com>
+Reviewed-by: Ben Gardon <bgardon@google.com>
+
+Fixes: 1699f65c8b65 ("kvm/x86: Fix 'lpages' kvm stat for TDM MMU")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Mingwei Zhang <mizhang@google.com>
+Message-Id: <20210803044607.599629-3-mizhang@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/mmu/tdp_mmu.c |   20 +++++++++++++-------
+ 1 file changed, 13 insertions(+), 7 deletions(-)
+
+--- a/arch/x86/kvm/mmu/tdp_mmu.c
++++ b/arch/x86/kvm/mmu/tdp_mmu.c
+@@ -410,6 +410,7 @@ static void __handle_changed_spte(struct
+       bool was_leaf = was_present && is_last_spte(old_spte, level);
+       bool is_leaf = is_present && is_last_spte(new_spte, level);
+       bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte);
++      bool was_large, is_large;
+       WARN_ON(level > PT64_ROOT_MAX_LEVEL);
+       WARN_ON(level < PG_LEVEL_4K);
+@@ -443,13 +444,6 @@ static void __handle_changed_spte(struct
+       trace_kvm_tdp_mmu_spte_changed(as_id, gfn, level, old_spte, new_spte);
+-      if (is_large_pte(old_spte) != is_large_pte(new_spte)) {
+-              if (is_large_pte(old_spte))
+-                      atomic64_sub(1, (atomic64_t*)&kvm->stat.lpages);
+-              else
+-                      atomic64_add(1, (atomic64_t*)&kvm->stat.lpages);
+-      }
+-
+       /*
+        * The only times a SPTE should be changed from a non-present to
+        * non-present state is when an MMIO entry is installed/modified/
+@@ -475,6 +469,18 @@ static void __handle_changed_spte(struct
+               return;
+       }
++      /*
++       * Update large page stats if a large page is being zapped, created, or
++       * is replacing an existing shadow page.
++       */
++      was_large = was_leaf && is_large_pte(old_spte);
++      is_large = is_leaf && is_large_pte(new_spte);
++      if (was_large != is_large) {
++              if (was_large)
++                      atomic64_sub(1, (atomic64_t *)&kvm->stat.lpages);
++              else
++                      atomic64_add(1, (atomic64_t *)&kvm->stat.lpages);
++      }
+       if (was_leaf && is_dirty_spte(old_spte) &&
+           (!is_present || !is_dirty_spte(new_spte) || pfn_changed))
diff --git a/queue-5.13/kvm-x86-update-vcpu-s-hv_clock-before-back-to-guest-when-tsc_offset-is-adjusted.patch b/queue-5.13/kvm-x86-update-vcpu-s-hv_clock-before-back-to-guest-when-tsc_offset-is-adjusted.patch
new file mode 100644 (file)
index 0000000..a69dd79
--- /dev/null
@@ -0,0 +1,40 @@
+From d9130a2dfdd4b21736c91b818f87dbc0ccd1e757 Mon Sep 17 00:00:00 2001
+From: Zelin Deng <zelin.deng@linux.alibaba.com>
+Date: Wed, 28 Apr 2021 10:22:01 +0800
+Subject: KVM: x86: Update vCPU's hv_clock before back to guest when tsc_offset is adjusted
+
+From: Zelin Deng <zelin.deng@linux.alibaba.com>
+
+commit d9130a2dfdd4b21736c91b818f87dbc0ccd1e757 upstream.
+
+When MSR_IA32_TSC_ADJUST is written by guest due to TSC ADJUST feature
+especially there's a big tsc warp (like a new vCPU is hot-added into VM
+which has been up for a long time), tsc_offset is added by a large value
+then go back to guest. This causes system time jump as tsc_timestamp is
+not adjusted in the meantime and pvclock monotonic character.
+To fix this, just notify kvm to update vCPU's guest time before back to
+guest.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Zelin Deng <zelin.deng@linux.alibaba.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Message-Id: <1619576521-81399-2-git-send-email-zelin.deng@linux.alibaba.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/x86.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -3223,6 +3223,10 @@ int kvm_set_msr_common(struct kvm_vcpu *
+                       if (!msr_info->host_initiated) {
+                               s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
+                               adjust_tsc_offset_guest(vcpu, adj);
++                              /* Before back to guest, tsc_timestamp must be adjusted
++                               * as well, otherwise guest's percpu pvclock time could jump.
++                               */
++                              kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
+                       }
+                       vcpu->arch.ia32_tsc_adjust_msr = data;
+               }
diff --git a/queue-5.13/md-raid10-remove-unnecessary-rcu_dereference-in-raid10_handle_discard.patch b/queue-5.13/md-raid10-remove-unnecessary-rcu_dereference-in-raid10_handle_discard.patch
new file mode 100644 (file)
index 0000000..d60b8e8
--- /dev/null
@@ -0,0 +1,82 @@
+From 46d4703b1db4c86ab5acb2331b10df999f005e8e Mon Sep 17 00:00:00 2001
+From: Xiao Ni <xni@redhat.com>
+Date: Wed, 18 Aug 2021 13:57:48 +0800
+Subject: md/raid10: Remove unnecessary rcu_dereference in raid10_handle_discard
+
+From: Xiao Ni <xni@redhat.com>
+
+commit 46d4703b1db4c86ab5acb2331b10df999f005e8e upstream.
+
+We are seeing the following warning in raid10_handle_discard.
+[  695.110751] =============================
+[  695.131439] WARNING: suspicious RCU usage
+[  695.151389] 4.18.0-319.el8.x86_64+debug #1 Not tainted
+[  695.174413] -----------------------------
+[  695.192603] drivers/md/raid10.c:1776 suspicious
+rcu_dereference_check() usage!
+[  695.225107] other info that might help us debug this:
+[  695.260940] rcu_scheduler_active = 2, debug_locks = 1
+[  695.290157] no locks held by mkfs.xfs/10186.
+
+In the first loop of function raid10_handle_discard. It already
+determines which disk need to handle discard request and add the
+rdev reference count rdev->nr_pending. So the conf->mirrors will
+not change until all bios come back from underlayer disks. It
+doesn't need to use rcu_dereference to get rdev.
+
+Cc: stable@vger.kernel.org
+Fixes: d30588b2731f ('md/raid10: improve raid10 discard request')
+Signed-off-by: Xiao Ni <xni@redhat.com>
+Acked-by: Guoqing Jiang <guoqing.jiang@linux.dev>
+Signed-off-by: Song Liu <songliubraving@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/raid10.c |   14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+--- a/drivers/md/raid10.c
++++ b/drivers/md/raid10.c
+@@ -1706,6 +1706,11 @@ retry_discard:
+       } else
+               r10_bio->master_bio = (struct bio *)first_r10bio;
++      /*
++       * first select target devices under rcu_lock and
++       * inc refcount on their rdev.  Record them by setting
++       * bios[x] to bio
++       */
+       rcu_read_lock();
+       for (disk = 0; disk < geo->raid_disks; disk++) {
+               struct md_rdev *rdev = rcu_dereference(conf->mirrors[disk].rdev);
+@@ -1737,9 +1742,6 @@ retry_discard:
+       for (disk = 0; disk < geo->raid_disks; disk++) {
+               sector_t dev_start, dev_end;
+               struct bio *mbio, *rbio = NULL;
+-              struct md_rdev *rdev = rcu_dereference(conf->mirrors[disk].rdev);
+-              struct md_rdev *rrdev = rcu_dereference(
+-                      conf->mirrors[disk].replacement);
+               /*
+                * Now start to calculate the start and end address for each disk.
+@@ -1769,9 +1771,12 @@ retry_discard:
+               /*
+                * It only handles discard bio which size is >= stripe size, so
+-               * dev_end > dev_start all the time
++               * dev_end > dev_start all the time.
++               * It doesn't need to use rcu lock to get rdev here. We already
++               * add rdev->nr_pending in the first loop.
+                */
+               if (r10_bio->devs[disk].bio) {
++                      struct md_rdev *rdev = conf->mirrors[disk].rdev;
+                       mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
+                       mbio->bi_end_io = raid10_end_discard_request;
+                       mbio->bi_private = r10_bio;
+@@ -1784,6 +1789,7 @@ retry_discard:
+                       bio_endio(mbio);
+               }
+               if (r10_bio->devs[disk].repl_bio) {
++                      struct md_rdev *rrdev = conf->mirrors[disk].replacement;
+                       rbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
+                       rbio->bi_end_io = raid10_end_discard_request;
+                       rbio->bi_private = r10_bio;
diff --git a/queue-5.13/perf-x86-amd-ibs-extend-perf_pmu_cap_no_exclude-to-ibs-op.patch b/queue-5.13/perf-x86-amd-ibs-extend-perf_pmu_cap_no_exclude-to-ibs-op.patch
new file mode 100644 (file)
index 0000000..8140948
--- /dev/null
@@ -0,0 +1,36 @@
+From f11dd0d80555cdc8eaf5cfc9e19c9e198217f9f1 Mon Sep 17 00:00:00 2001
+From: Kim Phillips <kim.phillips@amd.com>
+Date: Tue, 17 Aug 2021 17:10:41 -0500
+Subject: perf/x86/amd/ibs: Extend PERF_PMU_CAP_NO_EXCLUDE to IBS Op
+
+From: Kim Phillips <kim.phillips@amd.com>
+
+commit f11dd0d80555cdc8eaf5cfc9e19c9e198217f9f1 upstream.
+
+Commit:
+
+   2ff40250691e ("perf/core, arch/x86: Use PERF_PMU_CAP_NO_EXCLUDE for exclusion incapable PMUs")
+
+neglected to do so.
+
+Fixes: 2ff40250691e ("perf/core, arch/x86: Use PERF_PMU_CAP_NO_EXCLUDE for exclusion incapable PMUs")
+Signed-off-by: Kim Phillips <kim.phillips@amd.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20210817221048.88063-2-kim.phillips@amd.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/events/amd/ibs.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/x86/events/amd/ibs.c
++++ b/arch/x86/events/amd/ibs.c
+@@ -571,6 +571,7 @@ static struct perf_ibs perf_ibs_op = {
+               .start          = perf_ibs_start,
+               .stop           = perf_ibs_stop,
+               .read           = perf_ibs_read,
++              .capabilities   = PERF_PMU_CAP_NO_EXCLUDE,
+       },
+       .msr                    = MSR_AMD64_IBSOPCTL,
+       .config_mask            = IBS_OP_CONFIG_MASK,
diff --git a/queue-5.13/revert-kvm-x86-mmu-add-guest-physical-address-check-in-translate_gpa.patch b/queue-5.13/revert-kvm-x86-mmu-add-guest-physical-address-check-in-translate_gpa.patch
new file mode 100644 (file)
index 0000000..86e5269
--- /dev/null
@@ -0,0 +1,72 @@
+From e7177339d7b5f9594b316842122b5fda9513d5e2 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Tue, 31 Aug 2021 09:42:22 -0700
+Subject: Revert "KVM: x86: mmu: Add guest physical address check in translate_gpa()"
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit e7177339d7b5f9594b316842122b5fda9513d5e2 upstream.
+
+Revert a misguided illegal GPA check when "translating" a non-nested GPA.
+The check is woefully incomplete as it does not fill in @exception as
+expected by all callers, which leads to KVM attempting to inject a bogus
+exception, potentially exposing kernel stack information in the process.
+
+ WARNING: CPU: 0 PID: 8469 at arch/x86/kvm/x86.c:525 exception_type+0x98/0xb0 arch/x86/kvm/x86.c:525
+ CPU: 1 PID: 8469 Comm: syz-executor531 Not tainted 5.14.0-rc7-syzkaller #0
+ RIP: 0010:exception_type+0x98/0xb0 arch/x86/kvm/x86.c:525
+ Call Trace:
+  x86_emulate_instruction+0xef6/0x1460 arch/x86/kvm/x86.c:7853
+  kvm_mmu_page_fault+0x2f0/0x1810 arch/x86/kvm/mmu/mmu.c:5199
+  handle_ept_misconfig+0xdf/0x3e0 arch/x86/kvm/vmx/vmx.c:5336
+  __vmx_handle_exit arch/x86/kvm/vmx/vmx.c:6021 [inline]
+  vmx_handle_exit+0x336/0x1800 arch/x86/kvm/vmx/vmx.c:6038
+  vcpu_enter_guest+0x2a1c/0x4430 arch/x86/kvm/x86.c:9712
+  vcpu_run arch/x86/kvm/x86.c:9779 [inline]
+  kvm_arch_vcpu_ioctl_run+0x47d/0x1b20 arch/x86/kvm/x86.c:10010
+  kvm_vcpu_ioctl+0x49e/0xe50 arch/x86/kvm/../../../virt/kvm/kvm_main.c:3652
+
+The bug has escaped notice because practically speaking the GPA check is
+useless.  The GPA check in question only comes into play when KVM is
+walking guest page tables (or "translating" CR3), and KVM already handles
+illegal GPA checks by setting reserved bits in rsvd_bits_mask for each
+PxE, or in the case of CR3 for loading PTDPTRs, manually checks for an
+illegal CR3.  This particular failure doesn't hit the existing reserved
+bits checks because syzbot sets guest.MAXPHYADDR=1, and IA32 architecture
+simply doesn't allow for such an absurd MAXPHYADDR, e.g. 32-bit paging
+doesn't define any reserved PA bits checks, which KVM emulates by only
+incorporating the reserved PA bits into the "high" bits, i.e. bits 63:32.
+
+Simply remove the bogus check.  There is zero meaningful value and no
+architectural justification for supporting guest.MAXPHYADDR < 32, and
+properly filling the exception would introduce non-trivial complexity.
+
+This reverts commit ec7771ab471ba6a945350353617e2e3385d0e013.
+
+Fixes: ec7771ab471b ("KVM: x86: mmu: Add guest physical address check in translate_gpa()")
+Cc: stable@vger.kernel.org
+Reported-by: syzbot+200c08e88ae818f849ce@syzkaller.appspotmail.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20210831164224.1119728-2-seanjc@google.com>
+Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/mmu/mmu.c |    6 ------
+ 1 file changed, 6 deletions(-)
+
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -257,12 +257,6 @@ static bool check_mmio_spte(struct kvm_v
+ static gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
+                                   struct x86_exception *exception)
+ {
+-      /* Check if guest physical address doesn't exceed guest maximum */
+-      if (kvm_vcpu_is_illegal_gpa(vcpu, gpa)) {
+-              exception->error_code |= PFERR_RSVD_MASK;
+-              return UNMAPPED_GVA;
+-      }
+-
+         return gpa;
+ }
index bbcf2dacf0318c7a6483cd56914320fd5bb7217e..69863b22cf0edbed6a51ff56171ff1403344ff30 100644 (file)
@@ -274,3 +274,23 @@ raid1-ensure-write-behind-bio-has-less-than-bio_max_vecs-sectors.patch
 cifs-do-not-leak-edeadlk-to-dgetents64-for-status_user_session_deleted.patch
 smb3-fix-posix-extensions-mount-option.patch
 tty-fix-data-race-between-tiocsti-and-flush_to_ldisc.patch
+x86-efi-restore-firmware-idt-before-calling-exitbootservices.patch
+perf-x86-amd-ibs-extend-perf_pmu_cap_no_exclude-to-ibs-op.patch
+x86-resctrl-fix-a-maybe-uninitialized-build-warning-treated-as-error.patch
+revert-kvm-x86-mmu-add-guest-physical-address-check-in-translate_gpa.patch
+kvm-s390-index-kvm-arch.idle_mask-by-vcpu_idx.patch
+kvm-x86-update-vcpu-s-hv_clock-before-back-to-guest-when-tsc_offset-is-adjusted.patch
+kvm-x86-clamp-host-mapping-level-to-max_level-in-kvm_mmu_max_mapping_level.patch
+kvm-x86-mmu-avoid-collision-with-present-sptes-in-tdp-mmu-lpage-stats.patch
+kvm-vmx-avoid-running-vmx_handle_exit_irqoff-in-case-of-emulation.patch
+kvm-nvmx-unconditionally-clear-nested.pi_pending-on-nested-vm-enter.patch
+kvm-arm64-unregister-hyp-sections-from-kmemleak-in-protected-mode.patch
+arm-dts-at91-add-pinctrl-names-0-for-all-gpios.patch
+io-wq-check-max_worker-limits-if-a-worker-transitions-bound-state.patch
+md-raid10-remove-unnecessary-rcu_dereference-in-raid10_handle_discard.patch
+char-tpm-kconfig-remove-bad-i2c-cr50-select.patch
+fuse-truncate-pagecache-on-atomic_o_trunc.patch
+fuse-flush-extending-writes.patch
+fuse-wait-for-writepages-in-syncfs.patch
+ima-remove-wmissing-prototypes-warning.patch
+ima-remove-the-dependency-on-crypto_md5.patch
diff --git a/queue-5.13/x86-efi-restore-firmware-idt-before-calling-exitbootservices.patch b/queue-5.13/x86-efi-restore-firmware-idt-before-calling-exitbootservices.patch
new file mode 100644 (file)
index 0000000..1c87eb7
--- /dev/null
@@ -0,0 +1,127 @@
+From 22aa45cb465be474e97666b3f7587ccb06ee411b Mon Sep 17 00:00:00 2001
+From: Joerg Roedel <jroedel@suse.de>
+Date: Fri, 20 Aug 2021 14:57:03 +0200
+Subject: x86/efi: Restore Firmware IDT before calling ExitBootServices()
+
+From: Joerg Roedel <jroedel@suse.de>
+
+commit 22aa45cb465be474e97666b3f7587ccb06ee411b upstream.
+
+Commit
+
+  79419e13e808 ("x86/boot/compressed/64: Setup IDT in startup_32 boot path")
+
+introduced an IDT into the 32-bit boot path of the decompressor stub.
+But the IDT is set up before ExitBootServices() is called, and some UEFI
+firmwares rely on their own IDT.
+
+Save the firmware IDT on boot and restore it before calling into EFI
+functions to fix boot failures introduced by above commit.
+
+Fixes: 79419e13e808 ("x86/boot/compressed/64: Setup IDT in startup_32 boot path")
+Reported-by: Fabio Aiuto <fabioaiuto83@gmail.com>
+Signed-off-by: Joerg Roedel <jroedel@suse.de>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Acked-by: Ard Biesheuvel <ardb@kernel.org>
+Cc: stable@vger.kernel.org # 5.13+
+Link: https://lkml.kernel.org/r/20210820125703.32410-1-joro@8bytes.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/boot/compressed/efi_thunk_64.S |   30 +++++++++++++++++++++---------
+ arch/x86/boot/compressed/head_64.S      |    3 +++
+ 2 files changed, 24 insertions(+), 9 deletions(-)
+
+--- a/arch/x86/boot/compressed/efi_thunk_64.S
++++ b/arch/x86/boot/compressed/efi_thunk_64.S
+@@ -5,9 +5,8 @@
+  * Early support for invoking 32-bit EFI services from a 64-bit kernel.
+  *
+  * Because this thunking occurs before ExitBootServices() we have to
+- * restore the firmware's 32-bit GDT before we make EFI service calls,
+- * since the firmware's 32-bit IDT is still currently installed and it
+- * needs to be able to service interrupts.
++ * restore the firmware's 32-bit GDT and IDT before we make EFI service
++ * calls.
+  *
+  * On the plus side, we don't have to worry about mangling 64-bit
+  * addresses into 32-bits because we're executing with an identity
+@@ -39,7 +38,7 @@ SYM_FUNC_START(__efi64_thunk)
+       /*
+        * Convert x86-64 ABI params to i386 ABI
+        */
+-      subq    $32, %rsp
++      subq    $64, %rsp
+       movl    %esi, 0x0(%rsp)
+       movl    %edx, 0x4(%rsp)
+       movl    %ecx, 0x8(%rsp)
+@@ -49,14 +48,19 @@ SYM_FUNC_START(__efi64_thunk)
+       leaq    0x14(%rsp), %rbx
+       sgdt    (%rbx)
++      addq    $16, %rbx
++      sidt    (%rbx)
++
+       /*
+-       * Switch to gdt with 32-bit segments. This is the firmware GDT
+-       * that was installed when the kernel started executing. This
+-       * pointer was saved at the EFI stub entry point in head_64.S.
++       * Switch to IDT and GDT with 32-bit segments. This is the firmware GDT
++       * and IDT that was installed when the kernel started executing. The
++       * pointers were saved at the EFI stub entry point in head_64.S.
+        *
+        * Pass the saved DS selector to the 32-bit code, and use far return to
+        * restore the saved CS selector.
+        */
++      leaq    efi32_boot_idt(%rip), %rax
++      lidt    (%rax)
+       leaq    efi32_boot_gdt(%rip), %rax
+       lgdt    (%rax)
+@@ -67,7 +71,7 @@ SYM_FUNC_START(__efi64_thunk)
+       pushq   %rax
+       lretq
+-1:    addq    $32, %rsp
++1:    addq    $64, %rsp
+       movq    %rdi, %rax
+       pop     %rbx
+@@ -128,10 +132,13 @@ SYM_FUNC_START_LOCAL(efi_enter32)
+       /*
+        * Some firmware will return with interrupts enabled. Be sure to
+-       * disable them before we switch GDTs.
++       * disable them before we switch GDTs and IDTs.
+        */
+       cli
++      lidtl   (%ebx)
++      subl    $16, %ebx
++
+       lgdtl   (%ebx)
+       movl    %cr4, %eax
+@@ -166,6 +173,11 @@ SYM_DATA_START(efi32_boot_gdt)
+       .quad   0
+ SYM_DATA_END(efi32_boot_gdt)
++SYM_DATA_START(efi32_boot_idt)
++      .word   0
++      .quad   0
++SYM_DATA_END(efi32_boot_idt)
++
+ SYM_DATA_START(efi32_boot_cs)
+       .word   0
+ SYM_DATA_END(efi32_boot_cs)
+--- a/arch/x86/boot/compressed/head_64.S
++++ b/arch/x86/boot/compressed/head_64.S
+@@ -319,6 +319,9 @@ SYM_INNER_LABEL(efi32_pe_stub_entry, SYM
+       movw    %cs, rva(efi32_boot_cs)(%ebp)
+       movw    %ds, rva(efi32_boot_ds)(%ebp)
++      /* Store firmware IDT descriptor */
++      sidtl   rva(efi32_boot_idt)(%ebp)
++
+       /* Disable paging */
+       movl    %cr0, %eax
+       btrl    $X86_CR0_PG_BIT, %eax
diff --git a/queue-5.13/x86-resctrl-fix-a-maybe-uninitialized-build-warning-treated-as-error.patch b/queue-5.13/x86-resctrl-fix-a-maybe-uninitialized-build-warning-treated-as-error.patch
new file mode 100644 (file)
index 0000000..e358cb4
--- /dev/null
@@ -0,0 +1,67 @@
+From 527f721478bce3f49b513a733bacd19d6f34b08c Mon Sep 17 00:00:00 2001
+From: Babu Moger <babu.moger@amd.com>
+Date: Fri, 20 Aug 2021 16:52:42 -0500
+Subject: x86/resctrl: Fix a maybe-uninitialized build warning treated as error
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Babu Moger <babu.moger@amd.com>
+
+commit 527f721478bce3f49b513a733bacd19d6f34b08c upstream.
+
+The recent commit
+
+  064855a69003 ("x86/resctrl: Fix default monitoring groups reporting")
+
+caused a RHEL build failure with an uninitialized variable warning
+treated as an error because it removed the default case snippet.
+
+The RHEL Makefile uses '-Werror=maybe-uninitialized' to force possibly
+uninitialized variable warnings to be treated as errors. This is also
+reported by smatch via the 0day robot.
+
+The error from the RHEL build is:
+
+  arch/x86/kernel/cpu/resctrl/monitor.c: In function ‘__mon_event_count’:
+  arch/x86/kernel/cpu/resctrl/monitor.c:261:12: error: ‘m’ may be used
+  uninitialized in this function [-Werror=maybe-uninitialized]
+    m->chunks += chunks;
+              ^~
+
+The upstream Makefile does not build using '-Werror=maybe-uninitialized'.
+So, the problem is not seen there. Fix the problem by putting back the
+default case snippet.
+
+ [ bp: note that there's nothing wrong with the code and other compilers
+   do not trigger this warning - this is being done just so the RHEL compiler
+   is happy. ]
+
+Fixes: 064855a69003 ("x86/resctrl: Fix default monitoring groups reporting")
+Reported-by: Terry Bowman <Terry.Bowman@amd.com>
+Reported-by: kernel test robot <lkp@intel.com>
+Signed-off-by: Babu Moger <babu.moger@amd.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/162949631908.23903.17090272726012848523.stgit@bmoger-ubuntu
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/resctrl/monitor.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/arch/x86/kernel/cpu/resctrl/monitor.c
++++ b/arch/x86/kernel/cpu/resctrl/monitor.c
+@@ -304,6 +304,12 @@ static u64 __mon_event_count(u32 rmid, s
+       case QOS_L3_MBM_LOCAL_EVENT_ID:
+               m = &rr->d->mbm_local[rmid];
+               break;
++      default:
++              /*
++               * Code would never reach here because an invalid
++               * event id would fail the __rmid_read.
++               */
++              return RMID_VAL_ERROR;
+       }
+       if (rr->first) {