From: Greg Kroah-Hartman Date: Mon, 13 Sep 2021 11:36:21 +0000 (+0200) Subject: 5.13-stable patches X-Git-Tag: v5.4.146~13 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=d1b8330e86cb823c98f77a5edfdc4fb764187a8d;p=thirdparty%2Fkernel%2Fstable-queue.git 5.13-stable patches added patches: arm-dts-at91-add-pinctrl-names-0-for-all-gpios.patch char-tpm-kconfig-remove-bad-i2c-cr50-select.patch fuse-flush-extending-writes.patch fuse-truncate-pagecache-on-atomic_o_trunc.patch fuse-wait-for-writepages-in-syncfs.patch ima-remove-the-dependency-on-crypto_md5.patch ima-remove-wmissing-prototypes-warning.patch io-wq-check-max_worker-limits-if-a-worker-transitions-bound-state.patch kvm-arm64-unregister-hyp-sections-from-kmemleak-in-protected-mode.patch kvm-nvmx-unconditionally-clear-nested.pi_pending-on-nested-vm-enter.patch kvm-s390-index-kvm-arch.idle_mask-by-vcpu_idx.patch kvm-vmx-avoid-running-vmx_handle_exit_irqoff-in-case-of-emulation.patch kvm-x86-clamp-host-mapping-level-to-max_level-in-kvm_mmu_max_mapping_level.patch kvm-x86-mmu-avoid-collision-with-present-sptes-in-tdp-mmu-lpage-stats.patch kvm-x86-update-vcpu-s-hv_clock-before-back-to-guest-when-tsc_offset-is-adjusted.patch md-raid10-remove-unnecessary-rcu_dereference-in-raid10_handle_discard.patch perf-x86-amd-ibs-extend-perf_pmu_cap_no_exclude-to-ibs-op.patch revert-kvm-x86-mmu-add-guest-physical-address-check-in-translate_gpa.patch x86-efi-restore-firmware-idt-before-calling-exitbootservices.patch x86-resctrl-fix-a-maybe-uninitialized-build-warning-treated-as-error.patch --- diff --git a/queue-5.13/arm-dts-at91-add-pinctrl-names-0-for-all-gpios.patch b/queue-5.13/arm-dts-at91-add-pinctrl-names-0-for-all-gpios.patch new file mode 100644 index 00000000000..3acb65df343 --- /dev/null +++ b/queue-5.13/arm-dts-at91-add-pinctrl-names-0-for-all-gpios.patch @@ -0,0 +1,189 @@ +From bf781869e5cf3e4ec1a47dad69b6f0df97629cbd Mon Sep 17 00:00:00 2001 +From: Claudiu Beznea +Date: Tue, 27 Jul 2021 10:40:05 +0300 +Subject: ARM: dts: at91: add pinctrl-{names, 0} for all gpios + +From: Claudiu Beznea + +commit bf781869e5cf3e4ec1a47dad69b6f0df97629cbd upstream. + +Add pinctrl-names and pinctrl-0 properties on controllers that claims to +use pins to avoid failures due to +commit 2ab73c6d8323 ("gpio: Support GPIO controllers without pin-ranges") +and also to avoid using pins that may be claimed my other IPs. + +Fixes: b7c2b6157079 ("ARM: at91: add Atmel's SAMA5D3 Xplained board") +Fixes: 1e5f532c2737 ("ARM: dts: at91: sam9x60: add device tree for soc and board") +Fixes: 38153a017896 ("ARM: at91/dt: sama5d4: add dts for sama5d4 xplained board") +Signed-off-by: Claudiu Beznea +Signed-off-by: Nicolas Ferre +Link: https://lore.kernel.org/r/20210727074006.1609989-1-claudiu.beznea@microchip.com +Cc: # v5.7+ +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm/boot/dts/at91-sam9x60ek.dts | 16 ++++++++++++++- + arch/arm/boot/dts/at91-sama5d3_xplained.dts | 29 ++++++++++++++++++++++++++++ + arch/arm/boot/dts/at91-sama5d4_xplained.dts | 19 ++++++++++++++++++ + 3 files changed, 63 insertions(+), 1 deletion(-) + +--- a/arch/arm/boot/dts/at91-sam9x60ek.dts ++++ b/arch/arm/boot/dts/at91-sam9x60ek.dts +@@ -92,6 +92,8 @@ + + leds { + compatible = "gpio-leds"; ++ pinctrl-names = "default"; ++ pinctrl-0 = <&pinctrl_gpio_leds>; + status = "okay"; /* Conflict with pwm0. 
*/ + + red { +@@ -537,6 +539,10 @@ + AT91_PIOA 19 AT91_PERIPH_A (AT91_PINCTRL_PULL_UP | AT91_PINCTRL_DRIVE_STRENGTH_HI) /* PA19 DAT2 periph A with pullup */ + AT91_PIOA 20 AT91_PERIPH_A (AT91_PINCTRL_PULL_UP | AT91_PINCTRL_DRIVE_STRENGTH_HI)>; /* PA20 DAT3 periph A with pullup */ + }; ++ pinctrl_sdmmc0_cd: sdmmc0_cd { ++ atmel,pins = ++ ; ++ }; + }; + + sdmmc1 { +@@ -569,6 +575,14 @@ + AT91_PIOD 16 AT91_PERIPH_GPIO AT91_PINCTRL_NONE>; + }; + }; ++ ++ leds { ++ pinctrl_gpio_leds: gpio_leds { ++ atmel,pins = ; ++ }; ++ }; + }; /* pinctrl */ + + &pwm0 { +@@ -580,7 +594,7 @@ + &sdmmc0 { + bus-width = <4>; + pinctrl-names = "default"; +- pinctrl-0 = <&pinctrl_sdmmc0_default>; ++ pinctrl-0 = <&pinctrl_sdmmc0_default &pinctrl_sdmmc0_cd>; + status = "okay"; + cd-gpios = <&pioA 23 GPIO_ACTIVE_LOW>; + disable-wp; +--- a/arch/arm/boot/dts/at91-sama5d3_xplained.dts ++++ b/arch/arm/boot/dts/at91-sama5d3_xplained.dts +@@ -57,6 +57,8 @@ + }; + + spi0: spi@f0004000 { ++ pinctrl-names = "default"; ++ pinctrl-0 = <&pinctrl_spi0_cs>; + cs-gpios = <&pioD 13 0>, <0>, <0>, <&pioD 16 0>; + status = "okay"; + }; +@@ -169,6 +171,8 @@ + }; + + spi1: spi@f8008000 { ++ pinctrl-names = "default"; ++ pinctrl-0 = <&pinctrl_spi1_cs>; + cs-gpios = <&pioC 25 0>; + status = "okay"; + }; +@@ -248,6 +252,26 @@ + ; + }; ++ ++ pinctrl_gpio_leds: gpio_leds_default { ++ atmel,pins = ++ ; ++ }; ++ ++ pinctrl_spi0_cs: spi0_cs_default { ++ atmel,pins = ++ ; ++ }; ++ ++ pinctrl_spi1_cs: spi1_cs_default { ++ atmel,pins = ; ++ }; ++ ++ pinctrl_vcc_mmc0_reg_gpio: vcc_mmc0_reg_gpio_default { ++ atmel,pins = ; ++ }; + }; + }; + }; +@@ -339,6 +363,8 @@ + + vcc_mmc0_reg: fixedregulator_mmc0 { + compatible = "regulator-fixed"; ++ pinctrl-names = "default"; ++ pinctrl-0 = <&pinctrl_vcc_mmc0_reg_gpio>; + gpio = <&pioE 2 GPIO_ACTIVE_LOW>; + regulator-name = "mmc0-card-supply"; + regulator-min-microvolt = <3300000>; +@@ -362,6 +388,9 @@ + + leds { + compatible = "gpio-leds"; ++ pinctrl-names = "default"; ++ pinctrl-0 = <&pinctrl_gpio_leds>; ++ status = "okay"; + + d2 { + label = "d2"; +--- a/arch/arm/boot/dts/at91-sama5d4_xplained.dts ++++ b/arch/arm/boot/dts/at91-sama5d4_xplained.dts +@@ -90,6 +90,8 @@ + }; + + spi1: spi@fc018000 { ++ pinctrl-names = "default"; ++ pinctrl-0 = <&pinctrl_spi0_cs>; + cs-gpios = <&pioB 21 0>; + status = "okay"; + }; +@@ -147,6 +149,19 @@ + atmel,pins = + ; + }; ++ pinctrl_spi0_cs: spi0_cs_default { ++ atmel,pins = ++ ; ++ }; ++ pinctrl_gpio_leds: gpio_leds_default { ++ atmel,pins = ++ ; ++ }; ++ pinctrl_vcc_mmc1_reg: vcc_mmc1_reg { ++ atmel,pins = ++ ; ++ }; + }; + }; + }; +@@ -252,6 +267,8 @@ + + leds { + compatible = "gpio-leds"; ++ pinctrl-names = "default"; ++ pinctrl-0 = <&pinctrl_gpio_leds>; + status = "okay"; + + d8 { +@@ -278,6 +295,8 @@ + + vcc_mmc1_reg: fixedregulator_mmc1 { + compatible = "regulator-fixed"; ++ pinctrl-names = "default"; ++ pinctrl-0 = <&pinctrl_vcc_mmc1_reg>; + gpio = <&pioE 4 GPIO_ACTIVE_LOW>; + regulator-name = "VDD MCI1"; + regulator-min-microvolt = <3300000>; diff --git a/queue-5.13/char-tpm-kconfig-remove-bad-i2c-cr50-select.patch b/queue-5.13/char-tpm-kconfig-remove-bad-i2c-cr50-select.patch new file mode 100644 index 00000000000..18cafc45e41 --- /dev/null +++ b/queue-5.13/char-tpm-kconfig-remove-bad-i2c-cr50-select.patch @@ -0,0 +1,33 @@ +From 847fdae1579f4ee930b01f24a7847b8043bf468c Mon Sep 17 00:00:00 2001 +From: Adrian Ratiu +Date: Tue, 27 Jul 2021 20:13:12 +0300 +Subject: char: tpm: Kconfig: remove bad i2c cr50 select + +From: Adrian Ratiu + +commit 
847fdae1579f4ee930b01f24a7847b8043bf468c upstream. + +This fixes a minor bug which went unnoticed during the initial +driver upstreaming review: TCG_CR50 does not exist in mainline +kernels, so remove it. + +Fixes: 3a253caaad11 ("char: tpm: add i2c driver for cr50") +Cc: stable@vger.kernel.org +Reviewed-by: Jarkko Sakkinen +Signed-off-by: Adrian Ratiu +Signed-off-by: Jarkko Sakkinen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/char/tpm/Kconfig | 1 - + 1 file changed, 1 deletion(-) + +--- a/drivers/char/tpm/Kconfig ++++ b/drivers/char/tpm/Kconfig +@@ -89,7 +89,6 @@ config TCG_TIS_SYNQUACER + config TCG_TIS_I2C_CR50 + tristate "TPM Interface Specification 2.0 Interface (I2C - CR50)" + depends on I2C +- select TCG_CR50 + help + This is a driver for the Google cr50 I2C TPM interface which is a + custom microcontroller and requires a custom i2c protocol interface diff --git a/queue-5.13/fuse-flush-extending-writes.patch b/queue-5.13/fuse-flush-extending-writes.patch new file mode 100644 index 00000000000..b18abd3853d --- /dev/null +++ b/queue-5.13/fuse-flush-extending-writes.patch @@ -0,0 +1,49 @@ +From 59bda8ecee2ffc6a602b7bf2b9e43ca669cdbdcd Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Tue, 31 Aug 2021 14:18:08 +0200 +Subject: fuse: flush extending writes + +From: Miklos Szeredi + +commit 59bda8ecee2ffc6a602b7bf2b9e43ca669cdbdcd upstream. + +Callers of fuse_writeback_range() assume that the file is ready for +modification by the server in the supplied byte range after the call +returns. + +If there's a write that extends the file beyond the end of the supplied +range, then the file needs to be extended to at least the end of the range, +but currently that's not done. + +There are at least two cases where this can cause problems: + + - copy_file_range() will return short count if the file is not extended + up to end of the source range. + + - FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE will not extend the file, + hence the region may not be fully allocated. + +Fix by flushing writes from the start of the range up to the end of the +file. This could be optimized if the writes are non-extending, etc, but +it's probably not worth the trouble. + +Fixes: a2bc92362941 ("fuse: fix copy_file_range() in the writeback case") +Fixes: 6b1bdb56b17c ("fuse: allow fallocate(FALLOC_FL_ZERO_RANGE)") +Cc: # v5.2 +Signed-off-by: Miklos Szeredi +Signed-off-by: Greg Kroah-Hartman +--- + fs/fuse/file.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/fuse/file.c ++++ b/fs/fuse/file.c +@@ -2886,7 +2886,7 @@ fuse_direct_IO(struct kiocb *iocb, struc + + static int fuse_writeback_range(struct inode *inode, loff_t start, loff_t end) + { +- int err = filemap_write_and_wait_range(inode->i_mapping, start, end); ++ int err = filemap_write_and_wait_range(inode->i_mapping, start, -1); + + if (!err) + fuse_sync_writes(inode); diff --git a/queue-5.13/fuse-truncate-pagecache-on-atomic_o_trunc.patch b/queue-5.13/fuse-truncate-pagecache-on-atomic_o_trunc.patch new file mode 100644 index 00000000000..04fd413702f --- /dev/null +++ b/queue-5.13/fuse-truncate-pagecache-on-atomic_o_trunc.patch @@ -0,0 +1,58 @@ +From 76224355db7570cbe6b6f75c8929a1558828dd55 Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Tue, 17 Aug 2021 21:05:16 +0200 +Subject: fuse: truncate pagecache on atomic_o_trunc + +From: Miklos Szeredi + +commit 76224355db7570cbe6b6f75c8929a1558828dd55 upstream. + +fuse_finish_open() will be called with FUSE_NOWRITE in case of atomic +O_TRUNC. 
This can deadlock with fuse_wait_on_page_writeback() in +fuse_launder_page() triggered by invalidate_inode_pages2(). + +Fix by replacing invalidate_inode_pages2() in fuse_finish_open() with a +truncate_pagecache() call. This makes sense regardless of FOPEN_KEEP_CACHE +or fc->writeback cache, so do it unconditionally. + +Reported-by: Xie Yongji +Reported-and-tested-by: syzbot+bea44a5189836d956894@syzkaller.appspotmail.com +Fixes: e4648309b85a ("fuse: truncate pending writes on O_TRUNC") +Cc: +Signed-off-by: Miklos Szeredi +Signed-off-by: Greg Kroah-Hartman +--- + fs/fuse/file.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/fs/fuse/file.c ++++ b/fs/fuse/file.c +@@ -198,12 +198,11 @@ void fuse_finish_open(struct inode *inod + struct fuse_file *ff = file->private_data; + struct fuse_conn *fc = get_fuse_conn(inode); + +- if (!(ff->open_flags & FOPEN_KEEP_CACHE)) +- invalidate_inode_pages2(inode->i_mapping); + if (ff->open_flags & FOPEN_STREAM) + stream_open(inode, file); + else if (ff->open_flags & FOPEN_NONSEEKABLE) + nonseekable_open(inode, file); ++ + if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC)) { + struct fuse_inode *fi = get_fuse_inode(inode); + +@@ -211,10 +210,14 @@ void fuse_finish_open(struct inode *inod + fi->attr_version = atomic64_inc_return(&fc->attr_version); + i_size_write(inode, 0); + spin_unlock(&fi->lock); ++ truncate_pagecache(inode, 0); + fuse_invalidate_attr(inode); + if (fc->writeback_cache) + file_update_time(file); ++ } else if (!(ff->open_flags & FOPEN_KEEP_CACHE)) { ++ invalidate_inode_pages2(inode->i_mapping); + } ++ + if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache) + fuse_link_write_file(file); + } diff --git a/queue-5.13/fuse-wait-for-writepages-in-syncfs.patch b/queue-5.13/fuse-wait-for-writepages-in-syncfs.patch new file mode 100644 index 00000000000..15f09799c2d --- /dev/null +++ b/queue-5.13/fuse-wait-for-writepages-in-syncfs.patch @@ -0,0 +1,242 @@ +From 660585b56e63ca034ad506ea53c807c5cdca3196 Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Wed, 1 Sep 2021 12:39:02 +0200 +Subject: fuse: wait for writepages in syncfs + +From: Miklos Szeredi + +commit 660585b56e63ca034ad506ea53c807c5cdca3196 upstream. + +In case of fuse the MM subsystem doesn't guarantee that page writeback +completes by the time ->sync_fs() is called. This is because fuse +completes page writeback immediately to prevent DoS of memory reclaim by +the userspace file server. + +This means that fuse itself must ensure that writes are synced before +sending the SYNCFS request to the server. + +Introduce sync buckets, that hold a counter for the number of outstanding +write requests. On syncfs replace the current bucket with a new one and +wait until the old bucket's counter goes down to zero. + +It is possible to have multiple syncfs calls in parallel, in which case +there could be more than one waited-on buckets. Descendant buckets must +not complete until the parent completes. Add a count to the child (new) +bucket until the (parent) old bucket completes. + +Use RCU protection to dereference the current bucket and to wake up an +emptied bucket. Use fc->lock to protect against parallel assignments to +the current bucket. + +This leaves just the counter to be a possible scalability issue. The +fc->num_waiting counter has a similar issue, so both should be addressed at +the same time. 
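+
+In simplified form (locking, RCU details and the fast path for the
+case of no outstanding writes are elided; see the diff below),
+fuse_sync_fs_writes() amounts to:
+
+	new_bucket = fuse_sync_bucket_alloc();	/* starts with count 1 */
+	bucket = fc->curr_bucket;
+	atomic_inc(&new_bucket->count);		/* new waits on old */
+	rcu_assign_pointer(fc->curr_bucket, new_bucket);
+	atomic_dec(&bucket->count);		/* drop initial count */
+	wait_event(bucket->waitq, atomic_read(&bucket->count) == 0);
+	fuse_sync_bucket_dec(new_bucket);	/* drop temporary count */
+	kfree_rcu(bucket, rcu);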
+ +Reported-by: Amir Goldstein +Fixes: 2d82ab251ef0 ("virtiofs: propagate sync() to file server") +Cc: # v5.14 +Signed-off-by: Miklos Szeredi +Signed-off-by: Greg Kroah-Hartman +--- + fs/fuse/file.c | 21 +++++++++++++++++++ + fs/fuse/fuse_i.h | 19 +++++++++++++++++ + fs/fuse/inode.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ + 3 files changed, 100 insertions(+) + +--- a/fs/fuse/file.c ++++ b/fs/fuse/file.c +@@ -392,6 +392,7 @@ struct fuse_writepage_args { + struct list_head queue_entry; + struct fuse_writepage_args *next; + struct inode *inode; ++ struct fuse_sync_bucket *bucket; + }; + + static struct fuse_writepage_args *fuse_find_writeback(struct fuse_inode *fi, +@@ -1613,6 +1614,9 @@ static void fuse_writepage_free(struct f + struct fuse_args_pages *ap = &wpa->ia.ap; + int i; + ++ if (wpa->bucket) ++ fuse_sync_bucket_dec(wpa->bucket); ++ + for (i = 0; i < ap->num_pages; i++) + __free_page(ap->pages[i]); + +@@ -1876,6 +1880,20 @@ static struct fuse_writepage_args *fuse_ + + } + ++static void fuse_writepage_add_to_bucket(struct fuse_conn *fc, ++ struct fuse_writepage_args *wpa) ++{ ++ if (!fc->sync_fs) ++ return; ++ ++ rcu_read_lock(); ++ /* Prevent resurrection of dead bucket in unlikely race with syncfs */ ++ do { ++ wpa->bucket = rcu_dereference(fc->curr_bucket); ++ } while (unlikely(!atomic_inc_not_zero(&wpa->bucket->count))); ++ rcu_read_unlock(); ++} ++ + static int fuse_writepage_locked(struct page *page) + { + struct address_space *mapping = page->mapping; +@@ -1903,6 +1921,7 @@ static int fuse_writepage_locked(struct + if (!wpa->ia.ff) + goto err_nofile; + ++ fuse_writepage_add_to_bucket(fc, wpa); + fuse_write_args_fill(&wpa->ia, wpa->ia.ff, page_offset(page), 0); + + copy_highpage(tmp_page, page); +@@ -2153,6 +2172,8 @@ static int fuse_writepages_fill(struct p + __free_page(tmp_page); + goto out_unlock; + } ++ fuse_writepage_add_to_bucket(fc, wpa); ++ + data->max_pages = 1; + + ap = &wpa->ia.ap; +--- a/fs/fuse/fuse_i.h ++++ b/fs/fuse/fuse_i.h +@@ -515,6 +515,13 @@ struct fuse_fs_context { + void **fudptr; + }; + ++struct fuse_sync_bucket { ++ /* count is a possible scalability bottleneck */ ++ atomic_t count; ++ wait_queue_head_t waitq; ++ struct rcu_head rcu; ++}; ++ + /** + * A Fuse connection. 
+ * +@@ -807,6 +814,9 @@ struct fuse_conn { + + /** List of filesystems using this connection */ + struct list_head mounts; ++ ++ /* New writepages go into this bucket */ ++ struct fuse_sync_bucket __rcu *curr_bucket; + }; + + /* +@@ -910,6 +920,15 @@ static inline void fuse_page_descs_lengt + descs[i].length = PAGE_SIZE - descs[i].offset; + } + ++static inline void fuse_sync_bucket_dec(struct fuse_sync_bucket *bucket) ++{ ++ /* Need RCU protection to prevent use after free after the decrement */ ++ rcu_read_lock(); ++ if (atomic_dec_and_test(&bucket->count)) ++ wake_up(&bucket->waitq); ++ rcu_read_unlock(); ++} ++ + /** Device operations */ + extern const struct file_operations fuse_dev_operations; + +--- a/fs/fuse/inode.c ++++ b/fs/fuse/inode.c +@@ -506,6 +506,57 @@ static int fuse_statfs(struct dentry *de + return err; + } + ++static struct fuse_sync_bucket *fuse_sync_bucket_alloc(void) ++{ ++ struct fuse_sync_bucket *bucket; ++ ++ bucket = kzalloc(sizeof(*bucket), GFP_KERNEL | __GFP_NOFAIL); ++ if (bucket) { ++ init_waitqueue_head(&bucket->waitq); ++ /* Initial active count */ ++ atomic_set(&bucket->count, 1); ++ } ++ return bucket; ++} ++ ++static void fuse_sync_fs_writes(struct fuse_conn *fc) ++{ ++ struct fuse_sync_bucket *bucket, *new_bucket; ++ int count; ++ ++ new_bucket = fuse_sync_bucket_alloc(); ++ spin_lock(&fc->lock); ++ bucket = rcu_dereference_protected(fc->curr_bucket, 1); ++ count = atomic_read(&bucket->count); ++ WARN_ON(count < 1); ++ /* No outstanding writes? */ ++ if (count == 1) { ++ spin_unlock(&fc->lock); ++ kfree(new_bucket); ++ return; ++ } ++ ++ /* ++ * Completion of new bucket depends on completion of this bucket, so add ++ * one more count. ++ */ ++ atomic_inc(&new_bucket->count); ++ rcu_assign_pointer(fc->curr_bucket, new_bucket); ++ spin_unlock(&fc->lock); ++ /* ++ * Drop initial active count. At this point if all writes in this and ++ * ancestor buckets complete, the count will go to zero and this task ++ * will be woken up. 
++ */ ++ atomic_dec(&bucket->count); ++ ++ wait_event(bucket->waitq, atomic_read(&bucket->count) == 0); ++ ++ /* Drop temp count on descendant bucket */ ++ fuse_sync_bucket_dec(new_bucket); ++ kfree_rcu(bucket, rcu); ++} ++ + static int fuse_sync_fs(struct super_block *sb, int wait) + { + struct fuse_mount *fm = get_fuse_mount_super(sb); +@@ -528,6 +579,8 @@ static int fuse_sync_fs(struct super_blo + if (!fc->sync_fs) + return 0; + ++ fuse_sync_fs_writes(fc); ++ + memset(&inarg, 0, sizeof(inarg)); + args.in_numargs = 1; + args.in_args[0].size = sizeof(inarg); +@@ -763,6 +816,7 @@ void fuse_conn_put(struct fuse_conn *fc) + { + if (refcount_dec_and_test(&fc->count)) { + struct fuse_iqueue *fiq = &fc->iq; ++ struct fuse_sync_bucket *bucket; + + if (IS_ENABLED(CONFIG_FUSE_DAX)) + fuse_dax_conn_free(fc); +@@ -770,6 +824,11 @@ void fuse_conn_put(struct fuse_conn *fc) + fiq->ops->release(fiq); + put_pid_ns(fc->pid_ns); + put_user_ns(fc->user_ns); ++ bucket = rcu_dereference_protected(fc->curr_bucket, 1); ++ if (bucket) { ++ WARN_ON(atomic_read(&bucket->count) != 1); ++ kfree(bucket); ++ } + fc->release(fc); + } + } +@@ -1366,6 +1425,7 @@ int fuse_fill_super_common(struct super_ + if (sb->s_flags & SB_MANDLOCK) + goto err; + ++ rcu_assign_pointer(fc->curr_bucket, fuse_sync_bucket_alloc()); + fuse_sb_defaults(sb); + + if (ctx->is_bdev) { diff --git a/queue-5.13/ima-remove-the-dependency-on-crypto_md5.patch b/queue-5.13/ima-remove-the-dependency-on-crypto_md5.patch new file mode 100644 index 00000000000..d8acf2e5ca7 --- /dev/null +++ b/queue-5.13/ima-remove-the-dependency-on-crypto_md5.patch @@ -0,0 +1,45 @@ +From 8510505d55e194d3f6c9644c9f9d12c4f6b0395a Mon Sep 17 00:00:00 2001 +From: THOBY Simon +Date: Mon, 16 Aug 2021 08:10:59 +0000 +Subject: IMA: remove the dependency on CRYPTO_MD5 + +From: THOBY Simon + +commit 8510505d55e194d3f6c9644c9f9d12c4f6b0395a upstream. + +MD5 is a weak digest algorithm that shouldn't be used for cryptographic +operation. It hinders the efficiency of a patch set that aims to limit +the digests allowed for the extended file attribute namely security.ima. +MD5 is no longer a requirement for IMA, nor should it be used there. + +The sole place where we still use the MD5 algorithm inside IMA is setting +the ima_hash algorithm to MD5, if the user supplies 'ima_hash=md5' +parameter on the command line. With commit ab60368ab6a4 ("ima: Fallback +to the builtin hash algorithm"), setting "ima_hash=md5" fails gracefully +when CRYPTO_MD5 is not set: + ima: Can not allocate md5 (reason: -2) + ima: Allocating md5 failed, going to use default hash algorithm sha256 + +Remove the CRYPTO_MD5 dependency for IMA. + +Signed-off-by: THOBY Simon +Reviewed-by: Lakshmi Ramasubramanian +[zohar@linux.ibm.com: include commit number in patch description for +stable.] 
+Cc: stable@vger.kernel.org # 4.17 +Signed-off-by: Mimi Zohar +Signed-off-by: Greg Kroah-Hartman +--- + security/integrity/ima/Kconfig | 1 - + 1 file changed, 1 deletion(-) + +--- a/security/integrity/ima/Kconfig ++++ b/security/integrity/ima/Kconfig +@@ -6,7 +6,6 @@ config IMA + select SECURITYFS + select CRYPTO + select CRYPTO_HMAC +- select CRYPTO_MD5 + select CRYPTO_SHA1 + select CRYPTO_HASH_INFO + select TCG_TPM if HAS_IOMEM && !UML diff --git a/queue-5.13/ima-remove-wmissing-prototypes-warning.patch b/queue-5.13/ima-remove-wmissing-prototypes-warning.patch new file mode 100644 index 00000000000..4464b563fdb --- /dev/null +++ b/queue-5.13/ima-remove-wmissing-prototypes-warning.patch @@ -0,0 +1,40 @@ +From a32ad90426a9c8eb3915eed26e08ce133bd9e0da Mon Sep 17 00:00:00 2001 +From: Austin Kim +Date: Tue, 29 Jun 2021 14:50:50 +0100 +Subject: IMA: remove -Wmissing-prototypes warning +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Austin Kim + +commit a32ad90426a9c8eb3915eed26e08ce133bd9e0da upstream. + +With W=1 build, the compiler throws warning message as below: + + security/integrity/ima/ima_mok.c:24:12: warning: + no previous prototype for ‘ima_mok_init’ [-Wmissing-prototypes] + __init int ima_mok_init(void) + +Silence the warning by adding static keyword to ima_mok_init(). + +Signed-off-by: Austin Kim +Fixes: 41c89b64d718 ("IMA: create machine owner and blacklist keyrings") +Cc: stable@vger.kernel.org +Signed-off-by: Mimi Zohar +Signed-off-by: Greg Kroah-Hartman +--- + security/integrity/ima/ima_mok.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/security/integrity/ima/ima_mok.c ++++ b/security/integrity/ima/ima_mok.c +@@ -21,7 +21,7 @@ struct key *ima_blacklist_keyring; + /* + * Allocate the IMA blacklist keyring + */ +-__init int ima_mok_init(void) ++static __init int ima_mok_init(void) + { + struct key_restriction *restriction; + diff --git a/queue-5.13/io-wq-check-max_worker-limits-if-a-worker-transitions-bound-state.patch b/queue-5.13/io-wq-check-max_worker-limits-if-a-worker-transitions-bound-state.patch new file mode 100644 index 00000000000..feb447f1a85 --- /dev/null +++ b/queue-5.13/io-wq-check-max_worker-limits-if-a-worker-transitions-bound-state.patch @@ -0,0 +1,104 @@ +From ecc53c48c13d995e6fe5559e30ffee48d92784fd Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Sun, 29 Aug 2021 16:13:03 -0600 +Subject: io-wq: check max_worker limits if a worker transitions bound state + +From: Jens Axboe + +commit ecc53c48c13d995e6fe5559e30ffee48d92784fd upstream. + +For the two places where new workers are created, we diligently check if +we are allowed to create a new worker. If we're currently at the limit +of how many workers of a given type we can have, then we don't create +any new ones. + +If you have a mixed workload with various types of bound and unbounded +work, then it can happen that a worker finishes one type of work and +is then transitioned to the other type. For this case, we don't check +if we are actually allowed to do so. This can cause io-wq to temporarily +exceed the allowed number of workers for a given type. + +When retrieving work, check that the types match. If they don't, check +if we are allowed to transition to the other type. If not, then don't +handle the new work. 
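+
+The rule is captured by the new helper io_worker_can_run_work() in the
+diff below, which boils down to:
+
+	/* same type (bound vs unbound): always allowed */
+	if (!(worker->flags & IO_WORKER_F_BOUND) !=
+	    !(work->flags & IO_WQ_WORK_UNBOUND))
+		return true;
+	/* transition: only if the target type still has headroom */
+	acct = io_work_get_acct(worker->wqe, work);
+	return acct->nr_workers < acct->max_workers;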
+ +Cc: stable@vger.kernel.org +Reported-by: Johannes Lundberg +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + fs/io-wq.c | 33 ++++++++++++++++++++++++++++++--- + 1 file changed, 30 insertions(+), 3 deletions(-) + +--- a/fs/io-wq.c ++++ b/fs/io-wq.c +@@ -424,7 +424,28 @@ static void io_wait_on_hash(struct io_wq + spin_unlock(&wq->hash->wait.lock); + } + +-static struct io_wq_work *io_get_next_work(struct io_wqe *wqe) ++/* ++ * We can always run the work if the worker is currently the same type as ++ * the work (eg both are bound, or both are unbound). If they are not the ++ * same, only allow it if incrementing the worker count would be allowed. ++ */ ++static bool io_worker_can_run_work(struct io_worker *worker, ++ struct io_wq_work *work) ++{ ++ struct io_wqe_acct *acct; ++ ++ if (!(worker->flags & IO_WORKER_F_BOUND) != ++ !(work->flags & IO_WQ_WORK_UNBOUND)) ++ return true; ++ ++ /* not the same type, check if we'd go over the limit */ ++ acct = io_work_get_acct(worker->wqe, work); ++ return acct->nr_workers < acct->max_workers; ++} ++ ++static struct io_wq_work *io_get_next_work(struct io_wqe *wqe, ++ struct io_worker *worker, ++ bool *stalled) + __must_hold(wqe->lock) + { + struct io_wq_work_node *node, *prev; +@@ -436,6 +457,9 @@ static struct io_wq_work *io_get_next_wo + + work = container_of(node, struct io_wq_work, list); + ++ if (!io_worker_can_run_work(worker, work)) ++ break; ++ + /* not hashed, can run anytime */ + if (!io_wq_is_hashed(work)) { + wq_list_del(&wqe->work_list, node, prev); +@@ -462,6 +486,7 @@ static struct io_wq_work *io_get_next_wo + raw_spin_unlock(&wqe->lock); + io_wait_on_hash(wqe, stall_hash); + raw_spin_lock(&wqe->lock); ++ *stalled = true; + } + + return NULL; +@@ -501,6 +526,7 @@ static void io_worker_handle_work(struct + + do { + struct io_wq_work *work; ++ bool stalled; + get_next: + /* + * If we got some work, mark us as busy. If we didn't, but +@@ -509,10 +535,11 @@ get_next: + * can't make progress, any work completion or insertion will + * clear the stalled flag. + */ +- work = io_get_next_work(wqe); ++ stalled = false; ++ work = io_get_next_work(wqe, worker, &stalled); + if (work) + __io_worker_busy(wqe, worker, work); +- else if (!wq_list_empty(&wqe->work_list)) ++ else if (stalled) + wqe->flags |= IO_WQE_FLAG_STALLED; + + raw_spin_unlock_irq(&wqe->lock); diff --git a/queue-5.13/kvm-arm64-unregister-hyp-sections-from-kmemleak-in-protected-mode.patch b/queue-5.13/kvm-arm64-unregister-hyp-sections-from-kmemleak-in-protected-mode.patch new file mode 100644 index 00000000000..2324c2f37e0 --- /dev/null +++ b/queue-5.13/kvm-arm64-unregister-hyp-sections-from-kmemleak-in-protected-mode.patch @@ -0,0 +1,54 @@ +From 47e6223c841e029bfc23c3ce594dac5525cebaf8 Mon Sep 17 00:00:00 2001 +From: Marc Zyngier +Date: Mon, 2 Aug 2021 13:38:30 +0100 +Subject: KVM: arm64: Unregister HYP sections from kmemleak in protected mode + +From: Marc Zyngier + +commit 47e6223c841e029bfc23c3ce594dac5525cebaf8 upstream. + +Booting a KVM host in protected mode with kmemleak quickly results +in a pretty bad crash, as kmemleak doesn't know that the HYP sections +have been taken away. This is specially true for the BSS section, +which is part of the kernel BSS section and registered at boot time +by kmemleak itself. + +Unregister the HYP part of the BSS before making that section +HYP-private. The rest of the HYP-specific data is obtained via +the page allocator or lives in other sections, none of which is +subjected to kmemleak. 
+ +Fixes: 90134ac9cabb ("KVM: arm64: Protect the .hyp sections from the host") +Reviewed-by: Quentin Perret +Reviewed-by: Catalin Marinas +Signed-off-by: Marc Zyngier +Cc: stable@vger.kernel.org # 5.13 +Link: https://lore.kernel.org/r/20210802123830.2195174-3-maz@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/kvm/arm.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/arch/arm64/kvm/arm.c ++++ b/arch/arm64/kvm/arm.c +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -1957,6 +1958,12 @@ static int finalize_hyp_mode(void) + if (ret) + return ret; + ++ /* ++ * Exclude HYP BSS from kmemleak so that it doesn't get peeked ++ * at, which would end badly once the section is inaccessible. ++ * None of other sections should ever be introspected. ++ */ ++ kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start); + ret = pkvm_mark_hyp_section(__hyp_bss); + if (ret) + return ret; diff --git a/queue-5.13/kvm-nvmx-unconditionally-clear-nested.pi_pending-on-nested-vm-enter.patch b/queue-5.13/kvm-nvmx-unconditionally-clear-nested.pi_pending-on-nested-vm-enter.patch new file mode 100644 index 00000000000..0855571e720 --- /dev/null +++ b/queue-5.13/kvm-nvmx-unconditionally-clear-nested.pi_pending-on-nested-vm-enter.patch @@ -0,0 +1,60 @@ +From f7782bb8d818d8f47c26b22079db10599922787a Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Tue, 10 Aug 2021 07:45:26 -0700 +Subject: KVM: nVMX: Unconditionally clear nested.pi_pending on nested VM-Enter + +From: Sean Christopherson + +commit f7782bb8d818d8f47c26b22079db10599922787a upstream. + +Clear nested.pi_pending on nested VM-Enter even if L2 will run without +posted interrupts enabled. If nested.pi_pending is left set from a +previous L2, vmx_complete_nested_posted_interrupt() will pick up the +stale flag and exit to userspace with an "internal emulation error" due +the new L2 not having a valid nested.pi_desc. + +Arguably, vmx_complete_nested_posted_interrupt() should first check for +posted interrupts being enabled, but it's also completely reasonable that +KVM wouldn't screw up a fundamental flag. Not to mention that the mere +existence of nested.pi_pending is a long-standing bug as KVM shouldn't +move the posted interrupt out of the IRR until it's actually processed, +e.g. KVM effectively drops an interrupt when it performs a nested VM-Exit +with a "pending" posted interrupt. Fixing the mess is a future problem. + +Prior to vmx_complete_nested_posted_interrupt() interpreting a null PI +descriptor as an error, this was a benign bug as the null PI descriptor +effectively served as a check on PI not being enabled. Even then, the +new flow did not become problematic until KVM started checking the result +of kvm_check_nested_events(). 
+ +Fixes: 705699a13994 ("KVM: nVMX: Enable nested posted interrupt processing") +Fixes: 966eefb89657 ("KVM: nVMX: Disable vmcs02 posted interrupts if vmcs12 PID isn't mappable") +Fixes: 47d3530f86c0 ("KVM: x86: Exit to userspace when kvm_check_nested_events fails") +Cc: stable@vger.kernel.org +Cc: Jim Mattson +Signed-off-by: Sean Christopherson +Message-Id: <20210810144526.2662272-1-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/nested.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +--- a/arch/x86/kvm/vmx/nested.c ++++ b/arch/x86/kvm/vmx/nested.c +@@ -2226,12 +2226,11 @@ static void prepare_vmcs02_early(struct + ~PIN_BASED_VMX_PREEMPTION_TIMER); + + /* Posted interrupts setting is only taken from vmcs12. */ +- if (nested_cpu_has_posted_intr(vmcs12)) { ++ vmx->nested.pi_pending = false; ++ if (nested_cpu_has_posted_intr(vmcs12)) + vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv; +- vmx->nested.pi_pending = false; +- } else { ++ else + exec_control &= ~PIN_BASED_POSTED_INTR; +- } + pin_controls_set(vmx, exec_control); + + /* diff --git a/queue-5.13/kvm-s390-index-kvm-arch.idle_mask-by-vcpu_idx.patch b/queue-5.13/kvm-s390-index-kvm-arch.idle_mask-by-vcpu_idx.patch new file mode 100644 index 00000000000..ae66001ce58 --- /dev/null +++ b/queue-5.13/kvm-s390-index-kvm-arch.idle_mask-by-vcpu_idx.patch @@ -0,0 +1,122 @@ +From a3e03bc1368c1bc16e19b001fc96dc7430573cc8 Mon Sep 17 00:00:00 2001 +From: Halil Pasic +Date: Fri, 27 Aug 2021 14:54:29 +0200 +Subject: KVM: s390: index kvm->arch.idle_mask by vcpu_idx +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Halil Pasic + +commit a3e03bc1368c1bc16e19b001fc96dc7430573cc8 upstream. + +While in practice vcpu->vcpu_idx == vcpu->vcp_id is often true, it may +not always be, and we must not rely on this. Reason is that KVM decides +the vcpu_idx, userspace decides the vcpu_id, thus the two might not +match. + +Currently kvm->arch.idle_mask is indexed by vcpu_id, which implies +that code like +for_each_set_bit(vcpu_id, kvm->arch.idle_mask, online_vcpus) { + vcpu = kvm_get_vcpu(kvm, vcpu_id); + do_stuff(vcpu); +} +is not legit. Reason is that kvm_get_vcpu expects an vcpu_idx, not an +vcpu_id. The trouble is, we do actually use kvm->arch.idle_mask like +this. To fix this problem we have two options. Either use +kvm_get_vcpu_by_id(vcpu_id), which would loop to find the right vcpu_id, +or switch to indexing via vcpu_idx. The latter is preferable for obvious +reasons. + +Let us make switch from indexing kvm->arch.idle_mask by vcpu_id to +indexing it by vcpu_idx. To keep gisa_int.kicked_mask indexed by the +same index as idle_mask lets make the same change for it as well. 
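+
+With that switch the loop pattern quoted above becomes legitimate:
+
+for_each_set_bit(vcpu_idx, kvm->arch.idle_mask, online_vcpus) {
+	vcpu = kvm_get_vcpu(kvm, vcpu_idx);
+	do_stuff(vcpu);
+}
+
+as kvm_get_vcpu() is now handed the vcpu_idx it expects.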
+ +Fixes: 1ee0bc559dc3 ("KVM: s390: get rid of local_int array") +Signed-off-by: Halil Pasic +Reviewed-by: Christian Bornträger +Reviewed-by: Claudio Imbrenda +Cc: # 3.15+ +Link: https://lore.kernel.org/r/20210827125429.1912577-1-pasic@linux.ibm.com +Signed-off-by: Christian Borntraeger +Signed-off-by: Greg Kroah-Hartman +--- + arch/s390/include/asm/kvm_host.h | 1 + + arch/s390/kvm/interrupt.c | 12 ++++++------ + arch/s390/kvm/kvm-s390.c | 2 +- + arch/s390/kvm/kvm-s390.h | 2 +- + 4 files changed, 9 insertions(+), 8 deletions(-) + +--- a/arch/s390/include/asm/kvm_host.h ++++ b/arch/s390/include/asm/kvm_host.h +@@ -962,6 +962,7 @@ struct kvm_arch{ + atomic64_t cmma_dirty_pages; + /* subset of available cpu features enabled by user space */ + DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS); ++ /* indexed by vcpu_idx */ + DECLARE_BITMAP(idle_mask, KVM_MAX_VCPUS); + struct kvm_s390_gisa_interrupt gisa_int; + struct kvm_s390_pv pv; +--- a/arch/s390/kvm/interrupt.c ++++ b/arch/s390/kvm/interrupt.c +@@ -419,13 +419,13 @@ static unsigned long deliverable_irqs(st + static void __set_cpu_idle(struct kvm_vcpu *vcpu) + { + kvm_s390_set_cpuflags(vcpu, CPUSTAT_WAIT); +- set_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask); ++ set_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask); + } + + static void __unset_cpu_idle(struct kvm_vcpu *vcpu) + { + kvm_s390_clear_cpuflags(vcpu, CPUSTAT_WAIT); +- clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask); ++ clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask); + } + + static void __reset_intercept_indicators(struct kvm_vcpu *vcpu) +@@ -3050,18 +3050,18 @@ int kvm_s390_get_irq_state(struct kvm_vc + + static void __airqs_kick_single_vcpu(struct kvm *kvm, u8 deliverable_mask) + { +- int vcpu_id, online_vcpus = atomic_read(&kvm->online_vcpus); ++ int vcpu_idx, online_vcpus = atomic_read(&kvm->online_vcpus); + struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; + struct kvm_vcpu *vcpu; + +- for_each_set_bit(vcpu_id, kvm->arch.idle_mask, online_vcpus) { +- vcpu = kvm_get_vcpu(kvm, vcpu_id); ++ for_each_set_bit(vcpu_idx, kvm->arch.idle_mask, online_vcpus) { ++ vcpu = kvm_get_vcpu(kvm, vcpu_idx); + if (psw_ioint_disabled(vcpu)) + continue; + deliverable_mask &= (u8)(vcpu->arch.sie_block->gcr[6] >> 24); + if (deliverable_mask) { + /* lately kicked but not yet running */ +- if (test_and_set_bit(vcpu_id, gi->kicked_mask)) ++ if (test_and_set_bit(vcpu_idx, gi->kicked_mask)) + return; + kvm_s390_vcpu_wakeup(vcpu); + return; +--- a/arch/s390/kvm/kvm-s390.c ++++ b/arch/s390/kvm/kvm-s390.c +@@ -4020,7 +4020,7 @@ static int vcpu_pre_run(struct kvm_vcpu + kvm_s390_patch_guest_per_regs(vcpu); + } + +- clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask); ++ clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.gisa_int.kicked_mask); + + vcpu->arch.sie_block->icptcode = 0; + cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags); +--- a/arch/s390/kvm/kvm-s390.h ++++ b/arch/s390/kvm/kvm-s390.h +@@ -79,7 +79,7 @@ static inline int is_vcpu_stopped(struct + + static inline int is_vcpu_idle(struct kvm_vcpu *vcpu) + { +- return test_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask); ++ return test_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask); + } + + static inline int kvm_is_ucontrol(struct kvm *kvm) diff --git a/queue-5.13/kvm-vmx-avoid-running-vmx_handle_exit_irqoff-in-case-of-emulation.patch b/queue-5.13/kvm-vmx-avoid-running-vmx_handle_exit_irqoff-in-case-of-emulation.patch new file mode 100644 index 00000000000..9ccd5ebd2b9 --- /dev/null +++ 
b/queue-5.13/kvm-vmx-avoid-running-vmx_handle_exit_irqoff-in-case-of-emulation.patch @@ -0,0 +1,34 @@ +From 81b4b56d4f8130bbb99cf4e2b48082e5b4cfccb9 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Thu, 26 Aug 2021 12:57:49 +0300 +Subject: KVM: VMX: avoid running vmx_handle_exit_irqoff in case of emulation + +From: Maxim Levitsky + +commit 81b4b56d4f8130bbb99cf4e2b48082e5b4cfccb9 upstream. + +If we are emulating an invalid guest state, we don't have a correct +exit reason, and thus we shouldn't do anything in this function. + +Signed-off-by: Maxim Levitsky +Message-Id: <20210826095750.1650467-2-mlevitsk@redhat.com> +Cc: stable@vger.kernel.org +Fixes: 95b5a48c4f2b ("KVM: VMX: Handle NMIs, #MCs and async #PFs in common irqs-disabled fn", 2019-06-18) +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/vmx.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -6452,6 +6452,9 @@ static void vmx_handle_exit_irqoff(struc + { + struct vcpu_vmx *vmx = to_vmx(vcpu); + ++ if (vmx->emulation_required) ++ return; ++ + if (vmx->exit_reason.basic == EXIT_REASON_EXTERNAL_INTERRUPT) + handle_external_interrupt_irqoff(vcpu); + else if (vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI) diff --git a/queue-5.13/kvm-x86-clamp-host-mapping-level-to-max_level-in-kvm_mmu_max_mapping_level.patch b/queue-5.13/kvm-x86-clamp-host-mapping-level-to-max_level-in-kvm_mmu_max_mapping_level.patch new file mode 100644 index 00000000000..1e7b967c816 --- /dev/null +++ b/queue-5.13/kvm-x86-clamp-host-mapping-level-to-max_level-in-kvm_mmu_max_mapping_level.patch @@ -0,0 +1,74 @@ +From ec607a564f70519b340f7eb4cfc0f4a6b55285ac Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Fri, 6 Aug 2021 07:05:58 -0400 +Subject: KVM: x86: clamp host mapping level to max_level in kvm_mmu_max_mapping_level + +From: Paolo Bonzini + +commit ec607a564f70519b340f7eb4cfc0f4a6b55285ac upstream. + +This change started as a way to make kvm_mmu_hugepage_adjust a bit simpler, +but it does fix two bugs as well. + +One bug is in zapping collapsible PTEs. If a large page size is +disallowed but not all of them, kvm_mmu_max_mapping_level will return the +host mapping level and the small PTEs will be zapped up to that level. +However, if e.g. 1GB are prohibited, we can still zap 4KB mapping and +preserve the 2MB ones. This can happen for example when NX huge pages +are in use. + +The second would happen when userspace backs guest memory +with a 1gb hugepage but only assign a subset of the page to +the guest. 1gb pages would be disallowed by the memslot, but +not 2mb. kvm_mmu_max_mapping_level() would fall through to the +host_pfn_mapping_level() logic, see the 1gb hugepage, and map the whole +thing into the guest. 
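+
+The resulting kvm_mmu_max_mapping_level() (see the diff below) simply
+clamps the host-derived level:
+
+	host_level = host_pfn_mapping_level(kvm, gfn, pfn, slot);
+	return min(host_level, max_level);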
+ +Fixes: 2f57b7051fe8 ("KVM: x86/mmu: Persist gfn_lpage_is_disallowed() to max_level") +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/mmu/mmu.c | 13 +++++-------- + 1 file changed, 5 insertions(+), 8 deletions(-) + +--- a/arch/x86/kvm/mmu/mmu.c ++++ b/arch/x86/kvm/mmu/mmu.c +@@ -2754,6 +2754,7 @@ int kvm_mmu_max_mapping_level(struct kvm + kvm_pfn_t pfn, int max_level) + { + struct kvm_lpage_info *linfo; ++ int host_level; + + max_level = min(max_level, max_huge_page_level); + for ( ; max_level > PG_LEVEL_4K; max_level--) { +@@ -2765,7 +2766,8 @@ int kvm_mmu_max_mapping_level(struct kvm + if (max_level == PG_LEVEL_4K) + return PG_LEVEL_4K; + +- return host_pfn_mapping_level(kvm, gfn, pfn, slot); ++ host_level = host_pfn_mapping_level(kvm, gfn, pfn, slot); ++ return min(host_level, max_level); + } + + int kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, gfn_t gfn, +@@ -2789,17 +2791,12 @@ int kvm_mmu_hugepage_adjust(struct kvm_v + if (!slot) + return PG_LEVEL_4K; + +- level = kvm_mmu_max_mapping_level(vcpu->kvm, slot, gfn, pfn, max_level); +- if (level == PG_LEVEL_4K) +- return level; +- +- *req_level = level = min(level, max_level); +- + /* + * Enforce the iTLB multihit workaround after capturing the requested + * level, which will be used to do precise, accurate accounting. + */ +- if (huge_page_disallowed) ++ *req_level = level = kvm_mmu_max_mapping_level(vcpu->kvm, slot, gfn, pfn, max_level); ++ if (level == PG_LEVEL_4K || huge_page_disallowed) + return PG_LEVEL_4K; + + /* diff --git a/queue-5.13/kvm-x86-mmu-avoid-collision-with-present-sptes-in-tdp-mmu-lpage-stats.patch b/queue-5.13/kvm-x86-mmu-avoid-collision-with-present-sptes-in-tdp-mmu-lpage-stats.patch new file mode 100644 index 00000000000..9bea6ae9a5a --- /dev/null +++ b/queue-5.13/kvm-x86-mmu-avoid-collision-with-present-sptes-in-tdp-mmu-lpage-stats.patch @@ -0,0 +1,82 @@ +From 088acd23526647844aec1c39db4ad02552c86c7b Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Mon, 2 Aug 2021 21:46:06 -0700 +Subject: KVM: x86/mmu: Avoid collision with !PRESENT SPTEs in TDP MMU lpage stats + +From: Sean Christopherson + +commit 088acd23526647844aec1c39db4ad02552c86c7b upstream. + +Factor in whether or not the old/new SPTEs are shadow-present when +adjusting the large page stats in the TDP MMU. A modified MMIO SPTE can +toggle the page size bit, as bit 7 is used to store the MMIO generation, +i.e. is_large_pte() can get a false positive when called on a MMIO SPTE. +Ditto for nuking SPTEs with REMOVED_SPTE, which sets bit 7 in its magic +value. + +Opportunistically move the logic below the check to verify at least one +of the old/new SPTEs is shadow present. + +Use is/was_leaf even though is/was_present would suffice. The code +generation is roughly equivalent since all flags need to be computed +prior to the code in question, and using the *_leaf flags will minimize +the diff in a future enhancement to account all pages, i.e. will change +the check to "is_leaf != was_leaf". 
+ +Reviewed-by: David Matlack +Reviewed-by: Ben Gardon + +Fixes: 1699f65c8b65 ("kvm/x86: Fix 'lpages' kvm stat for TDM MMU") +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Signed-off-by: Mingwei Zhang +Message-Id: <20210803044607.599629-3-mizhang@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/mmu/tdp_mmu.c | 20 +++++++++++++------- + 1 file changed, 13 insertions(+), 7 deletions(-) + +--- a/arch/x86/kvm/mmu/tdp_mmu.c ++++ b/arch/x86/kvm/mmu/tdp_mmu.c +@@ -410,6 +410,7 @@ static void __handle_changed_spte(struct + bool was_leaf = was_present && is_last_spte(old_spte, level); + bool is_leaf = is_present && is_last_spte(new_spte, level); + bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte); ++ bool was_large, is_large; + + WARN_ON(level > PT64_ROOT_MAX_LEVEL); + WARN_ON(level < PG_LEVEL_4K); +@@ -443,13 +444,6 @@ static void __handle_changed_spte(struct + + trace_kvm_tdp_mmu_spte_changed(as_id, gfn, level, old_spte, new_spte); + +- if (is_large_pte(old_spte) != is_large_pte(new_spte)) { +- if (is_large_pte(old_spte)) +- atomic64_sub(1, (atomic64_t*)&kvm->stat.lpages); +- else +- atomic64_add(1, (atomic64_t*)&kvm->stat.lpages); +- } +- + /* + * The only times a SPTE should be changed from a non-present to + * non-present state is when an MMIO entry is installed/modified/ +@@ -475,6 +469,18 @@ static void __handle_changed_spte(struct + return; + } + ++ /* ++ * Update large page stats if a large page is being zapped, created, or ++ * is replacing an existing shadow page. ++ */ ++ was_large = was_leaf && is_large_pte(old_spte); ++ is_large = is_leaf && is_large_pte(new_spte); ++ if (was_large != is_large) { ++ if (was_large) ++ atomic64_sub(1, (atomic64_t *)&kvm->stat.lpages); ++ else ++ atomic64_add(1, (atomic64_t *)&kvm->stat.lpages); ++ } + + if (was_leaf && is_dirty_spte(old_spte) && + (!is_present || !is_dirty_spte(new_spte) || pfn_changed)) diff --git a/queue-5.13/kvm-x86-update-vcpu-s-hv_clock-before-back-to-guest-when-tsc_offset-is-adjusted.patch b/queue-5.13/kvm-x86-update-vcpu-s-hv_clock-before-back-to-guest-when-tsc_offset-is-adjusted.patch new file mode 100644 index 00000000000..a69dd791e61 --- /dev/null +++ b/queue-5.13/kvm-x86-update-vcpu-s-hv_clock-before-back-to-guest-when-tsc_offset-is-adjusted.patch @@ -0,0 +1,40 @@ +From d9130a2dfdd4b21736c91b818f87dbc0ccd1e757 Mon Sep 17 00:00:00 2001 +From: Zelin Deng +Date: Wed, 28 Apr 2021 10:22:01 +0800 +Subject: KVM: x86: Update vCPU's hv_clock before back to guest when tsc_offset is adjusted + +From: Zelin Deng + +commit d9130a2dfdd4b21736c91b818f87dbc0ccd1e757 upstream. + +When MSR_IA32_TSC_ADJUST is written by guest due to TSC ADJUST feature +especially there's a big tsc warp (like a new vCPU is hot-added into VM +which has been up for a long time), tsc_offset is added by a large value +then go back to guest. This causes system time jump as tsc_timestamp is +not adjusted in the meantime and pvclock monotonic character. +To fix this, just notify kvm to update vCPU's guest time before back to +guest. 
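+
+That is, the MSR_IA32_TSC_ADJUST write path (diff below) now queues a
+clock update before reentering the guest:
+
+	adjust_tsc_offset_guest(vcpu, adj);
+	kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);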
+ +Cc: stable@vger.kernel.org +Signed-off-by: Zelin Deng +Signed-off-by: Paolo Bonzini +Message-Id: <1619576521-81399-2-git-send-email-zelin.deng@linux.alibaba.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/x86.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -3223,6 +3223,10 @@ int kvm_set_msr_common(struct kvm_vcpu * + if (!msr_info->host_initiated) { + s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr; + adjust_tsc_offset_guest(vcpu, adj); ++ /* Before back to guest, tsc_timestamp must be adjusted ++ * as well, otherwise guest's percpu pvclock time could jump. ++ */ ++ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); + } + vcpu->arch.ia32_tsc_adjust_msr = data; + } diff --git a/queue-5.13/md-raid10-remove-unnecessary-rcu_dereference-in-raid10_handle_discard.patch b/queue-5.13/md-raid10-remove-unnecessary-rcu_dereference-in-raid10_handle_discard.patch new file mode 100644 index 00000000000..d60b8e8eeaf --- /dev/null +++ b/queue-5.13/md-raid10-remove-unnecessary-rcu_dereference-in-raid10_handle_discard.patch @@ -0,0 +1,82 @@ +From 46d4703b1db4c86ab5acb2331b10df999f005e8e Mon Sep 17 00:00:00 2001 +From: Xiao Ni +Date: Wed, 18 Aug 2021 13:57:48 +0800 +Subject: md/raid10: Remove unnecessary rcu_dereference in raid10_handle_discard + +From: Xiao Ni + +commit 46d4703b1db4c86ab5acb2331b10df999f005e8e upstream. + +We are seeing the following warning in raid10_handle_discard. +[ 695.110751] ============================= +[ 695.131439] WARNING: suspicious RCU usage +[ 695.151389] 4.18.0-319.el8.x86_64+debug #1 Not tainted +[ 695.174413] ----------------------------- +[ 695.192603] drivers/md/raid10.c:1776 suspicious +rcu_dereference_check() usage! +[ 695.225107] other info that might help us debug this: +[ 695.260940] rcu_scheduler_active = 2, debug_locks = 1 +[ 695.290157] no locks held by mkfs.xfs/10186. + +In the first loop of function raid10_handle_discard. It already +determines which disk need to handle discard request and add the +rdev reference count rdev->nr_pending. So the conf->mirrors will +not change until all bios come back from underlayer disks. It +doesn't need to use rcu_dereference to get rdev. + +Cc: stable@vger.kernel.org +Fixes: d30588b2731f ('md/raid10: improve raid10 discard request') +Signed-off-by: Xiao Ni +Acked-by: Guoqing Jiang +Signed-off-by: Song Liu +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/raid10.c | 14 ++++++++++---- + 1 file changed, 10 insertions(+), 4 deletions(-) + +--- a/drivers/md/raid10.c ++++ b/drivers/md/raid10.c +@@ -1706,6 +1706,11 @@ retry_discard: + } else + r10_bio->master_bio = (struct bio *)first_r10bio; + ++ /* ++ * first select target devices under rcu_lock and ++ * inc refcount on their rdev. Record them by setting ++ * bios[x] to bio ++ */ + rcu_read_lock(); + for (disk = 0; disk < geo->raid_disks; disk++) { + struct md_rdev *rdev = rcu_dereference(conf->mirrors[disk].rdev); +@@ -1737,9 +1742,6 @@ retry_discard: + for (disk = 0; disk < geo->raid_disks; disk++) { + sector_t dev_start, dev_end; + struct bio *mbio, *rbio = NULL; +- struct md_rdev *rdev = rcu_dereference(conf->mirrors[disk].rdev); +- struct md_rdev *rrdev = rcu_dereference( +- conf->mirrors[disk].replacement); + + /* + * Now start to calculate the start and end address for each disk. +@@ -1769,9 +1771,12 @@ retry_discard: + + /* + * It only handles discard bio which size is >= stripe size, so +- * dev_end > dev_start all the time ++ * dev_end > dev_start all the time. 
++ * It doesn't need to use rcu lock to get rdev here. We already ++ * add rdev->nr_pending in the first loop. + */ + if (r10_bio->devs[disk].bio) { ++ struct md_rdev *rdev = conf->mirrors[disk].rdev; + mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set); + mbio->bi_end_io = raid10_end_discard_request; + mbio->bi_private = r10_bio; +@@ -1784,6 +1789,7 @@ retry_discard: + bio_endio(mbio); + } + if (r10_bio->devs[disk].repl_bio) { ++ struct md_rdev *rrdev = conf->mirrors[disk].replacement; + rbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set); + rbio->bi_end_io = raid10_end_discard_request; + rbio->bi_private = r10_bio; diff --git a/queue-5.13/perf-x86-amd-ibs-extend-perf_pmu_cap_no_exclude-to-ibs-op.patch b/queue-5.13/perf-x86-amd-ibs-extend-perf_pmu_cap_no_exclude-to-ibs-op.patch new file mode 100644 index 00000000000..8140948df3a --- /dev/null +++ b/queue-5.13/perf-x86-amd-ibs-extend-perf_pmu_cap_no_exclude-to-ibs-op.patch @@ -0,0 +1,36 @@ +From f11dd0d80555cdc8eaf5cfc9e19c9e198217f9f1 Mon Sep 17 00:00:00 2001 +From: Kim Phillips +Date: Tue, 17 Aug 2021 17:10:41 -0500 +Subject: perf/x86/amd/ibs: Extend PERF_PMU_CAP_NO_EXCLUDE to IBS Op + +From: Kim Phillips + +commit f11dd0d80555cdc8eaf5cfc9e19c9e198217f9f1 upstream. + +Commit: + + 2ff40250691e ("perf/core, arch/x86: Use PERF_PMU_CAP_NO_EXCLUDE for exclusion incapable PMUs") + +neglected to do so. + +Fixes: 2ff40250691e ("perf/core, arch/x86: Use PERF_PMU_CAP_NO_EXCLUDE for exclusion incapable PMUs") +Signed-off-by: Kim Phillips +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Ingo Molnar +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20210817221048.88063-2-kim.phillips@amd.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/events/amd/ibs.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/x86/events/amd/ibs.c ++++ b/arch/x86/events/amd/ibs.c +@@ -571,6 +571,7 @@ static struct perf_ibs perf_ibs_op = { + .start = perf_ibs_start, + .stop = perf_ibs_stop, + .read = perf_ibs_read, ++ .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + }, + .msr = MSR_AMD64_IBSOPCTL, + .config_mask = IBS_OP_CONFIG_MASK, diff --git a/queue-5.13/revert-kvm-x86-mmu-add-guest-physical-address-check-in-translate_gpa.patch b/queue-5.13/revert-kvm-x86-mmu-add-guest-physical-address-check-in-translate_gpa.patch new file mode 100644 index 00000000000..86e52692400 --- /dev/null +++ b/queue-5.13/revert-kvm-x86-mmu-add-guest-physical-address-check-in-translate_gpa.patch @@ -0,0 +1,72 @@ +From e7177339d7b5f9594b316842122b5fda9513d5e2 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Tue, 31 Aug 2021 09:42:22 -0700 +Subject: Revert "KVM: x86: mmu: Add guest physical address check in translate_gpa()" + +From: Sean Christopherson + +commit e7177339d7b5f9594b316842122b5fda9513d5e2 upstream. + +Revert a misguided illegal GPA check when "translating" a non-nested GPA. +The check is woefully incomplete as it does not fill in @exception as +expected by all callers, which leads to KVM attempting to inject a bogus +exception, potentially exposing kernel stack information in the process. 
+ + WARNING: CPU: 0 PID: 8469 at arch/x86/kvm/x86.c:525 exception_type+0x98/0xb0 arch/x86/kvm/x86.c:525 + CPU: 1 PID: 8469 Comm: syz-executor531 Not tainted 5.14.0-rc7-syzkaller #0 + RIP: 0010:exception_type+0x98/0xb0 arch/x86/kvm/x86.c:525 + Call Trace: + x86_emulate_instruction+0xef6/0x1460 arch/x86/kvm/x86.c:7853 + kvm_mmu_page_fault+0x2f0/0x1810 arch/x86/kvm/mmu/mmu.c:5199 + handle_ept_misconfig+0xdf/0x3e0 arch/x86/kvm/vmx/vmx.c:5336 + __vmx_handle_exit arch/x86/kvm/vmx/vmx.c:6021 [inline] + vmx_handle_exit+0x336/0x1800 arch/x86/kvm/vmx/vmx.c:6038 + vcpu_enter_guest+0x2a1c/0x4430 arch/x86/kvm/x86.c:9712 + vcpu_run arch/x86/kvm/x86.c:9779 [inline] + kvm_arch_vcpu_ioctl_run+0x47d/0x1b20 arch/x86/kvm/x86.c:10010 + kvm_vcpu_ioctl+0x49e/0xe50 arch/x86/kvm/../../../virt/kvm/kvm_main.c:3652 + +The bug has escaped notice because practically speaking the GPA check is +useless. The GPA check in question only comes into play when KVM is +walking guest page tables (or "translating" CR3), and KVM already handles +illegal GPA checks by setting reserved bits in rsvd_bits_mask for each +PxE, or in the case of CR3 for loading PTDPTRs, manually checks for an +illegal CR3. This particular failure doesn't hit the existing reserved +bits checks because syzbot sets guest.MAXPHYADDR=1, and IA32 architecture +simply doesn't allow for such an absurd MAXPHYADDR, e.g. 32-bit paging +doesn't define any reserved PA bits checks, which KVM emulates by only +incorporating the reserved PA bits into the "high" bits, i.e. bits 63:32. + +Simply remove the bogus check. There is zero meaningful value and no +architectural justification for supporting guest.MAXPHYADDR < 32, and +properly filling the exception would introduce non-trivial complexity. + +This reverts commit ec7771ab471ba6a945350353617e2e3385d0e013. 
+ +Fixes: ec7771ab471b ("KVM: x86: mmu: Add guest physical address check in translate_gpa()") +Cc: stable@vger.kernel.org +Reported-by: syzbot+200c08e88ae818f849ce@syzkaller.appspotmail.com +Signed-off-by: Sean Christopherson +Message-Id: <20210831164224.1119728-2-seanjc@google.com> +Reviewed-by: Vitaly Kuznetsov +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/mmu/mmu.c | 6 ------ + 1 file changed, 6 deletions(-) + +--- a/arch/x86/kvm/mmu/mmu.c ++++ b/arch/x86/kvm/mmu/mmu.c +@@ -257,12 +257,6 @@ static bool check_mmio_spte(struct kvm_v + static gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, + struct x86_exception *exception) + { +- /* Check if guest physical address doesn't exceed guest maximum */ +- if (kvm_vcpu_is_illegal_gpa(vcpu, gpa)) { +- exception->error_code |= PFERR_RSVD_MASK; +- return UNMAPPED_GVA; +- } +- + return gpa; + } + diff --git a/queue-5.13/series b/queue-5.13/series index bbcf2dacf03..69863b22cf0 100644 --- a/queue-5.13/series +++ b/queue-5.13/series @@ -274,3 +274,23 @@ raid1-ensure-write-behind-bio-has-less-than-bio_max_vecs-sectors.patch cifs-do-not-leak-edeadlk-to-dgetents64-for-status_user_session_deleted.patch smb3-fix-posix-extensions-mount-option.patch tty-fix-data-race-between-tiocsti-and-flush_to_ldisc.patch +x86-efi-restore-firmware-idt-before-calling-exitbootservices.patch +perf-x86-amd-ibs-extend-perf_pmu_cap_no_exclude-to-ibs-op.patch +x86-resctrl-fix-a-maybe-uninitialized-build-warning-treated-as-error.patch +revert-kvm-x86-mmu-add-guest-physical-address-check-in-translate_gpa.patch +kvm-s390-index-kvm-arch.idle_mask-by-vcpu_idx.patch +kvm-x86-update-vcpu-s-hv_clock-before-back-to-guest-when-tsc_offset-is-adjusted.patch +kvm-x86-clamp-host-mapping-level-to-max_level-in-kvm_mmu_max_mapping_level.patch +kvm-x86-mmu-avoid-collision-with-present-sptes-in-tdp-mmu-lpage-stats.patch +kvm-vmx-avoid-running-vmx_handle_exit_irqoff-in-case-of-emulation.patch +kvm-nvmx-unconditionally-clear-nested.pi_pending-on-nested-vm-enter.patch +kvm-arm64-unregister-hyp-sections-from-kmemleak-in-protected-mode.patch +arm-dts-at91-add-pinctrl-names-0-for-all-gpios.patch +io-wq-check-max_worker-limits-if-a-worker-transitions-bound-state.patch +md-raid10-remove-unnecessary-rcu_dereference-in-raid10_handle_discard.patch +char-tpm-kconfig-remove-bad-i2c-cr50-select.patch +fuse-truncate-pagecache-on-atomic_o_trunc.patch +fuse-flush-extending-writes.patch +fuse-wait-for-writepages-in-syncfs.patch +ima-remove-wmissing-prototypes-warning.patch +ima-remove-the-dependency-on-crypto_md5.patch diff --git a/queue-5.13/x86-efi-restore-firmware-idt-before-calling-exitbootservices.patch b/queue-5.13/x86-efi-restore-firmware-idt-before-calling-exitbootservices.patch new file mode 100644 index 00000000000..1c87eb794fb --- /dev/null +++ b/queue-5.13/x86-efi-restore-firmware-idt-before-calling-exitbootservices.patch @@ -0,0 +1,127 @@ +From 22aa45cb465be474e97666b3f7587ccb06ee411b Mon Sep 17 00:00:00 2001 +From: Joerg Roedel +Date: Fri, 20 Aug 2021 14:57:03 +0200 +Subject: x86/efi: Restore Firmware IDT before calling ExitBootServices() + +From: Joerg Roedel + +commit 22aa45cb465be474e97666b3f7587ccb06ee411b upstream. + +Commit + + 79419e13e808 ("x86/boot/compressed/64: Setup IDT in startup_32 boot path") + +introduced an IDT into the 32-bit boot path of the decompressor stub. +But the IDT is set up before ExitBootServices() is called, and some UEFI +firmwares rely on their own IDT. 
+ +Save the firmware IDT on boot and restore it before calling into EFI +functions to fix boot failures introduced by above commit. + +Fixes: 79419e13e808 ("x86/boot/compressed/64: Setup IDT in startup_32 boot path") +Reported-by: Fabio Aiuto +Signed-off-by: Joerg Roedel +Signed-off-by: Borislav Petkov +Acked-by: Ard Biesheuvel +Cc: stable@vger.kernel.org # 5.13+ +Link: https://lkml.kernel.org/r/20210820125703.32410-1-joro@8bytes.org +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/boot/compressed/efi_thunk_64.S | 30 +++++++++++++++++++++--------- + arch/x86/boot/compressed/head_64.S | 3 +++ + 2 files changed, 24 insertions(+), 9 deletions(-) + +--- a/arch/x86/boot/compressed/efi_thunk_64.S ++++ b/arch/x86/boot/compressed/efi_thunk_64.S +@@ -5,9 +5,8 @@ + * Early support for invoking 32-bit EFI services from a 64-bit kernel. + * + * Because this thunking occurs before ExitBootServices() we have to +- * restore the firmware's 32-bit GDT before we make EFI service calls, +- * since the firmware's 32-bit IDT is still currently installed and it +- * needs to be able to service interrupts. ++ * restore the firmware's 32-bit GDT and IDT before we make EFI service ++ * calls. + * + * On the plus side, we don't have to worry about mangling 64-bit + * addresses into 32-bits because we're executing with an identity +@@ -39,7 +38,7 @@ SYM_FUNC_START(__efi64_thunk) + /* + * Convert x86-64 ABI params to i386 ABI + */ +- subq $32, %rsp ++ subq $64, %rsp + movl %esi, 0x0(%rsp) + movl %edx, 0x4(%rsp) + movl %ecx, 0x8(%rsp) +@@ -49,14 +48,19 @@ SYM_FUNC_START(__efi64_thunk) + leaq 0x14(%rsp), %rbx + sgdt (%rbx) + ++ addq $16, %rbx ++ sidt (%rbx) ++ + /* +- * Switch to gdt with 32-bit segments. This is the firmware GDT +- * that was installed when the kernel started executing. This +- * pointer was saved at the EFI stub entry point in head_64.S. ++ * Switch to IDT and GDT with 32-bit segments. This is the firmware GDT ++ * and IDT that was installed when the kernel started executing. The ++ * pointers were saved at the EFI stub entry point in head_64.S. + * + * Pass the saved DS selector to the 32-bit code, and use far return to + * restore the saved CS selector. + */ ++ leaq efi32_boot_idt(%rip), %rax ++ lidt (%rax) + leaq efi32_boot_gdt(%rip), %rax + lgdt (%rax) + +@@ -67,7 +71,7 @@ SYM_FUNC_START(__efi64_thunk) + pushq %rax + lretq + +-1: addq $32, %rsp ++1: addq $64, %rsp + movq %rdi, %rax + + pop %rbx +@@ -128,10 +132,13 @@ SYM_FUNC_START_LOCAL(efi_enter32) + + /* + * Some firmware will return with interrupts enabled. Be sure to +- * disable them before we switch GDTs. ++ * disable them before we switch GDTs and IDTs. 
+ */ + cli + ++ lidtl (%ebx) ++ subl $16, %ebx ++ + lgdtl (%ebx) + + movl %cr4, %eax +@@ -166,6 +173,11 @@ SYM_DATA_START(efi32_boot_gdt) + .quad 0 + SYM_DATA_END(efi32_boot_gdt) + ++SYM_DATA_START(efi32_boot_idt) ++ .word 0 ++ .quad 0 ++SYM_DATA_END(efi32_boot_idt) ++ + SYM_DATA_START(efi32_boot_cs) + .word 0 + SYM_DATA_END(efi32_boot_cs) +--- a/arch/x86/boot/compressed/head_64.S ++++ b/arch/x86/boot/compressed/head_64.S +@@ -319,6 +319,9 @@ SYM_INNER_LABEL(efi32_pe_stub_entry, SYM + movw %cs, rva(efi32_boot_cs)(%ebp) + movw %ds, rva(efi32_boot_ds)(%ebp) + ++ /* Store firmware IDT descriptor */ ++ sidtl rva(efi32_boot_idt)(%ebp) ++ + /* Disable paging */ + movl %cr0, %eax + btrl $X86_CR0_PG_BIT, %eax diff --git a/queue-5.13/x86-resctrl-fix-a-maybe-uninitialized-build-warning-treated-as-error.patch b/queue-5.13/x86-resctrl-fix-a-maybe-uninitialized-build-warning-treated-as-error.patch new file mode 100644 index 00000000000..e358cb4346c --- /dev/null +++ b/queue-5.13/x86-resctrl-fix-a-maybe-uninitialized-build-warning-treated-as-error.patch @@ -0,0 +1,67 @@ +From 527f721478bce3f49b513a733bacd19d6f34b08c Mon Sep 17 00:00:00 2001 +From: Babu Moger +Date: Fri, 20 Aug 2021 16:52:42 -0500 +Subject: x86/resctrl: Fix a maybe-uninitialized build warning treated as error +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Babu Moger + +commit 527f721478bce3f49b513a733bacd19d6f34b08c upstream. + +The recent commit + + 064855a69003 ("x86/resctrl: Fix default monitoring groups reporting") + +caused a RHEL build failure with an uninitialized variable warning +treated as an error because it removed the default case snippet. + +The RHEL Makefile uses '-Werror=maybe-uninitialized' to force possibly +uninitialized variable warnings to be treated as errors. This is also +reported by smatch via the 0day robot. + +The error from the RHEL build is: + + arch/x86/kernel/cpu/resctrl/monitor.c: In function ‘__mon_event_count’: + arch/x86/kernel/cpu/resctrl/monitor.c:261:12: error: ‘m’ may be used + uninitialized in this function [-Werror=maybe-uninitialized] + m->chunks += chunks; + ^~ + +The upstream Makefile does not build using '-Werror=maybe-uninitialized'. +So, the problem is not seen there. Fix the problem by putting back the +default case snippet. + + [ bp: note that there's nothing wrong with the code and other compilers + do not trigger this warning - this is being done just so the RHEL compiler + is happy. ] + +Fixes: 064855a69003 ("x86/resctrl: Fix default monitoring groups reporting") +Reported-by: Terry Bowman +Reported-by: kernel test robot +Signed-off-by: Babu Moger +Signed-off-by: Borislav Petkov +Reviewed-by: Reinette Chatre +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/162949631908.23903.17090272726012848523.stgit@bmoger-ubuntu +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/cpu/resctrl/monitor.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/arch/x86/kernel/cpu/resctrl/monitor.c ++++ b/arch/x86/kernel/cpu/resctrl/monitor.c +@@ -304,6 +304,12 @@ static u64 __mon_event_count(u32 rmid, s + case QOS_L3_MBM_LOCAL_EVENT_ID: + m = &rr->d->mbm_local[rmid]; + break; ++ default: ++ /* ++ * Code would never reach here because an invalid ++ * event id would fail the __rmid_read. ++ */ ++ return RMID_VAL_ERROR; + } + + if (rr->first) {