From: Greg Kroah-Hartman Date: Mon, 13 Sep 2021 11:36:42 +0000 (+0200) Subject: 5.14-stable patches X-Git-Tag: v5.4.146~12 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=74cc39ad1bb73035227d6289892a4eebc4b3f852;p=thirdparty%2Fkernel%2Fstable-queue.git 5.14-stable patches added patches: arm-dts-at91-add-pinctrl-names-0-for-all-gpios.patch char-tpm-kconfig-remove-bad-i2c-cr50-select.patch fuse-flush-extending-writes.patch fuse-truncate-pagecache-on-atomic_o_trunc.patch fuse-wait-for-writepages-in-syncfs.patch ima-remove-the-dependency-on-crypto_md5.patch ima-remove-wmissing-prototypes-warning.patch io-wq-check-max_worker-limits-if-a-worker-transitions-bound-state.patch kvm-arm64-unregister-hyp-sections-from-kmemleak-in-protected-mode.patch kvm-arm64-vgic-resample-hw-pending-state-on-deactivation.patch kvm-nvmx-unconditionally-clear-nested.pi_pending-on-nested-vm-enter.patch kvm-s390-index-kvm-arch.idle_mask-by-vcpu_idx.patch kvm-vmx-avoid-running-vmx_handle_exit_irqoff-in-case-of-emulation.patch kvm-x86-clamp-host-mapping-level-to-max_level-in-kvm_mmu_max_mapping_level.patch kvm-x86-mmu-avoid-collision-with-present-sptes-in-tdp-mmu-lpage-stats.patch kvm-x86-update-vcpu-s-hv_clock-before-back-to-guest-when-tsc_offset-is-adjusted.patch md-raid10-remove-unnecessary-rcu_dereference-in-raid10_handle_discard.patch perf-x86-intel-uncore-fix-iio-cleanup-mapping-procedure-for-snr-icx.patch revert-kvm-x86-mmu-add-guest-physical-address-check-in-translate_gpa.patch --- diff --git a/queue-5.14/arm-dts-at91-add-pinctrl-names-0-for-all-gpios.patch b/queue-5.14/arm-dts-at91-add-pinctrl-names-0-for-all-gpios.patch new file mode 100644 index 00000000000..3acb65df343 --- /dev/null +++ b/queue-5.14/arm-dts-at91-add-pinctrl-names-0-for-all-gpios.patch @@ -0,0 +1,189 @@ +From bf781869e5cf3e4ec1a47dad69b6f0df97629cbd Mon Sep 17 00:00:00 2001 +From: Claudiu Beznea +Date: Tue, 27 Jul 2021 10:40:05 +0300 +Subject: ARM: dts: at91: add pinctrl-{names, 0} for all gpios + +From: Claudiu Beznea + +commit bf781869e5cf3e4ec1a47dad69b6f0df97629cbd upstream. + +Add pinctrl-names and pinctrl-0 properties on controllers that claims to +use pins to avoid failures due to +commit 2ab73c6d8323 ("gpio: Support GPIO controllers without pin-ranges") +and also to avoid using pins that may be claimed my other IPs. + +Fixes: b7c2b6157079 ("ARM: at91: add Atmel's SAMA5D3 Xplained board") +Fixes: 1e5f532c2737 ("ARM: dts: at91: sam9x60: add device tree for soc and board") +Fixes: 38153a017896 ("ARM: at91/dt: sama5d4: add dts for sama5d4 xplained board") +Signed-off-by: Claudiu Beznea +Signed-off-by: Nicolas Ferre +Link: https://lore.kernel.org/r/20210727074006.1609989-1-claudiu.beznea@microchip.com +Cc: # v5.7+ +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm/boot/dts/at91-sam9x60ek.dts | 16 ++++++++++++++- + arch/arm/boot/dts/at91-sama5d3_xplained.dts | 29 ++++++++++++++++++++++++++++ + arch/arm/boot/dts/at91-sama5d4_xplained.dts | 19 ++++++++++++++++++ + 3 files changed, 63 insertions(+), 1 deletion(-) + +--- a/arch/arm/boot/dts/at91-sam9x60ek.dts ++++ b/arch/arm/boot/dts/at91-sam9x60ek.dts +@@ -92,6 +92,8 @@ + + leds { + compatible = "gpio-leds"; ++ pinctrl-names = "default"; ++ pinctrl-0 = <&pinctrl_gpio_leds>; + status = "okay"; /* Conflict with pwm0. 
*/ + + red { +@@ -537,6 +539,10 @@ + AT91_PIOA 19 AT91_PERIPH_A (AT91_PINCTRL_PULL_UP | AT91_PINCTRL_DRIVE_STRENGTH_HI) /* PA19 DAT2 periph A with pullup */ + AT91_PIOA 20 AT91_PERIPH_A (AT91_PINCTRL_PULL_UP | AT91_PINCTRL_DRIVE_STRENGTH_HI)>; /* PA20 DAT3 periph A with pullup */ + }; ++ pinctrl_sdmmc0_cd: sdmmc0_cd { ++ atmel,pins = ++ ; ++ }; + }; + + sdmmc1 { +@@ -569,6 +575,14 @@ + AT91_PIOD 16 AT91_PERIPH_GPIO AT91_PINCTRL_NONE>; + }; + }; ++ ++ leds { ++ pinctrl_gpio_leds: gpio_leds { ++ atmel,pins = ; ++ }; ++ }; + }; /* pinctrl */ + + &pwm0 { +@@ -580,7 +594,7 @@ + &sdmmc0 { + bus-width = <4>; + pinctrl-names = "default"; +- pinctrl-0 = <&pinctrl_sdmmc0_default>; ++ pinctrl-0 = <&pinctrl_sdmmc0_default &pinctrl_sdmmc0_cd>; + status = "okay"; + cd-gpios = <&pioA 23 GPIO_ACTIVE_LOW>; + disable-wp; +--- a/arch/arm/boot/dts/at91-sama5d3_xplained.dts ++++ b/arch/arm/boot/dts/at91-sama5d3_xplained.dts +@@ -57,6 +57,8 @@ + }; + + spi0: spi@f0004000 { ++ pinctrl-names = "default"; ++ pinctrl-0 = <&pinctrl_spi0_cs>; + cs-gpios = <&pioD 13 0>, <0>, <0>, <&pioD 16 0>; + status = "okay"; + }; +@@ -169,6 +171,8 @@ + }; + + spi1: spi@f8008000 { ++ pinctrl-names = "default"; ++ pinctrl-0 = <&pinctrl_spi1_cs>; + cs-gpios = <&pioC 25 0>; + status = "okay"; + }; +@@ -248,6 +252,26 @@ + ; + }; ++ ++ pinctrl_gpio_leds: gpio_leds_default { ++ atmel,pins = ++ ; ++ }; ++ ++ pinctrl_spi0_cs: spi0_cs_default { ++ atmel,pins = ++ ; ++ }; ++ ++ pinctrl_spi1_cs: spi1_cs_default { ++ atmel,pins = ; ++ }; ++ ++ pinctrl_vcc_mmc0_reg_gpio: vcc_mmc0_reg_gpio_default { ++ atmel,pins = ; ++ }; + }; + }; + }; +@@ -339,6 +363,8 @@ + + vcc_mmc0_reg: fixedregulator_mmc0 { + compatible = "regulator-fixed"; ++ pinctrl-names = "default"; ++ pinctrl-0 = <&pinctrl_vcc_mmc0_reg_gpio>; + gpio = <&pioE 2 GPIO_ACTIVE_LOW>; + regulator-name = "mmc0-card-supply"; + regulator-min-microvolt = <3300000>; +@@ -362,6 +388,9 @@ + + leds { + compatible = "gpio-leds"; ++ pinctrl-names = "default"; ++ pinctrl-0 = <&pinctrl_gpio_leds>; ++ status = "okay"; + + d2 { + label = "d2"; +--- a/arch/arm/boot/dts/at91-sama5d4_xplained.dts ++++ b/arch/arm/boot/dts/at91-sama5d4_xplained.dts +@@ -90,6 +90,8 @@ + }; + + spi1: spi@fc018000 { ++ pinctrl-names = "default"; ++ pinctrl-0 = <&pinctrl_spi0_cs>; + cs-gpios = <&pioB 21 0>; + status = "okay"; + }; +@@ -147,6 +149,19 @@ + atmel,pins = + ; + }; ++ pinctrl_spi0_cs: spi0_cs_default { ++ atmel,pins = ++ ; ++ }; ++ pinctrl_gpio_leds: gpio_leds_default { ++ atmel,pins = ++ ; ++ }; ++ pinctrl_vcc_mmc1_reg: vcc_mmc1_reg { ++ atmel,pins = ++ ; ++ }; + }; + }; + }; +@@ -252,6 +267,8 @@ + + leds { + compatible = "gpio-leds"; ++ pinctrl-names = "default"; ++ pinctrl-0 = <&pinctrl_gpio_leds>; + status = "okay"; + + d8 { +@@ -278,6 +295,8 @@ + + vcc_mmc1_reg: fixedregulator_mmc1 { + compatible = "regulator-fixed"; ++ pinctrl-names = "default"; ++ pinctrl-0 = <&pinctrl_vcc_mmc1_reg>; + gpio = <&pioE 4 GPIO_ACTIVE_LOW>; + regulator-name = "VDD MCI1"; + regulator-min-microvolt = <3300000>; diff --git a/queue-5.14/char-tpm-kconfig-remove-bad-i2c-cr50-select.patch b/queue-5.14/char-tpm-kconfig-remove-bad-i2c-cr50-select.patch new file mode 100644 index 00000000000..18cafc45e41 --- /dev/null +++ b/queue-5.14/char-tpm-kconfig-remove-bad-i2c-cr50-select.patch @@ -0,0 +1,33 @@ +From 847fdae1579f4ee930b01f24a7847b8043bf468c Mon Sep 17 00:00:00 2001 +From: Adrian Ratiu +Date: Tue, 27 Jul 2021 20:13:12 +0300 +Subject: char: tpm: Kconfig: remove bad i2c cr50 select + +From: Adrian Ratiu + +commit 
847fdae1579f4ee930b01f24a7847b8043bf468c upstream. + +This fixes a minor bug which went unnoticed during the initial +driver upstreaming review: TCG_CR50 does not exist in mainline +kernels, so remove it. + +Fixes: 3a253caaad11 ("char: tpm: add i2c driver for cr50") +Cc: stable@vger.kernel.org +Reviewed-by: Jarkko Sakkinen +Signed-off-by: Adrian Ratiu +Signed-off-by: Jarkko Sakkinen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/char/tpm/Kconfig | 1 - + 1 file changed, 1 deletion(-) + +--- a/drivers/char/tpm/Kconfig ++++ b/drivers/char/tpm/Kconfig +@@ -89,7 +89,6 @@ config TCG_TIS_SYNQUACER + config TCG_TIS_I2C_CR50 + tristate "TPM Interface Specification 2.0 Interface (I2C - CR50)" + depends on I2C +- select TCG_CR50 + help + This is a driver for the Google cr50 I2C TPM interface which is a + custom microcontroller and requires a custom i2c protocol interface diff --git a/queue-5.14/fuse-flush-extending-writes.patch b/queue-5.14/fuse-flush-extending-writes.patch new file mode 100644 index 00000000000..54411809a10 --- /dev/null +++ b/queue-5.14/fuse-flush-extending-writes.patch @@ -0,0 +1,49 @@ +From 59bda8ecee2ffc6a602b7bf2b9e43ca669cdbdcd Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Tue, 31 Aug 2021 14:18:08 +0200 +Subject: fuse: flush extending writes + +From: Miklos Szeredi + +commit 59bda8ecee2ffc6a602b7bf2b9e43ca669cdbdcd upstream. + +Callers of fuse_writeback_range() assume that the file is ready for +modification by the server in the supplied byte range after the call +returns. + +If there's a write that extends the file beyond the end of the supplied +range, then the file needs to be extended to at least the end of the range, +but currently that's not done. + +There are at least two cases where this can cause problems: + + - copy_file_range() will return short count if the file is not extended + up to end of the source range. + + - FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE will not extend the file, + hence the region may not be fully allocated. + +Fix by flushing writes from the start of the range up to the end of the +file. This could be optimized if the writes are non-extending, etc, but +it's probably not worth the trouble. + +Fixes: a2bc92362941 ("fuse: fix copy_file_range() in the writeback case") +Fixes: 6b1bdb56b17c ("fuse: allow fallocate(FALLOC_FL_ZERO_RANGE)") +Cc: # v5.2 +Signed-off-by: Miklos Szeredi +Signed-off-by: Greg Kroah-Hartman +--- + fs/fuse/file.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/fuse/file.c ++++ b/fs/fuse/file.c +@@ -2884,7 +2884,7 @@ fuse_direct_IO(struct kiocb *iocb, struc + + static int fuse_writeback_range(struct inode *inode, loff_t start, loff_t end) + { +- int err = filemap_write_and_wait_range(inode->i_mapping, start, end); ++ int err = filemap_write_and_wait_range(inode->i_mapping, start, -1); + + if (!err) + fuse_sync_writes(inode); diff --git a/queue-5.14/fuse-truncate-pagecache-on-atomic_o_trunc.patch b/queue-5.14/fuse-truncate-pagecache-on-atomic_o_trunc.patch new file mode 100644 index 00000000000..04fd413702f --- /dev/null +++ b/queue-5.14/fuse-truncate-pagecache-on-atomic_o_trunc.patch @@ -0,0 +1,58 @@ +From 76224355db7570cbe6b6f75c8929a1558828dd55 Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Tue, 17 Aug 2021 21:05:16 +0200 +Subject: fuse: truncate pagecache on atomic_o_trunc + +From: Miklos Szeredi + +commit 76224355db7570cbe6b6f75c8929a1558828dd55 upstream. + +fuse_finish_open() will be called with FUSE_NOWRITE in case of atomic +O_TRUNC. 
This can deadlock with fuse_wait_on_page_writeback() in +fuse_launder_page() triggered by invalidate_inode_pages2(). + +Fix by replacing invalidate_inode_pages2() in fuse_finish_open() with a +truncate_pagecache() call. This makes sense regardless of FOPEN_KEEP_CACHE +or fc->writeback cache, so do it unconditionally. + +Reported-by: Xie Yongji +Reported-and-tested-by: syzbot+bea44a5189836d956894@syzkaller.appspotmail.com +Fixes: e4648309b85a ("fuse: truncate pending writes on O_TRUNC") +Cc: +Signed-off-by: Miklos Szeredi +Signed-off-by: Greg Kroah-Hartman +--- + fs/fuse/file.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/fs/fuse/file.c ++++ b/fs/fuse/file.c +@@ -198,12 +198,11 @@ void fuse_finish_open(struct inode *inod + struct fuse_file *ff = file->private_data; + struct fuse_conn *fc = get_fuse_conn(inode); + +- if (!(ff->open_flags & FOPEN_KEEP_CACHE)) +- invalidate_inode_pages2(inode->i_mapping); + if (ff->open_flags & FOPEN_STREAM) + stream_open(inode, file); + else if (ff->open_flags & FOPEN_NONSEEKABLE) + nonseekable_open(inode, file); ++ + if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC)) { + struct fuse_inode *fi = get_fuse_inode(inode); + +@@ -211,10 +210,14 @@ void fuse_finish_open(struct inode *inod + fi->attr_version = atomic64_inc_return(&fc->attr_version); + i_size_write(inode, 0); + spin_unlock(&fi->lock); ++ truncate_pagecache(inode, 0); + fuse_invalidate_attr(inode); + if (fc->writeback_cache) + file_update_time(file); ++ } else if (!(ff->open_flags & FOPEN_KEEP_CACHE)) { ++ invalidate_inode_pages2(inode->i_mapping); + } ++ + if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache) + fuse_link_write_file(file); + } diff --git a/queue-5.14/fuse-wait-for-writepages-in-syncfs.patch b/queue-5.14/fuse-wait-for-writepages-in-syncfs.patch new file mode 100644 index 00000000000..62b68e0069a --- /dev/null +++ b/queue-5.14/fuse-wait-for-writepages-in-syncfs.patch @@ -0,0 +1,242 @@ +From 660585b56e63ca034ad506ea53c807c5cdca3196 Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Wed, 1 Sep 2021 12:39:02 +0200 +Subject: fuse: wait for writepages in syncfs + +From: Miklos Szeredi + +commit 660585b56e63ca034ad506ea53c807c5cdca3196 upstream. + +In case of fuse the MM subsystem doesn't guarantee that page writeback +completes by the time ->sync_fs() is called. This is because fuse +completes page writeback immediately to prevent DoS of memory reclaim by +the userspace file server. + +This means that fuse itself must ensure that writes are synced before +sending the SYNCFS request to the server. + +Introduce sync buckets, that hold a counter for the number of outstanding +write requests. On syncfs replace the current bucket with a new one and +wait until the old bucket's counter goes down to zero. + +It is possible to have multiple syncfs calls in parallel, in which case +there could be more than one waited-on buckets. Descendant buckets must +not complete until the parent completes. Add a count to the child (new) +bucket until the (parent) old bucket completes. + +Use RCU protection to dereference the current bucket and to wake up an +emptied bucket. Use fc->lock to protect against parallel assignments to +the current bucket. + +This leaves just the counter to be a possible scalability issue. The +fc->num_waiting counter has a similar issue, so both should be addressed at +the same time. 
+ +Reported-by: Amir Goldstein +Fixes: 2d82ab251ef0 ("virtiofs: propagate sync() to file server") +Cc: # v5.14 +Signed-off-by: Miklos Szeredi +Signed-off-by: Greg Kroah-Hartman +--- + fs/fuse/file.c | 21 +++++++++++++++++++ + fs/fuse/fuse_i.h | 19 +++++++++++++++++ + fs/fuse/inode.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ + 3 files changed, 100 insertions(+) + +--- a/fs/fuse/file.c ++++ b/fs/fuse/file.c +@@ -392,6 +392,7 @@ struct fuse_writepage_args { + struct list_head queue_entry; + struct fuse_writepage_args *next; + struct inode *inode; ++ struct fuse_sync_bucket *bucket; + }; + + static struct fuse_writepage_args *fuse_find_writeback(struct fuse_inode *fi, +@@ -1611,6 +1612,9 @@ static void fuse_writepage_free(struct f + struct fuse_args_pages *ap = &wpa->ia.ap; + int i; + ++ if (wpa->bucket) ++ fuse_sync_bucket_dec(wpa->bucket); ++ + for (i = 0; i < ap->num_pages; i++) + __free_page(ap->pages[i]); + +@@ -1874,6 +1878,20 @@ static struct fuse_writepage_args *fuse_ + + } + ++static void fuse_writepage_add_to_bucket(struct fuse_conn *fc, ++ struct fuse_writepage_args *wpa) ++{ ++ if (!fc->sync_fs) ++ return; ++ ++ rcu_read_lock(); ++ /* Prevent resurrection of dead bucket in unlikely race with syncfs */ ++ do { ++ wpa->bucket = rcu_dereference(fc->curr_bucket); ++ } while (unlikely(!atomic_inc_not_zero(&wpa->bucket->count))); ++ rcu_read_unlock(); ++} ++ + static int fuse_writepage_locked(struct page *page) + { + struct address_space *mapping = page->mapping; +@@ -1901,6 +1919,7 @@ static int fuse_writepage_locked(struct + if (!wpa->ia.ff) + goto err_nofile; + ++ fuse_writepage_add_to_bucket(fc, wpa); + fuse_write_args_fill(&wpa->ia, wpa->ia.ff, page_offset(page), 0); + + copy_highpage(tmp_page, page); +@@ -2151,6 +2170,8 @@ static int fuse_writepages_fill(struct p + __free_page(tmp_page); + goto out_unlock; + } ++ fuse_writepage_add_to_bucket(fc, wpa); ++ + data->max_pages = 1; + + ap = &wpa->ia.ap; +--- a/fs/fuse/fuse_i.h ++++ b/fs/fuse/fuse_i.h +@@ -515,6 +515,13 @@ struct fuse_fs_context { + void **fudptr; + }; + ++struct fuse_sync_bucket { ++ /* count is a possible scalability bottleneck */ ++ atomic_t count; ++ wait_queue_head_t waitq; ++ struct rcu_head rcu; ++}; ++ + /** + * A Fuse connection. 
+ * +@@ -807,6 +814,9 @@ struct fuse_conn { + + /** List of filesystems using this connection */ + struct list_head mounts; ++ ++ /* New writepages go into this bucket */ ++ struct fuse_sync_bucket __rcu *curr_bucket; + }; + + /* +@@ -910,6 +920,15 @@ static inline void fuse_page_descs_lengt + descs[i].length = PAGE_SIZE - descs[i].offset; + } + ++static inline void fuse_sync_bucket_dec(struct fuse_sync_bucket *bucket) ++{ ++ /* Need RCU protection to prevent use after free after the decrement */ ++ rcu_read_lock(); ++ if (atomic_dec_and_test(&bucket->count)) ++ wake_up(&bucket->waitq); ++ rcu_read_unlock(); ++} ++ + /** Device operations */ + extern const struct file_operations fuse_dev_operations; + +--- a/fs/fuse/inode.c ++++ b/fs/fuse/inode.c +@@ -506,6 +506,57 @@ static int fuse_statfs(struct dentry *de + return err; + } + ++static struct fuse_sync_bucket *fuse_sync_bucket_alloc(void) ++{ ++ struct fuse_sync_bucket *bucket; ++ ++ bucket = kzalloc(sizeof(*bucket), GFP_KERNEL | __GFP_NOFAIL); ++ if (bucket) { ++ init_waitqueue_head(&bucket->waitq); ++ /* Initial active count */ ++ atomic_set(&bucket->count, 1); ++ } ++ return bucket; ++} ++ ++static void fuse_sync_fs_writes(struct fuse_conn *fc) ++{ ++ struct fuse_sync_bucket *bucket, *new_bucket; ++ int count; ++ ++ new_bucket = fuse_sync_bucket_alloc(); ++ spin_lock(&fc->lock); ++ bucket = rcu_dereference_protected(fc->curr_bucket, 1); ++ count = atomic_read(&bucket->count); ++ WARN_ON(count < 1); ++ /* No outstanding writes? */ ++ if (count == 1) { ++ spin_unlock(&fc->lock); ++ kfree(new_bucket); ++ return; ++ } ++ ++ /* ++ * Completion of new bucket depends on completion of this bucket, so add ++ * one more count. ++ */ ++ atomic_inc(&new_bucket->count); ++ rcu_assign_pointer(fc->curr_bucket, new_bucket); ++ spin_unlock(&fc->lock); ++ /* ++ * Drop initial active count. At this point if all writes in this and ++ * ancestor buckets complete, the count will go to zero and this task ++ * will be woken up. 
++ */ ++ atomic_dec(&bucket->count); ++ ++ wait_event(bucket->waitq, atomic_read(&bucket->count) == 0); ++ ++ /* Drop temp count on descendant bucket */ ++ fuse_sync_bucket_dec(new_bucket); ++ kfree_rcu(bucket, rcu); ++} ++ + static int fuse_sync_fs(struct super_block *sb, int wait) + { + struct fuse_mount *fm = get_fuse_mount_super(sb); +@@ -528,6 +579,8 @@ static int fuse_sync_fs(struct super_blo + if (!fc->sync_fs) + return 0; + ++ fuse_sync_fs_writes(fc); ++ + memset(&inarg, 0, sizeof(inarg)); + args.in_numargs = 1; + args.in_args[0].size = sizeof(inarg); +@@ -763,6 +816,7 @@ void fuse_conn_put(struct fuse_conn *fc) + { + if (refcount_dec_and_test(&fc->count)) { + struct fuse_iqueue *fiq = &fc->iq; ++ struct fuse_sync_bucket *bucket; + + if (IS_ENABLED(CONFIG_FUSE_DAX)) + fuse_dax_conn_free(fc); +@@ -770,6 +824,11 @@ void fuse_conn_put(struct fuse_conn *fc) + fiq->ops->release(fiq); + put_pid_ns(fc->pid_ns); + put_user_ns(fc->user_ns); ++ bucket = rcu_dereference_protected(fc->curr_bucket, 1); ++ if (bucket) { ++ WARN_ON(atomic_read(&bucket->count) != 1); ++ kfree(bucket); ++ } + fc->release(fc); + } + } +@@ -1418,6 +1477,7 @@ int fuse_fill_super_common(struct super_ + if (sb->s_flags & SB_MANDLOCK) + goto err; + ++ rcu_assign_pointer(fc->curr_bucket, fuse_sync_bucket_alloc()); + fuse_sb_defaults(sb); + + if (ctx->is_bdev) { diff --git a/queue-5.14/ima-remove-the-dependency-on-crypto_md5.patch b/queue-5.14/ima-remove-the-dependency-on-crypto_md5.patch new file mode 100644 index 00000000000..d8acf2e5ca7 --- /dev/null +++ b/queue-5.14/ima-remove-the-dependency-on-crypto_md5.patch @@ -0,0 +1,45 @@ +From 8510505d55e194d3f6c9644c9f9d12c4f6b0395a Mon Sep 17 00:00:00 2001 +From: THOBY Simon +Date: Mon, 16 Aug 2021 08:10:59 +0000 +Subject: IMA: remove the dependency on CRYPTO_MD5 + +From: THOBY Simon + +commit 8510505d55e194d3f6c9644c9f9d12c4f6b0395a upstream. + +MD5 is a weak digest algorithm that shouldn't be used for cryptographic +operation. It hinders the efficiency of a patch set that aims to limit +the digests allowed for the extended file attribute namely security.ima. +MD5 is no longer a requirement for IMA, nor should it be used there. + +The sole place where we still use the MD5 algorithm inside IMA is setting +the ima_hash algorithm to MD5, if the user supplies 'ima_hash=md5' +parameter on the command line. With commit ab60368ab6a4 ("ima: Fallback +to the builtin hash algorithm"), setting "ima_hash=md5" fails gracefully +when CRYPTO_MD5 is not set: + ima: Can not allocate md5 (reason: -2) + ima: Allocating md5 failed, going to use default hash algorithm sha256 + +Remove the CRYPTO_MD5 dependency for IMA. + +Signed-off-by: THOBY Simon +Reviewed-by: Lakshmi Ramasubramanian +[zohar@linux.ibm.com: include commit number in patch description for +stable.] 
+Cc: stable@vger.kernel.org # 4.17 +Signed-off-by: Mimi Zohar +Signed-off-by: Greg Kroah-Hartman +--- + security/integrity/ima/Kconfig | 1 - + 1 file changed, 1 deletion(-) + +--- a/security/integrity/ima/Kconfig ++++ b/security/integrity/ima/Kconfig +@@ -6,7 +6,6 @@ config IMA + select SECURITYFS + select CRYPTO + select CRYPTO_HMAC +- select CRYPTO_MD5 + select CRYPTO_SHA1 + select CRYPTO_HASH_INFO + select TCG_TPM if HAS_IOMEM && !UML diff --git a/queue-5.14/ima-remove-wmissing-prototypes-warning.patch b/queue-5.14/ima-remove-wmissing-prototypes-warning.patch new file mode 100644 index 00000000000..4464b563fdb --- /dev/null +++ b/queue-5.14/ima-remove-wmissing-prototypes-warning.patch @@ -0,0 +1,40 @@ +From a32ad90426a9c8eb3915eed26e08ce133bd9e0da Mon Sep 17 00:00:00 2001 +From: Austin Kim +Date: Tue, 29 Jun 2021 14:50:50 +0100 +Subject: IMA: remove -Wmissing-prototypes warning +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Austin Kim + +commit a32ad90426a9c8eb3915eed26e08ce133bd9e0da upstream. + +With W=1 build, the compiler throws warning message as below: + + security/integrity/ima/ima_mok.c:24:12: warning: + no previous prototype for ‘ima_mok_init’ [-Wmissing-prototypes] + __init int ima_mok_init(void) + +Silence the warning by adding static keyword to ima_mok_init(). + +Signed-off-by: Austin Kim +Fixes: 41c89b64d718 ("IMA: create machine owner and blacklist keyrings") +Cc: stable@vger.kernel.org +Signed-off-by: Mimi Zohar +Signed-off-by: Greg Kroah-Hartman +--- + security/integrity/ima/ima_mok.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/security/integrity/ima/ima_mok.c ++++ b/security/integrity/ima/ima_mok.c +@@ -21,7 +21,7 @@ struct key *ima_blacklist_keyring; + /* + * Allocate the IMA blacklist keyring + */ +-__init int ima_mok_init(void) ++static __init int ima_mok_init(void) + { + struct key_restriction *restriction; + diff --git a/queue-5.14/io-wq-check-max_worker-limits-if-a-worker-transitions-bound-state.patch b/queue-5.14/io-wq-check-max_worker-limits-if-a-worker-transitions-bound-state.patch new file mode 100644 index 00000000000..d02440ca4e0 --- /dev/null +++ b/queue-5.14/io-wq-check-max_worker-limits-if-a-worker-transitions-bound-state.patch @@ -0,0 +1,104 @@ +From ecc53c48c13d995e6fe5559e30ffee48d92784fd Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Sun, 29 Aug 2021 16:13:03 -0600 +Subject: io-wq: check max_worker limits if a worker transitions bound state + +From: Jens Axboe + +commit ecc53c48c13d995e6fe5559e30ffee48d92784fd upstream. + +For the two places where new workers are created, we diligently check if +we are allowed to create a new worker. If we're currently at the limit +of how many workers of a given type we can have, then we don't create +any new ones. + +If you have a mixed workload with various types of bound and unbounded +work, then it can happen that a worker finishes one type of work and +is then transitioned to the other type. For this case, we don't check +if we are actually allowed to do so. This can cause io-wq to temporarily +exceed the allowed number of workers for a given type. + +When retrieving work, check that the types match. If they don't, check +if we are allowed to transition to the other type. If not, then don't +handle the new work. 
+ +Cc: stable@vger.kernel.org +Reported-by: Johannes Lundberg +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + fs/io-wq.c | 33 ++++++++++++++++++++++++++++++--- + 1 file changed, 30 insertions(+), 3 deletions(-) + +--- a/fs/io-wq.c ++++ b/fs/io-wq.c +@@ -423,7 +423,28 @@ static void io_wait_on_hash(struct io_wq + spin_unlock(&wq->hash->wait.lock); + } + +-static struct io_wq_work *io_get_next_work(struct io_wqe *wqe) ++/* ++ * We can always run the work if the worker is currently the same type as ++ * the work (eg both are bound, or both are unbound). If they are not the ++ * same, only allow it if incrementing the worker count would be allowed. ++ */ ++static bool io_worker_can_run_work(struct io_worker *worker, ++ struct io_wq_work *work) ++{ ++ struct io_wqe_acct *acct; ++ ++ if (!(worker->flags & IO_WORKER_F_BOUND) != ++ !(work->flags & IO_WQ_WORK_UNBOUND)) ++ return true; ++ ++ /* not the same type, check if we'd go over the limit */ ++ acct = io_work_get_acct(worker->wqe, work); ++ return acct->nr_workers < acct->max_workers; ++} ++ ++static struct io_wq_work *io_get_next_work(struct io_wqe *wqe, ++ struct io_worker *worker, ++ bool *stalled) + __must_hold(wqe->lock) + { + struct io_wq_work_node *node, *prev; +@@ -435,6 +456,9 @@ static struct io_wq_work *io_get_next_wo + + work = container_of(node, struct io_wq_work, list); + ++ if (!io_worker_can_run_work(worker, work)) ++ break; ++ + /* not hashed, can run anytime */ + if (!io_wq_is_hashed(work)) { + wq_list_del(&wqe->work_list, node, prev); +@@ -461,6 +485,7 @@ static struct io_wq_work *io_get_next_wo + raw_spin_unlock(&wqe->lock); + io_wait_on_hash(wqe, stall_hash); + raw_spin_lock(&wqe->lock); ++ *stalled = true; + } + + return NULL; +@@ -500,6 +525,7 @@ static void io_worker_handle_work(struct + + do { + struct io_wq_work *work; ++ bool stalled; + get_next: + /* + * If we got some work, mark us as busy. If we didn't, but +@@ -508,10 +534,11 @@ get_next: + * can't make progress, any work completion or insertion will + * clear the stalled flag. + */ +- work = io_get_next_work(wqe); ++ stalled = false; ++ work = io_get_next_work(wqe, worker, &stalled); + if (work) + __io_worker_busy(wqe, worker, work); +- else if (!wq_list_empty(&wqe->work_list)) ++ else if (stalled) + wqe->flags |= IO_WQE_FLAG_STALLED; + + raw_spin_unlock_irq(&wqe->lock); diff --git a/queue-5.14/kvm-arm64-unregister-hyp-sections-from-kmemleak-in-protected-mode.patch b/queue-5.14/kvm-arm64-unregister-hyp-sections-from-kmemleak-in-protected-mode.patch new file mode 100644 index 00000000000..ebe2642d477 --- /dev/null +++ b/queue-5.14/kvm-arm64-unregister-hyp-sections-from-kmemleak-in-protected-mode.patch @@ -0,0 +1,54 @@ +From 47e6223c841e029bfc23c3ce594dac5525cebaf8 Mon Sep 17 00:00:00 2001 +From: Marc Zyngier +Date: Mon, 2 Aug 2021 13:38:30 +0100 +Subject: KVM: arm64: Unregister HYP sections from kmemleak in protected mode + +From: Marc Zyngier + +commit 47e6223c841e029bfc23c3ce594dac5525cebaf8 upstream. + +Booting a KVM host in protected mode with kmemleak quickly results +in a pretty bad crash, as kmemleak doesn't know that the HYP sections +have been taken away. This is specially true for the BSS section, +which is part of the kernel BSS section and registered at boot time +by kmemleak itself. + +Unregister the HYP part of the BSS before making that section +HYP-private. The rest of the HYP-specific data is obtained via +the page allocator or lives in other sections, none of which is +subjected to kmemleak. 
+ +Fixes: 90134ac9cabb ("KVM: arm64: Protect the .hyp sections from the host") +Reviewed-by: Quentin Perret +Reviewed-by: Catalin Marinas +Signed-off-by: Marc Zyngier +Cc: stable@vger.kernel.org # 5.13 +Link: https://lore.kernel.org/r/20210802123830.2195174-3-maz@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/kvm/arm.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/arch/arm64/kvm/arm.c ++++ b/arch/arm64/kvm/arm.c +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -1986,6 +1987,12 @@ static int finalize_hyp_mode(void) + if (ret) + return ret; + ++ /* ++ * Exclude HYP BSS from kmemleak so that it doesn't get peeked ++ * at, which would end badly once the section is inaccessible. ++ * None of other sections should ever be introspected. ++ */ ++ kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start); + ret = pkvm_mark_hyp_section(__hyp_bss); + if (ret) + return ret; diff --git a/queue-5.14/kvm-arm64-vgic-resample-hw-pending-state-on-deactivation.patch b/queue-5.14/kvm-arm64-vgic-resample-hw-pending-state-on-deactivation.patch new file mode 100644 index 00000000000..9ba4888ca79 --- /dev/null +++ b/queue-5.14/kvm-arm64-vgic-resample-hw-pending-state-on-deactivation.patch @@ -0,0 +1,216 @@ +From 3134cc8beb69d0db9de651081707c4651c011621 Mon Sep 17 00:00:00 2001 +From: Marc Zyngier +Date: Thu, 19 Aug 2021 19:03:05 +0100 +Subject: KVM: arm64: vgic: Resample HW pending state on deactivation + +From: Marc Zyngier + +commit 3134cc8beb69d0db9de651081707c4651c011621 upstream. + +When a mapped level interrupt (a timer, for example) is deactivated +by the guest, the corresponding host interrupt is equally deactivated. +However, the fate of the pending state still needs to be dealt +with in SW. + +This is specially true when the interrupt was in the active+pending +state in the virtual distributor at the point where the guest +was entered. On exit, the pending state is potentially stale +(the guest may have put the interrupt in a non-pending state). + +If we don't do anything, the interrupt will be spuriously injected +in the guest. Although this shouldn't have any ill effect (spurious +interrupts are always possible), we can improve the emulation by +detecting the deactivation-while-pending case and resample the +interrupt. + +While we're at it, move the logic into a common helper that can +be shared between the two GIC implementations. 
+ +Fixes: e40cc57bac79 ("KVM: arm/arm64: vgic: Support level-triggered mapped interrupts") +Reported-by: Raghavendra Rao Ananta +Tested-by: Raghavendra Rao Ananta +Reviewed-by: Oliver Upton +Signed-off-by: Marc Zyngier +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20210819180305.1670525-1-maz@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/kvm/vgic/vgic-v2.c | 36 +++++------------------------------- + arch/arm64/kvm/vgic/vgic-v3.c | 36 +++++------------------------------- + arch/arm64/kvm/vgic/vgic.c | 38 ++++++++++++++++++++++++++++++++++++++ + arch/arm64/kvm/vgic/vgic.h | 2 ++ + 4 files changed, 50 insertions(+), 62 deletions(-) + +--- a/arch/arm64/kvm/vgic/vgic-v2.c ++++ b/arch/arm64/kvm/vgic/vgic-v2.c +@@ -60,6 +60,7 @@ void vgic_v2_fold_lr_state(struct kvm_vc + u32 val = cpuif->vgic_lr[lr]; + u32 cpuid, intid = val & GICH_LR_VIRTUALID; + struct vgic_irq *irq; ++ bool deactivated; + + /* Extract the source vCPU id from the LR */ + cpuid = val & GICH_LR_PHYSID_CPUID; +@@ -75,7 +76,8 @@ void vgic_v2_fold_lr_state(struct kvm_vc + + raw_spin_lock(&irq->irq_lock); + +- /* Always preserve the active bit */ ++ /* Always preserve the active bit, note deactivation */ ++ deactivated = irq->active && !(val & GICH_LR_ACTIVE_BIT); + irq->active = !!(val & GICH_LR_ACTIVE_BIT); + + if (irq->active && vgic_irq_is_sgi(intid)) +@@ -96,36 +98,8 @@ void vgic_v2_fold_lr_state(struct kvm_vc + if (irq->config == VGIC_CONFIG_LEVEL && !(val & GICH_LR_STATE)) + irq->pending_latch = false; + +- /* +- * Level-triggered mapped IRQs are special because we only +- * observe rising edges as input to the VGIC. +- * +- * If the guest never acked the interrupt we have to sample +- * the physical line and set the line level, because the +- * device state could have changed or we simply need to +- * process the still pending interrupt later. +- * +- * If this causes us to lower the level, we have to also clear +- * the physical active state, since we will otherwise never be +- * told when the interrupt becomes asserted again. +- * +- * Another case is when the interrupt requires a helping hand +- * on deactivation (no HW deactivation, for example). 
+- */ +- if (vgic_irq_is_mapped_level(irq)) { +- bool resample = false; +- +- if (val & GICH_LR_PENDING_BIT) { +- irq->line_level = vgic_get_phys_line_level(irq); +- resample = !irq->line_level; +- } else if (vgic_irq_needs_resampling(irq) && +- !(irq->active || irq->pending_latch)) { +- resample = true; +- } +- +- if (resample) +- vgic_irq_set_phys_active(irq, false); +- } ++ /* Handle resampling for mapped interrupts if required */ ++ vgic_irq_handle_resampling(irq, deactivated, val & GICH_LR_PENDING_BIT); + + raw_spin_unlock(&irq->irq_lock); + vgic_put_irq(vcpu->kvm, irq); +--- a/arch/arm64/kvm/vgic/vgic-v3.c ++++ b/arch/arm64/kvm/vgic/vgic-v3.c +@@ -46,6 +46,7 @@ void vgic_v3_fold_lr_state(struct kvm_vc + u32 intid, cpuid; + struct vgic_irq *irq; + bool is_v2_sgi = false; ++ bool deactivated; + + cpuid = val & GICH_LR_PHYSID_CPUID; + cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT; +@@ -68,7 +69,8 @@ void vgic_v3_fold_lr_state(struct kvm_vc + + raw_spin_lock(&irq->irq_lock); + +- /* Always preserve the active bit */ ++ /* Always preserve the active bit, note deactivation */ ++ deactivated = irq->active && !(val & ICH_LR_ACTIVE_BIT); + irq->active = !!(val & ICH_LR_ACTIVE_BIT); + + if (irq->active && is_v2_sgi) +@@ -89,36 +91,8 @@ void vgic_v3_fold_lr_state(struct kvm_vc + if (irq->config == VGIC_CONFIG_LEVEL && !(val & ICH_LR_STATE)) + irq->pending_latch = false; + +- /* +- * Level-triggered mapped IRQs are special because we only +- * observe rising edges as input to the VGIC. +- * +- * If the guest never acked the interrupt we have to sample +- * the physical line and set the line level, because the +- * device state could have changed or we simply need to +- * process the still pending interrupt later. +- * +- * If this causes us to lower the level, we have to also clear +- * the physical active state, since we will otherwise never be +- * told when the interrupt becomes asserted again. +- * +- * Another case is when the interrupt requires a helping hand +- * on deactivation (no HW deactivation, for example). +- */ +- if (vgic_irq_is_mapped_level(irq)) { +- bool resample = false; +- +- if (val & ICH_LR_PENDING_BIT) { +- irq->line_level = vgic_get_phys_line_level(irq); +- resample = !irq->line_level; +- } else if (vgic_irq_needs_resampling(irq) && +- !(irq->active || irq->pending_latch)) { +- resample = true; +- } +- +- if (resample) +- vgic_irq_set_phys_active(irq, false); +- } ++ /* Handle resampling for mapped interrupts if required */ ++ vgic_irq_handle_resampling(irq, deactivated, val & ICH_LR_PENDING_BIT); + + raw_spin_unlock(&irq->irq_lock); + vgic_put_irq(vcpu->kvm, irq); +--- a/arch/arm64/kvm/vgic/vgic.c ++++ b/arch/arm64/kvm/vgic/vgic.c +@@ -1022,3 +1022,41 @@ bool kvm_vgic_map_is_active(struct kvm_v + + return map_is_active; + } ++ ++/* ++ * Level-triggered mapped IRQs are special because we only observe rising ++ * edges as input to the VGIC. ++ * ++ * If the guest never acked the interrupt we have to sample the physical ++ * line and set the line level, because the device state could have changed ++ * or we simply need to process the still pending interrupt later. ++ * ++ * We could also have entered the guest with the interrupt active+pending. ++ * On the next exit, we need to re-evaluate the pending state, as it could ++ * otherwise result in a spurious interrupt by injecting a now potentially ++ * stale pending state. 
++ * ++ * If this causes us to lower the level, we have to also clear the physical ++ * active state, since we will otherwise never be told when the interrupt ++ * becomes asserted again. ++ * ++ * Another case is when the interrupt requires a helping hand on ++ * deactivation (no HW deactivation, for example). ++ */ ++void vgic_irq_handle_resampling(struct vgic_irq *irq, ++ bool lr_deactivated, bool lr_pending) ++{ ++ if (vgic_irq_is_mapped_level(irq)) { ++ bool resample = false; ++ ++ if (unlikely(vgic_irq_needs_resampling(irq))) { ++ resample = !(irq->active || irq->pending_latch); ++ } else if (lr_pending || (lr_deactivated && irq->line_level)) { ++ irq->line_level = vgic_get_phys_line_level(irq); ++ resample = !irq->line_level; ++ } ++ ++ if (resample) ++ vgic_irq_set_phys_active(irq, false); ++ } ++} +--- a/arch/arm64/kvm/vgic/vgic.h ++++ b/arch/arm64/kvm/vgic/vgic.h +@@ -169,6 +169,8 @@ void vgic_irq_set_phys_active(struct vgi + bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq, + unsigned long flags); + void vgic_kick_vcpus(struct kvm *kvm); ++void vgic_irq_handle_resampling(struct vgic_irq *irq, ++ bool lr_deactivated, bool lr_pending); + + int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr, + phys_addr_t addr, phys_addr_t alignment); diff --git a/queue-5.14/kvm-nvmx-unconditionally-clear-nested.pi_pending-on-nested-vm-enter.patch b/queue-5.14/kvm-nvmx-unconditionally-clear-nested.pi_pending-on-nested-vm-enter.patch new file mode 100644 index 00000000000..1dbdd5fb096 --- /dev/null +++ b/queue-5.14/kvm-nvmx-unconditionally-clear-nested.pi_pending-on-nested-vm-enter.patch @@ -0,0 +1,60 @@ +From f7782bb8d818d8f47c26b22079db10599922787a Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Tue, 10 Aug 2021 07:45:26 -0700 +Subject: KVM: nVMX: Unconditionally clear nested.pi_pending on nested VM-Enter + +From: Sean Christopherson + +commit f7782bb8d818d8f47c26b22079db10599922787a upstream. + +Clear nested.pi_pending on nested VM-Enter even if L2 will run without +posted interrupts enabled. If nested.pi_pending is left set from a +previous L2, vmx_complete_nested_posted_interrupt() will pick up the +stale flag and exit to userspace with an "internal emulation error" due +the new L2 not having a valid nested.pi_desc. + +Arguably, vmx_complete_nested_posted_interrupt() should first check for +posted interrupts being enabled, but it's also completely reasonable that +KVM wouldn't screw up a fundamental flag. Not to mention that the mere +existence of nested.pi_pending is a long-standing bug as KVM shouldn't +move the posted interrupt out of the IRR until it's actually processed, +e.g. KVM effectively drops an interrupt when it performs a nested VM-Exit +with a "pending" posted interrupt. Fixing the mess is a future problem. + +Prior to vmx_complete_nested_posted_interrupt() interpreting a null PI +descriptor as an error, this was a benign bug as the null PI descriptor +effectively served as a check on PI not being enabled. Even then, the +new flow did not become problematic until KVM started checking the result +of kvm_check_nested_events(). 
+ +Fixes: 705699a13994 ("KVM: nVMX: Enable nested posted interrupt processing") +Fixes: 966eefb89657 ("KVM: nVMX: Disable vmcs02 posted interrupts if vmcs12 PID isn't mappable") +Fixes: 47d3530f86c0 ("KVM: x86: Exit to userspace when kvm_check_nested_events fails") +Cc: stable@vger.kernel.org +Cc: Jim Mattson +Signed-off-by: Sean Christopherson +Message-Id: <20210810144526.2662272-1-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/nested.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +--- a/arch/x86/kvm/vmx/nested.c ++++ b/arch/x86/kvm/vmx/nested.c +@@ -2223,12 +2223,11 @@ static void prepare_vmcs02_early(struct + ~PIN_BASED_VMX_PREEMPTION_TIMER); + + /* Posted interrupts setting is only taken from vmcs12. */ +- if (nested_cpu_has_posted_intr(vmcs12)) { ++ vmx->nested.pi_pending = false; ++ if (nested_cpu_has_posted_intr(vmcs12)) + vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv; +- vmx->nested.pi_pending = false; +- } else { ++ else + exec_control &= ~PIN_BASED_POSTED_INTR; +- } + pin_controls_set(vmx, exec_control); + + /* diff --git a/queue-5.14/kvm-s390-index-kvm-arch.idle_mask-by-vcpu_idx.patch b/queue-5.14/kvm-s390-index-kvm-arch.idle_mask-by-vcpu_idx.patch new file mode 100644 index 00000000000..5ff7222f84a --- /dev/null +++ b/queue-5.14/kvm-s390-index-kvm-arch.idle_mask-by-vcpu_idx.patch @@ -0,0 +1,122 @@ +From a3e03bc1368c1bc16e19b001fc96dc7430573cc8 Mon Sep 17 00:00:00 2001 +From: Halil Pasic +Date: Fri, 27 Aug 2021 14:54:29 +0200 +Subject: KVM: s390: index kvm->arch.idle_mask by vcpu_idx +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Halil Pasic + +commit a3e03bc1368c1bc16e19b001fc96dc7430573cc8 upstream. + +While in practice vcpu->vcpu_idx == vcpu->vcp_id is often true, it may +not always be, and we must not rely on this. Reason is that KVM decides +the vcpu_idx, userspace decides the vcpu_id, thus the two might not +match. + +Currently kvm->arch.idle_mask is indexed by vcpu_id, which implies +that code like +for_each_set_bit(vcpu_id, kvm->arch.idle_mask, online_vcpus) { + vcpu = kvm_get_vcpu(kvm, vcpu_id); + do_stuff(vcpu); +} +is not legit. Reason is that kvm_get_vcpu expects an vcpu_idx, not an +vcpu_id. The trouble is, we do actually use kvm->arch.idle_mask like +this. To fix this problem we have two options. Either use +kvm_get_vcpu_by_id(vcpu_id), which would loop to find the right vcpu_id, +or switch to indexing via vcpu_idx. The latter is preferable for obvious +reasons. + +Let us make switch from indexing kvm->arch.idle_mask by vcpu_id to +indexing it by vcpu_idx. To keep gisa_int.kicked_mask indexed by the +same index as idle_mask lets make the same change for it as well. 
+ +Fixes: 1ee0bc559dc3 ("KVM: s390: get rid of local_int array") +Signed-off-by: Halil Pasic +Reviewed-by: Christian Bornträger +Reviewed-by: Claudio Imbrenda +Cc: # 3.15+ +Link: https://lore.kernel.org/r/20210827125429.1912577-1-pasic@linux.ibm.com +Signed-off-by: Christian Borntraeger +Signed-off-by: Greg Kroah-Hartman +--- + arch/s390/include/asm/kvm_host.h | 1 + + arch/s390/kvm/interrupt.c | 12 ++++++------ + arch/s390/kvm/kvm-s390.c | 2 +- + arch/s390/kvm/kvm-s390.h | 2 +- + 4 files changed, 9 insertions(+), 8 deletions(-) + +--- a/arch/s390/include/asm/kvm_host.h ++++ b/arch/s390/include/asm/kvm_host.h +@@ -957,6 +957,7 @@ struct kvm_arch{ + atomic64_t cmma_dirty_pages; + /* subset of available cpu features enabled by user space */ + DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS); ++ /* indexed by vcpu_idx */ + DECLARE_BITMAP(idle_mask, KVM_MAX_VCPUS); + struct kvm_s390_gisa_interrupt gisa_int; + struct kvm_s390_pv pv; +--- a/arch/s390/kvm/interrupt.c ++++ b/arch/s390/kvm/interrupt.c +@@ -419,13 +419,13 @@ static unsigned long deliverable_irqs(st + static void __set_cpu_idle(struct kvm_vcpu *vcpu) + { + kvm_s390_set_cpuflags(vcpu, CPUSTAT_WAIT); +- set_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask); ++ set_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask); + } + + static void __unset_cpu_idle(struct kvm_vcpu *vcpu) + { + kvm_s390_clear_cpuflags(vcpu, CPUSTAT_WAIT); +- clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask); ++ clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask); + } + + static void __reset_intercept_indicators(struct kvm_vcpu *vcpu) +@@ -3050,18 +3050,18 @@ int kvm_s390_get_irq_state(struct kvm_vc + + static void __airqs_kick_single_vcpu(struct kvm *kvm, u8 deliverable_mask) + { +- int vcpu_id, online_vcpus = atomic_read(&kvm->online_vcpus); ++ int vcpu_idx, online_vcpus = atomic_read(&kvm->online_vcpus); + struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; + struct kvm_vcpu *vcpu; + +- for_each_set_bit(vcpu_id, kvm->arch.idle_mask, online_vcpus) { +- vcpu = kvm_get_vcpu(kvm, vcpu_id); ++ for_each_set_bit(vcpu_idx, kvm->arch.idle_mask, online_vcpus) { ++ vcpu = kvm_get_vcpu(kvm, vcpu_idx); + if (psw_ioint_disabled(vcpu)) + continue; + deliverable_mask &= (u8)(vcpu->arch.sie_block->gcr[6] >> 24); + if (deliverable_mask) { + /* lately kicked but not yet running */ +- if (test_and_set_bit(vcpu_id, gi->kicked_mask)) ++ if (test_and_set_bit(vcpu_idx, gi->kicked_mask)) + return; + kvm_s390_vcpu_wakeup(vcpu); + return; +--- a/arch/s390/kvm/kvm-s390.c ++++ b/arch/s390/kvm/kvm-s390.c +@@ -4044,7 +4044,7 @@ static int vcpu_pre_run(struct kvm_vcpu + kvm_s390_patch_guest_per_regs(vcpu); + } + +- clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask); ++ clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.gisa_int.kicked_mask); + + vcpu->arch.sie_block->icptcode = 0; + cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags); +--- a/arch/s390/kvm/kvm-s390.h ++++ b/arch/s390/kvm/kvm-s390.h +@@ -79,7 +79,7 @@ static inline int is_vcpu_stopped(struct + + static inline int is_vcpu_idle(struct kvm_vcpu *vcpu) + { +- return test_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask); ++ return test_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask); + } + + static inline int kvm_is_ucontrol(struct kvm *kvm) diff --git a/queue-5.14/kvm-vmx-avoid-running-vmx_handle_exit_irqoff-in-case-of-emulation.patch b/queue-5.14/kvm-vmx-avoid-running-vmx_handle_exit_irqoff-in-case-of-emulation.patch new file mode 100644 index 00000000000..ce946b3cfdb --- /dev/null +++ 
b/queue-5.14/kvm-vmx-avoid-running-vmx_handle_exit_irqoff-in-case-of-emulation.patch @@ -0,0 +1,34 @@ +From 81b4b56d4f8130bbb99cf4e2b48082e5b4cfccb9 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Thu, 26 Aug 2021 12:57:49 +0300 +Subject: KVM: VMX: avoid running vmx_handle_exit_irqoff in case of emulation + +From: Maxim Levitsky + +commit 81b4b56d4f8130bbb99cf4e2b48082e5b4cfccb9 upstream. + +If we are emulating an invalid guest state, we don't have a correct +exit reason, and thus we shouldn't do anything in this function. + +Signed-off-by: Maxim Levitsky +Message-Id: <20210826095750.1650467-2-mlevitsk@redhat.com> +Cc: stable@vger.kernel.org +Fixes: 95b5a48c4f2b ("KVM: VMX: Handle NMIs, #MCs and async #PFs in common irqs-disabled fn", 2019-06-18) +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/vmx.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -6368,6 +6368,9 @@ static void vmx_handle_exit_irqoff(struc + { + struct vcpu_vmx *vmx = to_vmx(vcpu); + ++ if (vmx->emulation_required) ++ return; ++ + if (vmx->exit_reason.basic == EXIT_REASON_EXTERNAL_INTERRUPT) + handle_external_interrupt_irqoff(vcpu); + else if (vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI) diff --git a/queue-5.14/kvm-x86-clamp-host-mapping-level-to-max_level-in-kvm_mmu_max_mapping_level.patch b/queue-5.14/kvm-x86-clamp-host-mapping-level-to-max_level-in-kvm_mmu_max_mapping_level.patch new file mode 100644 index 00000000000..72414135269 --- /dev/null +++ b/queue-5.14/kvm-x86-clamp-host-mapping-level-to-max_level-in-kvm_mmu_max_mapping_level.patch @@ -0,0 +1,74 @@ +From ec607a564f70519b340f7eb4cfc0f4a6b55285ac Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Fri, 6 Aug 2021 07:05:58 -0400 +Subject: KVM: x86: clamp host mapping level to max_level in kvm_mmu_max_mapping_level + +From: Paolo Bonzini + +commit ec607a564f70519b340f7eb4cfc0f4a6b55285ac upstream. + +This change started as a way to make kvm_mmu_hugepage_adjust a bit simpler, +but it does fix two bugs as well. + +One bug is in zapping collapsible PTEs. If a large page size is +disallowed but not all of them, kvm_mmu_max_mapping_level will return the +host mapping level and the small PTEs will be zapped up to that level. +However, if e.g. 1GB are prohibited, we can still zap 4KB mapping and +preserve the 2MB ones. This can happen for example when NX huge pages +are in use. + +The second would happen when userspace backs guest memory +with a 1gb hugepage but only assign a subset of the page to +the guest. 1gb pages would be disallowed by the memslot, but +not 2mb. kvm_mmu_max_mapping_level() would fall through to the +host_pfn_mapping_level() logic, see the 1gb hugepage, and map the whole +thing into the guest. 
+ +Fixes: 2f57b7051fe8 ("KVM: x86/mmu: Persist gfn_lpage_is_disallowed() to max_level") +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/mmu/mmu.c | 13 +++++-------- + 1 file changed, 5 insertions(+), 8 deletions(-) + +--- a/arch/x86/kvm/mmu/mmu.c ++++ b/arch/x86/kvm/mmu/mmu.c +@@ -2846,6 +2846,7 @@ int kvm_mmu_max_mapping_level(struct kvm + kvm_pfn_t pfn, int max_level) + { + struct kvm_lpage_info *linfo; ++ int host_level; + + max_level = min(max_level, max_huge_page_level); + for ( ; max_level > PG_LEVEL_4K; max_level--) { +@@ -2857,7 +2858,8 @@ int kvm_mmu_max_mapping_level(struct kvm + if (max_level == PG_LEVEL_4K) + return PG_LEVEL_4K; + +- return host_pfn_mapping_level(kvm, gfn, pfn, slot); ++ host_level = host_pfn_mapping_level(kvm, gfn, pfn, slot); ++ return min(host_level, max_level); + } + + int kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, gfn_t gfn, +@@ -2881,17 +2883,12 @@ int kvm_mmu_hugepage_adjust(struct kvm_v + if (!slot) + return PG_LEVEL_4K; + +- level = kvm_mmu_max_mapping_level(vcpu->kvm, slot, gfn, pfn, max_level); +- if (level == PG_LEVEL_4K) +- return level; +- +- *req_level = level = min(level, max_level); +- + /* + * Enforce the iTLB multihit workaround after capturing the requested + * level, which will be used to do precise, accurate accounting. + */ +- if (huge_page_disallowed) ++ *req_level = level = kvm_mmu_max_mapping_level(vcpu->kvm, slot, gfn, pfn, max_level); ++ if (level == PG_LEVEL_4K || huge_page_disallowed) + return PG_LEVEL_4K; + + /* diff --git a/queue-5.14/kvm-x86-mmu-avoid-collision-with-present-sptes-in-tdp-mmu-lpage-stats.patch b/queue-5.14/kvm-x86-mmu-avoid-collision-with-present-sptes-in-tdp-mmu-lpage-stats.patch new file mode 100644 index 00000000000..1e3f5fa1d56 --- /dev/null +++ b/queue-5.14/kvm-x86-mmu-avoid-collision-with-present-sptes-in-tdp-mmu-lpage-stats.patch @@ -0,0 +1,82 @@ +From 088acd23526647844aec1c39db4ad02552c86c7b Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Mon, 2 Aug 2021 21:46:06 -0700 +Subject: KVM: x86/mmu: Avoid collision with !PRESENT SPTEs in TDP MMU lpage stats + +From: Sean Christopherson + +commit 088acd23526647844aec1c39db4ad02552c86c7b upstream. + +Factor in whether or not the old/new SPTEs are shadow-present when +adjusting the large page stats in the TDP MMU. A modified MMIO SPTE can +toggle the page size bit, as bit 7 is used to store the MMIO generation, +i.e. is_large_pte() can get a false positive when called on a MMIO SPTE. +Ditto for nuking SPTEs with REMOVED_SPTE, which sets bit 7 in its magic +value. + +Opportunistically move the logic below the check to verify at least one +of the old/new SPTEs is shadow present. + +Use is/was_leaf even though is/was_present would suffice. The code +generation is roughly equivalent since all flags need to be computed +prior to the code in question, and using the *_leaf flags will minimize +the diff in a future enhancement to account all pages, i.e. will change +the check to "is_leaf != was_leaf". 
+ +Reviewed-by: David Matlack +Reviewed-by: Ben Gardon + +Fixes: 1699f65c8b65 ("kvm/x86: Fix 'lpages' kvm stat for TDM MMU") +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Signed-off-by: Mingwei Zhang +Message-Id: <20210803044607.599629-3-mizhang@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/mmu/tdp_mmu.c | 20 +++++++++++++------- + 1 file changed, 13 insertions(+), 7 deletions(-) + +--- a/arch/x86/kvm/mmu/tdp_mmu.c ++++ b/arch/x86/kvm/mmu/tdp_mmu.c +@@ -412,6 +412,7 @@ static void __handle_changed_spte(struct + bool was_leaf = was_present && is_last_spte(old_spte, level); + bool is_leaf = is_present && is_last_spte(new_spte, level); + bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte); ++ bool was_large, is_large; + + WARN_ON(level > PT64_ROOT_MAX_LEVEL); + WARN_ON(level < PG_LEVEL_4K); +@@ -445,13 +446,6 @@ static void __handle_changed_spte(struct + + trace_kvm_tdp_mmu_spte_changed(as_id, gfn, level, old_spte, new_spte); + +- if (is_large_pte(old_spte) != is_large_pte(new_spte)) { +- if (is_large_pte(old_spte)) +- atomic64_sub(1, (atomic64_t*)&kvm->stat.lpages); +- else +- atomic64_add(1, (atomic64_t*)&kvm->stat.lpages); +- } +- + /* + * The only times a SPTE should be changed from a non-present to + * non-present state is when an MMIO entry is installed/modified/ +@@ -477,6 +471,18 @@ static void __handle_changed_spte(struct + return; + } + ++ /* ++ * Update large page stats if a large page is being zapped, created, or ++ * is replacing an existing shadow page. ++ */ ++ was_large = was_leaf && is_large_pte(old_spte); ++ is_large = is_leaf && is_large_pte(new_spte); ++ if (was_large != is_large) { ++ if (was_large) ++ atomic64_sub(1, (atomic64_t *)&kvm->stat.lpages); ++ else ++ atomic64_add(1, (atomic64_t *)&kvm->stat.lpages); ++ } + + if (was_leaf && is_dirty_spte(old_spte) && + (!is_present || !is_dirty_spte(new_spte) || pfn_changed)) diff --git a/queue-5.14/kvm-x86-update-vcpu-s-hv_clock-before-back-to-guest-when-tsc_offset-is-adjusted.patch b/queue-5.14/kvm-x86-update-vcpu-s-hv_clock-before-back-to-guest-when-tsc_offset-is-adjusted.patch new file mode 100644 index 00000000000..431b7d94eb2 --- /dev/null +++ b/queue-5.14/kvm-x86-update-vcpu-s-hv_clock-before-back-to-guest-when-tsc_offset-is-adjusted.patch @@ -0,0 +1,40 @@ +From d9130a2dfdd4b21736c91b818f87dbc0ccd1e757 Mon Sep 17 00:00:00 2001 +From: Zelin Deng +Date: Wed, 28 Apr 2021 10:22:01 +0800 +Subject: KVM: x86: Update vCPU's hv_clock before back to guest when tsc_offset is adjusted + +From: Zelin Deng + +commit d9130a2dfdd4b21736c91b818f87dbc0ccd1e757 upstream. + +When MSR_IA32_TSC_ADJUST is written by guest due to TSC ADJUST feature +especially there's a big tsc warp (like a new vCPU is hot-added into VM +which has been up for a long time), tsc_offset is added by a large value +then go back to guest. This causes system time jump as tsc_timestamp is +not adjusted in the meantime and pvclock monotonic character. +To fix this, just notify kvm to update vCPU's guest time before back to +guest. 
+ +Cc: stable@vger.kernel.org +Signed-off-by: Zelin Deng +Signed-off-by: Paolo Bonzini +Message-Id: <1619576521-81399-2-git-send-email-zelin.deng@linux.alibaba.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/x86.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -3316,6 +3316,10 @@ int kvm_set_msr_common(struct kvm_vcpu * + if (!msr_info->host_initiated) { + s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr; + adjust_tsc_offset_guest(vcpu, adj); ++ /* Before back to guest, tsc_timestamp must be adjusted ++ * as well, otherwise guest's percpu pvclock time could jump. ++ */ ++ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); + } + vcpu->arch.ia32_tsc_adjust_msr = data; + } diff --git a/queue-5.14/md-raid10-remove-unnecessary-rcu_dereference-in-raid10_handle_discard.patch b/queue-5.14/md-raid10-remove-unnecessary-rcu_dereference-in-raid10_handle_discard.patch new file mode 100644 index 00000000000..1d24ed89911 --- /dev/null +++ b/queue-5.14/md-raid10-remove-unnecessary-rcu_dereference-in-raid10_handle_discard.patch @@ -0,0 +1,82 @@ +From 46d4703b1db4c86ab5acb2331b10df999f005e8e Mon Sep 17 00:00:00 2001 +From: Xiao Ni +Date: Wed, 18 Aug 2021 13:57:48 +0800 +Subject: md/raid10: Remove unnecessary rcu_dereference in raid10_handle_discard + +From: Xiao Ni + +commit 46d4703b1db4c86ab5acb2331b10df999f005e8e upstream. + +We are seeing the following warning in raid10_handle_discard. +[ 695.110751] ============================= +[ 695.131439] WARNING: suspicious RCU usage +[ 695.151389] 4.18.0-319.el8.x86_64+debug #1 Not tainted +[ 695.174413] ----------------------------- +[ 695.192603] drivers/md/raid10.c:1776 suspicious +rcu_dereference_check() usage! +[ 695.225107] other info that might help us debug this: +[ 695.260940] rcu_scheduler_active = 2, debug_locks = 1 +[ 695.290157] no locks held by mkfs.xfs/10186. + +In the first loop of function raid10_handle_discard. It already +determines which disk need to handle discard request and add the +rdev reference count rdev->nr_pending. So the conf->mirrors will +not change until all bios come back from underlayer disks. It +doesn't need to use rcu_dereference to get rdev. + +Cc: stable@vger.kernel.org +Fixes: d30588b2731f ('md/raid10: improve raid10 discard request') +Signed-off-by: Xiao Ni +Acked-by: Guoqing Jiang +Signed-off-by: Song Liu +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/raid10.c | 14 ++++++++++---- + 1 file changed, 10 insertions(+), 4 deletions(-) + +--- a/drivers/md/raid10.c ++++ b/drivers/md/raid10.c +@@ -1712,6 +1712,11 @@ retry_discard: + } else + r10_bio->master_bio = (struct bio *)first_r10bio; + ++ /* ++ * first select target devices under rcu_lock and ++ * inc refcount on their rdev. Record them by setting ++ * bios[x] to bio ++ */ + rcu_read_lock(); + for (disk = 0; disk < geo->raid_disks; disk++) { + struct md_rdev *rdev = rcu_dereference(conf->mirrors[disk].rdev); +@@ -1743,9 +1748,6 @@ retry_discard: + for (disk = 0; disk < geo->raid_disks; disk++) { + sector_t dev_start, dev_end; + struct bio *mbio, *rbio = NULL; +- struct md_rdev *rdev = rcu_dereference(conf->mirrors[disk].rdev); +- struct md_rdev *rrdev = rcu_dereference( +- conf->mirrors[disk].replacement); + + /* + * Now start to calculate the start and end address for each disk. +@@ -1775,9 +1777,12 @@ retry_discard: + + /* + * It only handles discard bio which size is >= stripe size, so +- * dev_end > dev_start all the time ++ * dev_end > dev_start all the time. 
++ * It doesn't need to use rcu lock to get rdev here. We already
++ * add rdev->nr_pending in the first loop.
+ */
+ if (r10_bio->devs[disk].bio) {
++ struct md_rdev *rdev = conf->mirrors[disk].rdev;
+ mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
+ mbio->bi_end_io = raid10_end_discard_request;
+ mbio->bi_private = r10_bio;
+@@ -1790,6 +1795,7 @@ retry_discard:
+ bio_endio(mbio);
+ }
+ if (r10_bio->devs[disk].repl_bio) {
++ struct md_rdev *rrdev = conf->mirrors[disk].replacement;
+ rbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
+ rbio->bi_end_io = raid10_end_discard_request;
+ rbio->bi_private = r10_bio;
diff --git a/queue-5.14/perf-x86-intel-uncore-fix-iio-cleanup-mapping-procedure-for-snr-icx.patch b/queue-5.14/perf-x86-intel-uncore-fix-iio-cleanup-mapping-procedure-for-snr-icx.patch
new file mode 100644
index 00000000000..afac78ef61e
--- /dev/null
+++ b/queue-5.14/perf-x86-intel-uncore-fix-iio-cleanup-mapping-procedure-for-snr-icx.patch
@@ -0,0 +1,113 @@
+From 3f2cbe3810a60111a33f5f6267bd5a237b826fc9 Mon Sep 17 00:00:00 2001
+From: Alexander Antonov
+Date: Tue, 6 Jul 2021 12:07:23 +0300
+Subject: perf/x86/intel/uncore: Fix IIO cleanup mapping procedure for SNR/ICX
+
+From: Alexander Antonov
+
+commit 3f2cbe3810a60111a33f5f6267bd5a237b826fc9 upstream.
+
+skx_iio_cleanup_mapping() is re-used for snr and icx, but in those
+cases it fails to use the appropriate XXX_iio_mapping_group and as
+such fails to free previously allocated resources, leading to memory
+leaks.
+
+Fixes: 10337e95e04c ("perf/x86/intel/uncore: Enable I/O stacks to IIO PMON mapping on ICX")
+Signed-off-by: Alexander Antonov
+[peterz: Changelog]
+Signed-off-by: Peter Zijlstra (Intel)
+Reviewed-by: Kan Liang
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20210706090723.41850-1-alexander.antonov@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/x86/events/intel/uncore_snbep.c | 40 ++++++++++++++++++++++++------------
+ 1 file changed, 28 insertions(+), 12 deletions(-)
+
+--- a/arch/x86/events/intel/uncore_snbep.c
++++ b/arch/x86/events/intel/uncore_snbep.c
+@@ -3838,26 +3838,32 @@ clear_attr_update:
+ return ret;
+ }
+
+-static int skx_iio_set_mapping(struct intel_uncore_type *type)
+-{
+- return pmu_iio_set_mapping(type, &skx_iio_mapping_group);
+-}
+-
+-static void skx_iio_cleanup_mapping(struct intel_uncore_type *type)
++static void
++pmu_iio_cleanup_mapping(struct intel_uncore_type *type, struct attribute_group *ag)
+ {
+- struct attribute **attr = skx_iio_mapping_group.attrs;
++ struct attribute **attr = ag->attrs;
+
+ if (!attr)
+ return;
+
+ for (; *attr; attr++)
+ kfree((*attr)->name);
+- kfree(attr_to_ext_attr(*skx_iio_mapping_group.attrs));
+- kfree(skx_iio_mapping_group.attrs);
+- skx_iio_mapping_group.attrs = NULL;
++ kfree(attr_to_ext_attr(*ag->attrs));
++ kfree(ag->attrs);
++ ag->attrs = NULL;
+ kfree(type->topology);
+ }
+
++static int skx_iio_set_mapping(struct intel_uncore_type *type)
++{
++ return pmu_iio_set_mapping(type, &skx_iio_mapping_group);
++}
++
++static void skx_iio_cleanup_mapping(struct intel_uncore_type *type)
++{
++ pmu_iio_cleanup_mapping(type, &skx_iio_mapping_group);
++}
++
+ static struct intel_uncore_type skx_uncore_iio = {
+ .name = "iio",
+ .num_counters = 4,
+@@ -4501,6 +4507,11 @@ static int snr_iio_set_mapping(struct in
+ return pmu_iio_set_mapping(type, &snr_iio_mapping_group);
+ }
+
++static void snr_iio_cleanup_mapping(struct intel_uncore_type *type)
++{
++ pmu_iio_cleanup_mapping(type, &snr_iio_mapping_group);
++}
++
+ static struct intel_uncore_type snr_uncore_iio = {
+ .name = "iio",
+ .num_counters = 4,
+@@ -4517,7 +4528,7 @@ static struct intel_uncore_type snr_unco
+ .attr_update = snr_iio_attr_update,
+ .get_topology = snr_iio_get_topology,
+ .set_mapping = snr_iio_set_mapping,
+- .cleanup_mapping = skx_iio_cleanup_mapping,
++ .cleanup_mapping = snr_iio_cleanup_mapping,
+ };
+
+ static struct intel_uncore_type snr_uncore_irp = {
+@@ -5092,6 +5103,11 @@ static int icx_iio_set_mapping(struct in
+ return pmu_iio_set_mapping(type, &icx_iio_mapping_group);
+ }
+
++static void icx_iio_cleanup_mapping(struct intel_uncore_type *type)
++{
++ pmu_iio_cleanup_mapping(type, &icx_iio_mapping_group);
++}
++
+ static struct intel_uncore_type icx_uncore_iio = {
+ .name = "iio",
+ .num_counters = 4,
+@@ -5109,7 +5125,7 @@ static struct intel_uncore_type icx_unco
+ .attr_update = icx_iio_attr_update,
+ .get_topology = icx_iio_get_topology,
+ .set_mapping = icx_iio_set_mapping,
+- .cleanup_mapping = skx_iio_cleanup_mapping,
++ .cleanup_mapping = icx_iio_cleanup_mapping,
+ };
+
+ static struct intel_uncore_type icx_uncore_irp = {
diff --git a/queue-5.14/revert-kvm-x86-mmu-add-guest-physical-address-check-in-translate_gpa.patch b/queue-5.14/revert-kvm-x86-mmu-add-guest-physical-address-check-in-translate_gpa.patch
new file mode 100644
index 00000000000..080320384b0
--- /dev/null
+++ b/queue-5.14/revert-kvm-x86-mmu-add-guest-physical-address-check-in-translate_gpa.patch
@@ -0,0 +1,72 @@
+From e7177339d7b5f9594b316842122b5fda9513d5e2 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson
+Date: Tue, 31 Aug 2021 09:42:22 -0700
+Subject: Revert "KVM: x86: mmu: Add guest physical address check in translate_gpa()"
+
+From: Sean Christopherson
+
+commit e7177339d7b5f9594b316842122b5fda9513d5e2 upstream.
+
+Revert a misguided illegal GPA check when "translating" a non-nested GPA.
+The check is woefully incomplete as it does not fill in @exception as
+expected by all callers, which leads to KVM attempting to inject a bogus
+exception, potentially exposing kernel stack information in the process.
+
+ WARNING: CPU: 0 PID: 8469 at arch/x86/kvm/x86.c:525 exception_type+0x98/0xb0 arch/x86/kvm/x86.c:525
+ CPU: 1 PID: 8469 Comm: syz-executor531 Not tainted 5.14.0-rc7-syzkaller #0
+ RIP: 0010:exception_type+0x98/0xb0 arch/x86/kvm/x86.c:525
+ Call Trace:
+ x86_emulate_instruction+0xef6/0x1460 arch/x86/kvm/x86.c:7853
+ kvm_mmu_page_fault+0x2f0/0x1810 arch/x86/kvm/mmu/mmu.c:5199
+ handle_ept_misconfig+0xdf/0x3e0 arch/x86/kvm/vmx/vmx.c:5336
+ __vmx_handle_exit arch/x86/kvm/vmx/vmx.c:6021 [inline]
+ vmx_handle_exit+0x336/0x1800 arch/x86/kvm/vmx/vmx.c:6038
+ vcpu_enter_guest+0x2a1c/0x4430 arch/x86/kvm/x86.c:9712
+ vcpu_run arch/x86/kvm/x86.c:9779 [inline]
+ kvm_arch_vcpu_ioctl_run+0x47d/0x1b20 arch/x86/kvm/x86.c:10010
+ kvm_vcpu_ioctl+0x49e/0xe50 arch/x86/kvm/../../../virt/kvm/kvm_main.c:3652
+
+The bug has escaped notice because practically speaking the GPA check is
+useless. The GPA check in question only comes into play when KVM is
+walking guest page tables (or "translating" CR3), and KVM already handles
+illegal GPA checks by setting reserved bits in rsvd_bits_mask for each
+PxE, or in the case of CR3 for loading PTDPTRs, manually checks for an
+illegal CR3. This particular failure doesn't hit the existing reserved
+bits checks because syzbot sets guest.MAXPHYADDR=1, and IA32 architecture
+simply doesn't allow for such an absurd MAXPHYADDR, e.g. 32-bit paging
+doesn't define any reserved PA bits checks, which KVM emulates by only
+incorporating the reserved PA bits into the "high" bits, i.e. bits 63:32.
+
+Simply remove the bogus check. There is zero meaningful value and no
+architectural justification for supporting guest.MAXPHYADDR < 32, and
+properly filling the exception would introduce non-trivial complexity.
+
+This reverts commit ec7771ab471ba6a945350353617e2e3385d0e013.
+
+Fixes: ec7771ab471b ("KVM: x86: mmu: Add guest physical address check in translate_gpa()")
+Cc: stable@vger.kernel.org
+Reported-by: syzbot+200c08e88ae818f849ce@syzkaller.appspotmail.com
+Signed-off-by: Sean Christopherson
+Message-Id: <20210831164224.1119728-2-seanjc@google.com>
+Reviewed-by: Vitaly Kuznetsov
+Signed-off-by: Paolo Bonzini
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/x86/kvm/mmu/mmu.c | 6 ------
+ 1 file changed, 6 deletions(-)
+
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -323,12 +323,6 @@ static bool check_mmio_spte(struct kvm_v
+ static gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
+ struct x86_exception *exception)
+ {
+- /* Check if guest physical address doesn't exceed guest maximum */
+- if (kvm_vcpu_is_illegal_gpa(vcpu, gpa)) {
+- exception->error_code |= PFERR_RSVD_MASK;
+- return UNMAPPED_GVA;
+- }
+-
+ return gpa;
+ }
+
diff --git a/queue-5.14/series b/queue-5.14/series
index 5508e0caff0..957b327ecf4 100644
--- a/queue-5.14/series
+++ b/queue-5.14/series
@@ -306,3 +306,22 @@ raid1-ensure-write-behind-bio-has-less-than-bio_max_vecs-sectors.patch
 cifs-do-not-leak-edeadlk-to-dgetents64-for-status_user_session_deleted.patch
 smb3-fix-posix-extensions-mount-option.patch
 tty-fix-data-race-between-tiocsti-and-flush_to_ldisc.patch
+perf-x86-intel-uncore-fix-iio-cleanup-mapping-procedure-for-snr-icx.patch
+revert-kvm-x86-mmu-add-guest-physical-address-check-in-translate_gpa.patch
+kvm-s390-index-kvm-arch.idle_mask-by-vcpu_idx.patch
+kvm-x86-update-vcpu-s-hv_clock-before-back-to-guest-when-tsc_offset-is-adjusted.patch
+kvm-x86-clamp-host-mapping-level-to-max_level-in-kvm_mmu_max_mapping_level.patch
+kvm-x86-mmu-avoid-collision-with-present-sptes-in-tdp-mmu-lpage-stats.patch
+kvm-vmx-avoid-running-vmx_handle_exit_irqoff-in-case-of-emulation.patch
+kvm-nvmx-unconditionally-clear-nested.pi_pending-on-nested-vm-enter.patch
+kvm-arm64-unregister-hyp-sections-from-kmemleak-in-protected-mode.patch
+kvm-arm64-vgic-resample-hw-pending-state-on-deactivation.patch
+arm-dts-at91-add-pinctrl-names-0-for-all-gpios.patch
+io-wq-check-max_worker-limits-if-a-worker-transitions-bound-state.patch
+md-raid10-remove-unnecessary-rcu_dereference-in-raid10_handle_discard.patch
+char-tpm-kconfig-remove-bad-i2c-cr50-select.patch
+fuse-truncate-pagecache-on-atomic_o_trunc.patch
+fuse-flush-extending-writes.patch
+fuse-wait-for-writepages-in-syncfs.patch
+ima-remove-wmissing-prototypes-warning.patch
+ima-remove-the-dependency-on-crypto_md5.patch