--- /dev/null
+From bf781869e5cf3e4ec1a47dad69b6f0df97629cbd Mon Sep 17 00:00:00 2001
+From: Claudiu Beznea <claudiu.beznea@microchip.com>
+Date: Tue, 27 Jul 2021 10:40:05 +0300
+Subject: ARM: dts: at91: add pinctrl-{names, 0} for all gpios
+
+From: Claudiu Beznea <claudiu.beznea@microchip.com>
+
+commit bf781869e5cf3e4ec1a47dad69b6f0df97629cbd upstream.
+
+Add pinctrl-names and pinctrl-0 properties on controllers that claim to
+use pins, to avoid failures due to
+commit 2ab73c6d8323 ("gpio: Support GPIO controllers without pin-ranges")
+and also to avoid using pins that may be claimed by other IPs.
+
+Fixes: b7c2b6157079 ("ARM: at91: add Atmel's SAMA5D3 Xplained board")
+Fixes: 1e5f532c2737 ("ARM: dts: at91: sam9x60: add device tree for soc and board")
+Fixes: 38153a017896 ("ARM: at91/dt: sama5d4: add dts for sama5d4 xplained board")
+Signed-off-by: Claudiu Beznea <claudiu.beznea@microchip.com>
+Signed-off-by: Nicolas Ferre <nicolas.ferre@microchip.com>
+Link: https://lore.kernel.org/r/20210727074006.1609989-1-claudiu.beznea@microchip.com
+Cc: <stable@vger.kernel.org> # v5.7+
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm/boot/dts/at91-sam9x60ek.dts | 16 ++++++++++++++-
+ arch/arm/boot/dts/at91-sama5d3_xplained.dts | 29 ++++++++++++++++++++++++++++
+ arch/arm/boot/dts/at91-sama5d4_xplained.dts | 19 ++++++++++++++++++
+ 3 files changed, 63 insertions(+), 1 deletion(-)
+
+--- a/arch/arm/boot/dts/at91-sam9x60ek.dts
++++ b/arch/arm/boot/dts/at91-sam9x60ek.dts
+@@ -92,6 +92,8 @@
+
+ leds {
+ compatible = "gpio-leds";
++ pinctrl-names = "default";
++ pinctrl-0 = <&pinctrl_gpio_leds>;
+ status = "okay"; /* Conflict with pwm0. */
+
+ red {
+@@ -537,6 +539,10 @@
+ AT91_PIOA 19 AT91_PERIPH_A (AT91_PINCTRL_PULL_UP | AT91_PINCTRL_DRIVE_STRENGTH_HI) /* PA19 DAT2 periph A with pullup */
+ AT91_PIOA 20 AT91_PERIPH_A (AT91_PINCTRL_PULL_UP | AT91_PINCTRL_DRIVE_STRENGTH_HI)>; /* PA20 DAT3 periph A with pullup */
+ };
++ pinctrl_sdmmc0_cd: sdmmc0_cd {
++ atmel,pins =
++ <AT91_PIOA 23 AT91_PERIPH_GPIO AT91_PINCTRL_NONE>;
++ };
+ };
+
+ sdmmc1 {
+@@ -569,6 +575,14 @@
+ AT91_PIOD 16 AT91_PERIPH_GPIO AT91_PINCTRL_NONE>;
+ };
+ };
++
++ leds {
++ pinctrl_gpio_leds: gpio_leds {
++ atmel,pins = <AT91_PIOB 11 AT91_PERIPH_GPIO AT91_PINCTRL_NONE
++ AT91_PIOB 12 AT91_PERIPH_GPIO AT91_PINCTRL_NONE
++ AT91_PIOB 13 AT91_PERIPH_GPIO AT91_PINCTRL_NONE>;
++ };
++ };
+ }; /* pinctrl */
+
+ &pwm0 {
+@@ -580,7 +594,7 @@
+ &sdmmc0 {
+ bus-width = <4>;
+ pinctrl-names = "default";
+- pinctrl-0 = <&pinctrl_sdmmc0_default>;
++ pinctrl-0 = <&pinctrl_sdmmc0_default &pinctrl_sdmmc0_cd>;
+ status = "okay";
+ cd-gpios = <&pioA 23 GPIO_ACTIVE_LOW>;
+ disable-wp;
+--- a/arch/arm/boot/dts/at91-sama5d3_xplained.dts
++++ b/arch/arm/boot/dts/at91-sama5d3_xplained.dts
+@@ -57,6 +57,8 @@
+ };
+
+ spi0: spi@f0004000 {
++ pinctrl-names = "default";
++ pinctrl-0 = <&pinctrl_spi0_cs>;
+ cs-gpios = <&pioD 13 0>, <0>, <0>, <&pioD 16 0>;
+ status = "okay";
+ };
+@@ -169,6 +171,8 @@
+ };
+
+ spi1: spi@f8008000 {
++ pinctrl-names = "default";
++ pinctrl-0 = <&pinctrl_spi1_cs>;
+ cs-gpios = <&pioC 25 0>;
+ status = "okay";
+ };
+@@ -248,6 +252,26 @@
+ <AT91_PIOE 3 AT91_PERIPH_GPIO AT91_PINCTRL_NONE
+ AT91_PIOE 4 AT91_PERIPH_GPIO AT91_PINCTRL_NONE>;
+ };
++
++ pinctrl_gpio_leds: gpio_leds_default {
++ atmel,pins =
++ <AT91_PIOE 23 AT91_PERIPH_GPIO AT91_PINCTRL_NONE
++ AT91_PIOE 24 AT91_PERIPH_GPIO AT91_PINCTRL_NONE>;
++ };
++
++ pinctrl_spi0_cs: spi0_cs_default {
++ atmel,pins =
++ <AT91_PIOD 13 AT91_PERIPH_GPIO AT91_PINCTRL_NONE
++ AT91_PIOD 16 AT91_PERIPH_GPIO AT91_PINCTRL_NONE>;
++ };
++
++ pinctrl_spi1_cs: spi1_cs_default {
++ atmel,pins = <AT91_PIOC 25 AT91_PERIPH_GPIO AT91_PINCTRL_NONE>;
++ };
++
++ pinctrl_vcc_mmc0_reg_gpio: vcc_mmc0_reg_gpio_default {
++ atmel,pins = <AT91_PIOE 2 AT91_PERIPH_GPIO AT91_PINCTRL_NONE>;
++ };
+ };
+ };
+ };
+@@ -339,6 +363,8 @@
+
+ vcc_mmc0_reg: fixedregulator_mmc0 {
+ compatible = "regulator-fixed";
++ pinctrl-names = "default";
++ pinctrl-0 = <&pinctrl_vcc_mmc0_reg_gpio>;
+ gpio = <&pioE 2 GPIO_ACTIVE_LOW>;
+ regulator-name = "mmc0-card-supply";
+ regulator-min-microvolt = <3300000>;
+@@ -362,6 +388,9 @@
+
+ leds {
+ compatible = "gpio-leds";
++ pinctrl-names = "default";
++ pinctrl-0 = <&pinctrl_gpio_leds>;
++ status = "okay";
+
+ d2 {
+ label = "d2";
+--- a/arch/arm/boot/dts/at91-sama5d4_xplained.dts
++++ b/arch/arm/boot/dts/at91-sama5d4_xplained.dts
+@@ -90,6 +90,8 @@
+ };
+
+ spi1: spi@fc018000 {
++ pinctrl-names = "default";
++ pinctrl-0 = <&pinctrl_spi0_cs>;
+ cs-gpios = <&pioB 21 0>;
+ status = "okay";
+ };
+@@ -147,6 +149,19 @@
+ atmel,pins =
+ <AT91_PIOE 1 AT91_PERIPH_GPIO AT91_PINCTRL_PULL_UP_DEGLITCH>;
+ };
++ pinctrl_spi0_cs: spi0_cs_default {
++ atmel,pins =
++ <AT91_PIOB 21 AT91_PERIPH_GPIO AT91_PINCTRL_NONE>;
++ };
++ pinctrl_gpio_leds: gpio_leds_default {
++ atmel,pins =
++ <AT91_PIOD 30 AT91_PERIPH_GPIO AT91_PINCTRL_NONE
++ AT91_PIOE 15 AT91_PERIPH_GPIO AT91_PINCTRL_NONE>;
++ };
++ pinctrl_vcc_mmc1_reg: vcc_mmc1_reg {
++ atmel,pins =
++ <AT91_PIOE 4 AT91_PERIPH_GPIO AT91_PINCTRL_NONE>;
++ };
+ };
+ };
+ };
+@@ -252,6 +267,8 @@
+
+ leds {
+ compatible = "gpio-leds";
++ pinctrl-names = "default";
++ pinctrl-0 = <&pinctrl_gpio_leds>;
+ status = "okay";
+
+ d8 {
+@@ -278,6 +295,8 @@
+
+ vcc_mmc1_reg: fixedregulator_mmc1 {
+ compatible = "regulator-fixed";
++ pinctrl-names = "default";
++ pinctrl-0 = <&pinctrl_vcc_mmc1_reg>;
+ gpio = <&pioE 4 GPIO_ACTIVE_LOW>;
+ regulator-name = "VDD MCI1";
+ regulator-min-microvolt = <3300000>;
--- /dev/null
+From 847fdae1579f4ee930b01f24a7847b8043bf468c Mon Sep 17 00:00:00 2001
+From: Adrian Ratiu <adrian.ratiu@collabora.com>
+Date: Tue, 27 Jul 2021 20:13:12 +0300
+Subject: char: tpm: Kconfig: remove bad i2c cr50 select
+
+From: Adrian Ratiu <adrian.ratiu@collabora.com>
+
+commit 847fdae1579f4ee930b01f24a7847b8043bf468c upstream.
+
+This fixes a minor bug which went unnoticed during the initial
+driver upstreaming review: TCG_CR50 does not exist in mainline
+kernels, so remove it.
+
+Fixes: 3a253caaad11 ("char: tpm: add i2c driver for cr50")
+Cc: stable@vger.kernel.org
+Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Adrian Ratiu <adrian.ratiu@collabora.com>
+Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/char/tpm/Kconfig | 1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/drivers/char/tpm/Kconfig
++++ b/drivers/char/tpm/Kconfig
+@@ -89,7 +89,6 @@ config TCG_TIS_SYNQUACER
+ config TCG_TIS_I2C_CR50
+ tristate "TPM Interface Specification 2.0 Interface (I2C - CR50)"
+ depends on I2C
+- select TCG_CR50
+ help
+ This is a driver for the Google cr50 I2C TPM interface which is a
+ custom microcontroller and requires a custom i2c protocol interface
--- /dev/null
+From 59bda8ecee2ffc6a602b7bf2b9e43ca669cdbdcd Mon Sep 17 00:00:00 2001
+From: Miklos Szeredi <mszeredi@redhat.com>
+Date: Tue, 31 Aug 2021 14:18:08 +0200
+Subject: fuse: flush extending writes
+
+From: Miklos Szeredi <mszeredi@redhat.com>
+
+commit 59bda8ecee2ffc6a602b7bf2b9e43ca669cdbdcd upstream.
+
+Callers of fuse_writeback_range() assume that the file is ready for
+modification by the server in the supplied byte range after the call
+returns.
+
+If there's a write that extends the file beyond the end of the supplied
+range, then the file needs to be extended to at least the end of the range,
+but currently that's not done.
+
+There are at least two cases where this can cause problems:
+
+ - copy_file_range() will return a short count if the file is not extended
+   up to the end of the source range.
+
+ - FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE will not extend the file,
+ hence the region may not be fully allocated.
+
+Fix by flushing writes from the start of the range up to the end of the
+file. This could be optimized if the writes are non-extending, etc, but
+it's probably not worth the trouble.
+
+Fixes: a2bc92362941 ("fuse: fix copy_file_range() in the writeback case")
+Fixes: 6b1bdb56b17c ("fuse: allow fallocate(FALLOC_FL_ZERO_RANGE)")
+Cc: <stable@vger.kernel.org> # v5.2
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/fuse/file.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/fuse/file.c
++++ b/fs/fuse/file.c
+@@ -2884,7 +2884,7 @@ fuse_direct_IO(struct kiocb *iocb, struc
+
+ static int fuse_writeback_range(struct inode *inode, loff_t start, loff_t end)
+ {
+- int err = filemap_write_and_wait_range(inode->i_mapping, start, end);
++ int err = filemap_write_and_wait_range(inode->i_mapping, start, -1);
+
+ if (!err)
+ fuse_sync_writes(inode);
--- /dev/null
+From 76224355db7570cbe6b6f75c8929a1558828dd55 Mon Sep 17 00:00:00 2001
+From: Miklos Szeredi <mszeredi@redhat.com>
+Date: Tue, 17 Aug 2021 21:05:16 +0200
+Subject: fuse: truncate pagecache on atomic_o_trunc
+
+From: Miklos Szeredi <mszeredi@redhat.com>
+
+commit 76224355db7570cbe6b6f75c8929a1558828dd55 upstream.
+
+fuse_finish_open() will be called with FUSE_NOWRITE in case of atomic
+O_TRUNC. This can deadlock with fuse_wait_on_page_writeback() in
+fuse_launder_page() triggered by invalidate_inode_pages2().
+
+Fix by replacing invalidate_inode_pages2() in fuse_finish_open() with a
+truncate_pagecache() call. This makes sense regardless of FOPEN_KEEP_CACHE
+or fc->writeback_cache, so do it unconditionally.
+
+Reported-by: Xie Yongji <xieyongji@bytedance.com>
+Reported-and-tested-by: syzbot+bea44a5189836d956894@syzkaller.appspotmail.com
+Fixes: e4648309b85a ("fuse: truncate pending writes on O_TRUNC")
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/fuse/file.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/fs/fuse/file.c
++++ b/fs/fuse/file.c
+@@ -198,12 +198,11 @@ void fuse_finish_open(struct inode *inod
+ struct fuse_file *ff = file->private_data;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+
+- if (!(ff->open_flags & FOPEN_KEEP_CACHE))
+- invalidate_inode_pages2(inode->i_mapping);
+ if (ff->open_flags & FOPEN_STREAM)
+ stream_open(inode, file);
+ else if (ff->open_flags & FOPEN_NONSEEKABLE)
+ nonseekable_open(inode, file);
++
+ if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC)) {
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+@@ -211,10 +210,14 @@ void fuse_finish_open(struct inode *inod
+ fi->attr_version = atomic64_inc_return(&fc->attr_version);
+ i_size_write(inode, 0);
+ spin_unlock(&fi->lock);
++ truncate_pagecache(inode, 0);
+ fuse_invalidate_attr(inode);
+ if (fc->writeback_cache)
+ file_update_time(file);
++ } else if (!(ff->open_flags & FOPEN_KEEP_CACHE)) {
++ invalidate_inode_pages2(inode->i_mapping);
+ }
++
+ if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache)
+ fuse_link_write_file(file);
+ }
--- /dev/null
+From 660585b56e63ca034ad506ea53c807c5cdca3196 Mon Sep 17 00:00:00 2001
+From: Miklos Szeredi <mszeredi@redhat.com>
+Date: Wed, 1 Sep 2021 12:39:02 +0200
+Subject: fuse: wait for writepages in syncfs
+
+From: Miklos Szeredi <mszeredi@redhat.com>
+
+commit 660585b56e63ca034ad506ea53c807c5cdca3196 upstream.
+
+In case of fuse, the MM subsystem doesn't guarantee that page writeback
+completes by the time ->sync_fs() is called. This is because fuse
+completes page writeback immediately to prevent DoS of memory reclaim by
+the userspace file server.
+
+This means that fuse itself must ensure that writes are synced before
+sending the SYNCFS request to the server.
+
+Introduce sync buckets that hold a counter for the number of outstanding
+write requests. On syncfs replace the current bucket with a new one and
+wait until the old bucket's counter goes down to zero.
+
+It is possible to have multiple syncfs calls in parallel, in which case
+there could be more than one waited-on bucket. Descendant buckets must
+not complete until the parent completes. Add a count to the child (new)
+bucket until the (parent) old bucket completes.
+
+Use RCU protection to dereference the current bucket and to wake up an
+emptied bucket. Use fc->lock to protect against parallel assignments to
+the current bucket.
+
+This leaves just the counter to be a possible scalability issue. The
+fc->num_waiting counter has a similar issue, so both should be addressed at
+the same time.
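+
+As an illustrative sketch (not part of the original patch), the counter
+protocol can be modelled in plain C11 atomics. All names here (toy_bucket,
+toy_write_begin, toy_syncfs, ...) are made up for the illustration; the
+kernel code below uses RCU, fc->lock and a waitqueue instead of a plain
+pointer swap and busy-wait:
+
+  #include <stdatomic.h>
+  #include <stdlib.h>
+
+  struct toy_bucket { atomic_int count; };  /* ~ struct fuse_sync_bucket */
+  static struct toy_bucket *curr;           /* ~ fc->curr_bucket */
+
+  static struct toy_bucket *toy_bucket_alloc(void)
+  {
+          struct toy_bucket *b = malloc(sizeof(*b));
+          atomic_init(&b->count, 1);         /* initial active count */
+          return b;
+  }
+
+  /* a new writepage request takes a reference on the current bucket */
+  static struct toy_bucket *toy_write_begin(void)
+  {
+          atomic_fetch_add(&curr->count, 1);
+          return curr;
+  }
+
+  /* writepage completion drops the reference; a waiter checks for zero */
+  static void toy_write_end(struct toy_bucket *b)
+  {
+          atomic_fetch_sub(&b->count, 1);
+  }
+
+  /* syncfs: install a new bucket, then wait for the old one to drain */
+  static void toy_syncfs(void)
+  {
+          struct toy_bucket *old = curr, *new = toy_bucket_alloc();
+
+          atomic_fetch_add(&new->count, 1);  /* child waits for parent too */
+          curr = new;
+          atomic_fetch_sub(&old->count, 1);  /* drop parent's initial count */
+          while (atomic_load(&old->count))
+                  ;                          /* kernel: wait_event() on waitq */
+          toy_write_end(new);                /* drop the dependency count */
+          free(old);
+  }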
+
+Reported-by: Amir Goldstein <amir73il@gmail.com>
+Fixes: 2d82ab251ef0 ("virtiofs: propagate sync() to file server")
+Cc: <stable@vger.kernel.org> # v5.14
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/fuse/file.c | 21 +++++++++++++++++++
+ fs/fuse/fuse_i.h | 19 +++++++++++++++++
+ fs/fuse/inode.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ 3 files changed, 100 insertions(+)
+
+--- a/fs/fuse/file.c
++++ b/fs/fuse/file.c
+@@ -392,6 +392,7 @@ struct fuse_writepage_args {
+ struct list_head queue_entry;
+ struct fuse_writepage_args *next;
+ struct inode *inode;
++ struct fuse_sync_bucket *bucket;
+ };
+
+ static struct fuse_writepage_args *fuse_find_writeback(struct fuse_inode *fi,
+@@ -1611,6 +1612,9 @@ static void fuse_writepage_free(struct f
+ struct fuse_args_pages *ap = &wpa->ia.ap;
+ int i;
+
++ if (wpa->bucket)
++ fuse_sync_bucket_dec(wpa->bucket);
++
+ for (i = 0; i < ap->num_pages; i++)
+ __free_page(ap->pages[i]);
+
+@@ -1874,6 +1878,20 @@ static struct fuse_writepage_args *fuse_
+
+ }
+
++static void fuse_writepage_add_to_bucket(struct fuse_conn *fc,
++ struct fuse_writepage_args *wpa)
++{
++ if (!fc->sync_fs)
++ return;
++
++ rcu_read_lock();
++ /* Prevent resurrection of dead bucket in unlikely race with syncfs */
++ do {
++ wpa->bucket = rcu_dereference(fc->curr_bucket);
++ } while (unlikely(!atomic_inc_not_zero(&wpa->bucket->count)));
++ rcu_read_unlock();
++}
++
+ static int fuse_writepage_locked(struct page *page)
+ {
+ struct address_space *mapping = page->mapping;
+@@ -1901,6 +1919,7 @@ static int fuse_writepage_locked(struct
+ if (!wpa->ia.ff)
+ goto err_nofile;
+
++ fuse_writepage_add_to_bucket(fc, wpa);
+ fuse_write_args_fill(&wpa->ia, wpa->ia.ff, page_offset(page), 0);
+
+ copy_highpage(tmp_page, page);
+@@ -2151,6 +2170,8 @@ static int fuse_writepages_fill(struct p
+ __free_page(tmp_page);
+ goto out_unlock;
+ }
++ fuse_writepage_add_to_bucket(fc, wpa);
++
+ data->max_pages = 1;
+
+ ap = &wpa->ia.ap;
+--- a/fs/fuse/fuse_i.h
++++ b/fs/fuse/fuse_i.h
+@@ -515,6 +515,13 @@ struct fuse_fs_context {
+ void **fudptr;
+ };
+
++struct fuse_sync_bucket {
++ /* count is a possible scalability bottleneck */
++ atomic_t count;
++ wait_queue_head_t waitq;
++ struct rcu_head rcu;
++};
++
+ /**
+ * A Fuse connection.
+ *
+@@ -807,6 +814,9 @@ struct fuse_conn {
+
+ /** List of filesystems using this connection */
+ struct list_head mounts;
++
++ /* New writepages go into this bucket */
++ struct fuse_sync_bucket __rcu *curr_bucket;
+ };
+
+ /*
+@@ -910,6 +920,15 @@ static inline void fuse_page_descs_lengt
+ descs[i].length = PAGE_SIZE - descs[i].offset;
+ }
+
++static inline void fuse_sync_bucket_dec(struct fuse_sync_bucket *bucket)
++{
++ /* Need RCU protection to prevent use after free after the decrement */
++ rcu_read_lock();
++ if (atomic_dec_and_test(&bucket->count))
++ wake_up(&bucket->waitq);
++ rcu_read_unlock();
++}
++
+ /** Device operations */
+ extern const struct file_operations fuse_dev_operations;
+
+--- a/fs/fuse/inode.c
++++ b/fs/fuse/inode.c
+@@ -506,6 +506,57 @@ static int fuse_statfs(struct dentry *de
+ return err;
+ }
+
++static struct fuse_sync_bucket *fuse_sync_bucket_alloc(void)
++{
++ struct fuse_sync_bucket *bucket;
++
++ bucket = kzalloc(sizeof(*bucket), GFP_KERNEL | __GFP_NOFAIL);
++ if (bucket) {
++ init_waitqueue_head(&bucket->waitq);
++ /* Initial active count */
++ atomic_set(&bucket->count, 1);
++ }
++ return bucket;
++}
++
++static void fuse_sync_fs_writes(struct fuse_conn *fc)
++{
++ struct fuse_sync_bucket *bucket, *new_bucket;
++ int count;
++
++ new_bucket = fuse_sync_bucket_alloc();
++ spin_lock(&fc->lock);
++ bucket = rcu_dereference_protected(fc->curr_bucket, 1);
++ count = atomic_read(&bucket->count);
++ WARN_ON(count < 1);
++ /* No outstanding writes? */
++ if (count == 1) {
++ spin_unlock(&fc->lock);
++ kfree(new_bucket);
++ return;
++ }
++
++ /*
++ * Completion of new bucket depends on completion of this bucket, so add
++ * one more count.
++ */
++ atomic_inc(&new_bucket->count);
++ rcu_assign_pointer(fc->curr_bucket, new_bucket);
++ spin_unlock(&fc->lock);
++ /*
++ * Drop initial active count. At this point if all writes in this and
++ * ancestor buckets complete, the count will go to zero and this task
++ * will be woken up.
++ */
++ atomic_dec(&bucket->count);
++
++ wait_event(bucket->waitq, atomic_read(&bucket->count) == 0);
++
++ /* Drop temp count on descendant bucket */
++ fuse_sync_bucket_dec(new_bucket);
++ kfree_rcu(bucket, rcu);
++}
++
+ static int fuse_sync_fs(struct super_block *sb, int wait)
+ {
+ struct fuse_mount *fm = get_fuse_mount_super(sb);
+@@ -528,6 +579,8 @@ static int fuse_sync_fs(struct super_blo
+ if (!fc->sync_fs)
+ return 0;
+
++ fuse_sync_fs_writes(fc);
++
+ memset(&inarg, 0, sizeof(inarg));
+ args.in_numargs = 1;
+ args.in_args[0].size = sizeof(inarg);
+@@ -763,6 +816,7 @@ void fuse_conn_put(struct fuse_conn *fc)
+ {
+ if (refcount_dec_and_test(&fc->count)) {
+ struct fuse_iqueue *fiq = &fc->iq;
++ struct fuse_sync_bucket *bucket;
+
+ if (IS_ENABLED(CONFIG_FUSE_DAX))
+ fuse_dax_conn_free(fc);
+@@ -770,6 +824,11 @@ void fuse_conn_put(struct fuse_conn *fc)
+ fiq->ops->release(fiq);
+ put_pid_ns(fc->pid_ns);
+ put_user_ns(fc->user_ns);
++ bucket = rcu_dereference_protected(fc->curr_bucket, 1);
++ if (bucket) {
++ WARN_ON(atomic_read(&bucket->count) != 1);
++ kfree(bucket);
++ }
+ fc->release(fc);
+ }
+ }
+@@ -1418,6 +1477,7 @@ int fuse_fill_super_common(struct super_
+ if (sb->s_flags & SB_MANDLOCK)
+ goto err;
+
++ rcu_assign_pointer(fc->curr_bucket, fuse_sync_bucket_alloc());
+ fuse_sb_defaults(sb);
+
+ if (ctx->is_bdev) {
--- /dev/null
+From 8510505d55e194d3f6c9644c9f9d12c4f6b0395a Mon Sep 17 00:00:00 2001
+From: THOBY Simon <Simon.THOBY@viveris.fr>
+Date: Mon, 16 Aug 2021 08:10:59 +0000
+Subject: IMA: remove the dependency on CRYPTO_MD5
+
+From: THOBY Simon <Simon.THOBY@viveris.fr>
+
+commit 8510505d55e194d3f6c9644c9f9d12c4f6b0395a upstream.
+
+MD5 is a weak digest algorithm that shouldn't be used for cryptographic
+operations. It hinders the efficiency of a patch set that aims to limit
+the digests allowed for the extended file attribute, namely security.ima.
+MD5 is no longer a requirement for IMA, nor should it be used there.
+
+The sole place where we still use the MD5 algorithm inside IMA is setting
+the ima_hash algorithm to MD5, if the user supplies 'ima_hash=md5'
+parameter on the command line. With commit ab60368ab6a4 ("ima: Fallback
+to the builtin hash algorithm"), setting "ima_hash=md5" fails gracefully
+when CRYPTO_MD5 is not set:
+ ima: Can not allocate md5 (reason: -2)
+ ima: Allocating md5 failed, going to use default hash algorithm sha256
+
+Remove the CRYPTO_MD5 dependency for IMA.
+
+Signed-off-by: THOBY Simon <Simon.THOBY@viveris.fr>
+Reviewed-by: Lakshmi Ramasubramanian <nramas@linux.microsoft.com>
+[zohar@linux.ibm.com: include commit number in patch description for
+stable.]
+Cc: stable@vger.kernel.org # 4.17
+Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ security/integrity/ima/Kconfig | 1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/security/integrity/ima/Kconfig
++++ b/security/integrity/ima/Kconfig
+@@ -6,7 +6,6 @@ config IMA
+ select SECURITYFS
+ select CRYPTO
+ select CRYPTO_HMAC
+- select CRYPTO_MD5
+ select CRYPTO_SHA1
+ select CRYPTO_HASH_INFO
+ select TCG_TPM if HAS_IOMEM && !UML
--- /dev/null
+From a32ad90426a9c8eb3915eed26e08ce133bd9e0da Mon Sep 17 00:00:00 2001
+From: Austin Kim <austin.kim@lge.com>
+Date: Tue, 29 Jun 2021 14:50:50 +0100
+Subject: IMA: remove -Wmissing-prototypes warning
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Austin Kim <austin.kim@lge.com>
+
+commit a32ad90426a9c8eb3915eed26e08ce133bd9e0da upstream.
+
+With a W=1 build, the compiler throws a warning message as below:
+
+ security/integrity/ima/ima_mok.c:24:12: warning:
+ no previous prototype for ‘ima_mok_init’ [-Wmissing-prototypes]
+ __init int ima_mok_init(void)
+
+Silence the warning by adding the static keyword to ima_mok_init().
+
+Signed-off-by: Austin Kim <austin.kim@lge.com>
+Fixes: 41c89b64d718 ("IMA: create machine owner and blacklist keyrings")
+Cc: stable@vger.kernel.org
+Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ security/integrity/ima/ima_mok.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/security/integrity/ima/ima_mok.c
++++ b/security/integrity/ima/ima_mok.c
+@@ -21,7 +21,7 @@ struct key *ima_blacklist_keyring;
+ /*
+ * Allocate the IMA blacklist keyring
+ */
+-__init int ima_mok_init(void)
++static __init int ima_mok_init(void)
+ {
+ struct key_restriction *restriction;
+
--- /dev/null
+From ecc53c48c13d995e6fe5559e30ffee48d92784fd Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Sun, 29 Aug 2021 16:13:03 -0600
+Subject: io-wq: check max_worker limits if a worker transitions bound state
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit ecc53c48c13d995e6fe5559e30ffee48d92784fd upstream.
+
+For the two places where new workers are created, we diligently check if
+we are allowed to create a new worker. If we're currently at the limit
+of how many workers of a given type we can have, then we don't create
+any new ones.
+
+If you have a mixed workload with various types of bound and unbounded
+work, then it can happen that a worker finishes one type of work and
+is then transitioned to the other type. For this case, we don't check
+if we are actually allowed to do so. This can cause io-wq to temporarily
+exceed the allowed number of workers for a given type.
+
+When retrieving work, check that the types match. If they don't, check
+if we are allowed to transition to the other type. If not, then don't
+handle the new work.
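+
+As a reading aid (illustrative only, not part of the patch): the worker flag
+records "bound" while the work flag records "unbound", so in the new
+io_worker_can_run_work() the two negated bits compare unequal exactly when
+worker and work are of the same type. A standalone model with made-up flag
+values:
+
+  #include <stdbool.h>
+
+  #define TOY_WORKER_BOUND  0x01  /* worker currently serves bound work */
+  #define TOY_WORK_UNBOUND  0x02  /* work item is unbound */
+
+  /* true when worker type and work type match; the limit check is left out */
+  static bool toy_same_type(unsigned int worker_flags, unsigned int work_flags)
+  {
+          return !(worker_flags & TOY_WORKER_BOUND) !=
+                 !(work_flags & TOY_WORK_UNBOUND);
+  }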
+
+Cc: stable@vger.kernel.org
+Reported-by: Johannes Lundberg <johalun0@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io-wq.c | 33 ++++++++++++++++++++++++++++++---
+ 1 file changed, 30 insertions(+), 3 deletions(-)
+
+--- a/fs/io-wq.c
++++ b/fs/io-wq.c
+@@ -423,7 +423,28 @@ static void io_wait_on_hash(struct io_wq
+ spin_unlock(&wq->hash->wait.lock);
+ }
+
+-static struct io_wq_work *io_get_next_work(struct io_wqe *wqe)
++/*
++ * We can always run the work if the worker is currently the same type as
++ * the work (eg both are bound, or both are unbound). If they are not the
++ * same, only allow it if incrementing the worker count would be allowed.
++ */
++static bool io_worker_can_run_work(struct io_worker *worker,
++ struct io_wq_work *work)
++{
++ struct io_wqe_acct *acct;
++
++ if (!(worker->flags & IO_WORKER_F_BOUND) !=
++ !(work->flags & IO_WQ_WORK_UNBOUND))
++ return true;
++
++ /* not the same type, check if we'd go over the limit */
++ acct = io_work_get_acct(worker->wqe, work);
++ return acct->nr_workers < acct->max_workers;
++}
++
++static struct io_wq_work *io_get_next_work(struct io_wqe *wqe,
++ struct io_worker *worker,
++ bool *stalled)
+ __must_hold(wqe->lock)
+ {
+ struct io_wq_work_node *node, *prev;
+@@ -435,6 +456,9 @@ static struct io_wq_work *io_get_next_wo
+
+ work = container_of(node, struct io_wq_work, list);
+
++ if (!io_worker_can_run_work(worker, work))
++ break;
++
+ /* not hashed, can run anytime */
+ if (!io_wq_is_hashed(work)) {
+ wq_list_del(&wqe->work_list, node, prev);
+@@ -461,6 +485,7 @@ static struct io_wq_work *io_get_next_wo
+ raw_spin_unlock(&wqe->lock);
+ io_wait_on_hash(wqe, stall_hash);
+ raw_spin_lock(&wqe->lock);
++ *stalled = true;
+ }
+
+ return NULL;
+@@ -500,6 +525,7 @@ static void io_worker_handle_work(struct
+
+ do {
+ struct io_wq_work *work;
++ bool stalled;
+ get_next:
+ /*
+ * If we got some work, mark us as busy. If we didn't, but
+@@ -508,10 +534,11 @@ get_next:
+ * can't make progress, any work completion or insertion will
+ * clear the stalled flag.
+ */
+- work = io_get_next_work(wqe);
++ stalled = false;
++ work = io_get_next_work(wqe, worker, &stalled);
+ if (work)
+ __io_worker_busy(wqe, worker, work);
+- else if (!wq_list_empty(&wqe->work_list))
++ else if (stalled)
+ wqe->flags |= IO_WQE_FLAG_STALLED;
+
+ raw_spin_unlock_irq(&wqe->lock);
--- /dev/null
+From 47e6223c841e029bfc23c3ce594dac5525cebaf8 Mon Sep 17 00:00:00 2001
+From: Marc Zyngier <maz@kernel.org>
+Date: Mon, 2 Aug 2021 13:38:30 +0100
+Subject: KVM: arm64: Unregister HYP sections from kmemleak in protected mode
+
+From: Marc Zyngier <maz@kernel.org>
+
+commit 47e6223c841e029bfc23c3ce594dac5525cebaf8 upstream.
+
+Booting a KVM host in protected mode with kmemleak quickly results
+in a pretty bad crash, as kmemleak doesn't know that the HYP sections
+have been taken away. This is specially true for the BSS section,
+which is part of the kernel BSS section and registered at boot time
+by kmemleak itself.
+
+Unregister the HYP part of the BSS before making that section
+HYP-private. The rest of the HYP-specific data is obtained via
+the page allocator or lives in other sections, none of which is
+subjected to kmemleak.
+
+Fixes: 90134ac9cabb ("KVM: arm64: Protect the .hyp sections from the host")
+Reviewed-by: Quentin Perret <qperret@google.com>
+Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Cc: stable@vger.kernel.org # 5.13
+Link: https://lore.kernel.org/r/20210802123830.2195174-3-maz@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/kvm/arm.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/arch/arm64/kvm/arm.c
++++ b/arch/arm64/kvm/arm.c
+@@ -15,6 +15,7 @@
+ #include <linux/fs.h>
+ #include <linux/mman.h>
+ #include <linux/sched.h>
++#include <linux/kmemleak.h>
+ #include <linux/kvm.h>
+ #include <linux/kvm_irqfd.h>
+ #include <linux/irqbypass.h>
+@@ -1986,6 +1987,12 @@ static int finalize_hyp_mode(void)
+ if (ret)
+ return ret;
+
++ /*
++ * Exclude HYP BSS from kmemleak so that it doesn't get peeked
++ * at, which would end badly once the section is inaccessible.
++ * None of other sections should ever be introspected.
++ */
++ kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
+ ret = pkvm_mark_hyp_section(__hyp_bss);
+ if (ret)
+ return ret;
--- /dev/null
+From 3134cc8beb69d0db9de651081707c4651c011621 Mon Sep 17 00:00:00 2001
+From: Marc Zyngier <maz@kernel.org>
+Date: Thu, 19 Aug 2021 19:03:05 +0100
+Subject: KVM: arm64: vgic: Resample HW pending state on deactivation
+
+From: Marc Zyngier <maz@kernel.org>
+
+commit 3134cc8beb69d0db9de651081707c4651c011621 upstream.
+
+When a mapped level interrupt (a timer, for example) is deactivated
+by the guest, the corresponding host interrupt is equally deactivated.
+However, the fate of the pending state still needs to be dealt
+with in SW.
+
+This is especially true when the interrupt was in the active+pending
+state in the virtual distributor at the point where the guest
+was entered. On exit, the pending state is potentially stale
+(the guest may have put the interrupt in a non-pending state).
+
+If we don't do anything, the interrupt will be spuriously injected
+in the guest. Although this shouldn't have any ill effect (spurious
+interrupts are always possible), we can improve the emulation by
+detecting the deactivation-while-pending case and resampling the
+interrupt.
+
+While we're at it, move the logic into a common helper that can
+be shared between the two GIC implementations.
+
+Fixes: e40cc57bac79 ("KVM: arm/arm64: vgic: Support level-triggered mapped interrupts")
+Reported-by: Raghavendra Rao Ananta <rananta@google.com>
+Tested-by: Raghavendra Rao Ananta <rananta@google.com>
+Reviewed-by: Oliver Upton <oupton@google.com>
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20210819180305.1670525-1-maz@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/kvm/vgic/vgic-v2.c | 36 +++++-------------------------------
+ arch/arm64/kvm/vgic/vgic-v3.c | 36 +++++-------------------------------
+ arch/arm64/kvm/vgic/vgic.c | 38 ++++++++++++++++++++++++++++++++++++++
+ arch/arm64/kvm/vgic/vgic.h | 2 ++
+ 4 files changed, 50 insertions(+), 62 deletions(-)
+
+--- a/arch/arm64/kvm/vgic/vgic-v2.c
++++ b/arch/arm64/kvm/vgic/vgic-v2.c
+@@ -60,6 +60,7 @@ void vgic_v2_fold_lr_state(struct kvm_vc
+ u32 val = cpuif->vgic_lr[lr];
+ u32 cpuid, intid = val & GICH_LR_VIRTUALID;
+ struct vgic_irq *irq;
++ bool deactivated;
+
+ /* Extract the source vCPU id from the LR */
+ cpuid = val & GICH_LR_PHYSID_CPUID;
+@@ -75,7 +76,8 @@ void vgic_v2_fold_lr_state(struct kvm_vc
+
+ raw_spin_lock(&irq->irq_lock);
+
+- /* Always preserve the active bit */
++ /* Always preserve the active bit, note deactivation */
++ deactivated = irq->active && !(val & GICH_LR_ACTIVE_BIT);
+ irq->active = !!(val & GICH_LR_ACTIVE_BIT);
+
+ if (irq->active && vgic_irq_is_sgi(intid))
+@@ -96,36 +98,8 @@ void vgic_v2_fold_lr_state(struct kvm_vc
+ if (irq->config == VGIC_CONFIG_LEVEL && !(val & GICH_LR_STATE))
+ irq->pending_latch = false;
+
+- /*
+- * Level-triggered mapped IRQs are special because we only
+- * observe rising edges as input to the VGIC.
+- *
+- * If the guest never acked the interrupt we have to sample
+- * the physical line and set the line level, because the
+- * device state could have changed or we simply need to
+- * process the still pending interrupt later.
+- *
+- * If this causes us to lower the level, we have to also clear
+- * the physical active state, since we will otherwise never be
+- * told when the interrupt becomes asserted again.
+- *
+- * Another case is when the interrupt requires a helping hand
+- * on deactivation (no HW deactivation, for example).
+- */
+- if (vgic_irq_is_mapped_level(irq)) {
+- bool resample = false;
+-
+- if (val & GICH_LR_PENDING_BIT) {
+- irq->line_level = vgic_get_phys_line_level(irq);
+- resample = !irq->line_level;
+- } else if (vgic_irq_needs_resampling(irq) &&
+- !(irq->active || irq->pending_latch)) {
+- resample = true;
+- }
+-
+- if (resample)
+- vgic_irq_set_phys_active(irq, false);
+- }
++ /* Handle resampling for mapped interrupts if required */
++ vgic_irq_handle_resampling(irq, deactivated, val & GICH_LR_PENDING_BIT);
+
+ raw_spin_unlock(&irq->irq_lock);
+ vgic_put_irq(vcpu->kvm, irq);
+--- a/arch/arm64/kvm/vgic/vgic-v3.c
++++ b/arch/arm64/kvm/vgic/vgic-v3.c
+@@ -46,6 +46,7 @@ void vgic_v3_fold_lr_state(struct kvm_vc
+ u32 intid, cpuid;
+ struct vgic_irq *irq;
+ bool is_v2_sgi = false;
++ bool deactivated;
+
+ cpuid = val & GICH_LR_PHYSID_CPUID;
+ cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT;
+@@ -68,7 +69,8 @@ void vgic_v3_fold_lr_state(struct kvm_vc
+
+ raw_spin_lock(&irq->irq_lock);
+
+- /* Always preserve the active bit */
++ /* Always preserve the active bit, note deactivation */
++ deactivated = irq->active && !(val & ICH_LR_ACTIVE_BIT);
+ irq->active = !!(val & ICH_LR_ACTIVE_BIT);
+
+ if (irq->active && is_v2_sgi)
+@@ -89,36 +91,8 @@ void vgic_v3_fold_lr_state(struct kvm_vc
+ if (irq->config == VGIC_CONFIG_LEVEL && !(val & ICH_LR_STATE))
+ irq->pending_latch = false;
+
+- /*
+- * Level-triggered mapped IRQs are special because we only
+- * observe rising edges as input to the VGIC.
+- *
+- * If the guest never acked the interrupt we have to sample
+- * the physical line and set the line level, because the
+- * device state could have changed or we simply need to
+- * process the still pending interrupt later.
+- *
+- * If this causes us to lower the level, we have to also clear
+- * the physical active state, since we will otherwise never be
+- * told when the interrupt becomes asserted again.
+- *
+- * Another case is when the interrupt requires a helping hand
+- * on deactivation (no HW deactivation, for example).
+- */
+- if (vgic_irq_is_mapped_level(irq)) {
+- bool resample = false;
+-
+- if (val & ICH_LR_PENDING_BIT) {
+- irq->line_level = vgic_get_phys_line_level(irq);
+- resample = !irq->line_level;
+- } else if (vgic_irq_needs_resampling(irq) &&
+- !(irq->active || irq->pending_latch)) {
+- resample = true;
+- }
+-
+- if (resample)
+- vgic_irq_set_phys_active(irq, false);
+- }
++ /* Handle resampling for mapped interrupts if required */
++ vgic_irq_handle_resampling(irq, deactivated, val & ICH_LR_PENDING_BIT);
+
+ raw_spin_unlock(&irq->irq_lock);
+ vgic_put_irq(vcpu->kvm, irq);
+--- a/arch/arm64/kvm/vgic/vgic.c
++++ b/arch/arm64/kvm/vgic/vgic.c
+@@ -1022,3 +1022,41 @@ bool kvm_vgic_map_is_active(struct kvm_v
+
+ return map_is_active;
+ }
++
++/*
++ * Level-triggered mapped IRQs are special because we only observe rising
++ * edges as input to the VGIC.
++ *
++ * If the guest never acked the interrupt we have to sample the physical
++ * line and set the line level, because the device state could have changed
++ * or we simply need to process the still pending interrupt later.
++ *
++ * We could also have entered the guest with the interrupt active+pending.
++ * On the next exit, we need to re-evaluate the pending state, as it could
++ * otherwise result in a spurious interrupt by injecting a now potentially
++ * stale pending state.
++ *
++ * If this causes us to lower the level, we have to also clear the physical
++ * active state, since we will otherwise never be told when the interrupt
++ * becomes asserted again.
++ *
++ * Another case is when the interrupt requires a helping hand on
++ * deactivation (no HW deactivation, for example).
++ */
++void vgic_irq_handle_resampling(struct vgic_irq *irq,
++ bool lr_deactivated, bool lr_pending)
++{
++ if (vgic_irq_is_mapped_level(irq)) {
++ bool resample = false;
++
++ if (unlikely(vgic_irq_needs_resampling(irq))) {
++ resample = !(irq->active || irq->pending_latch);
++ } else if (lr_pending || (lr_deactivated && irq->line_level)) {
++ irq->line_level = vgic_get_phys_line_level(irq);
++ resample = !irq->line_level;
++ }
++
++ if (resample)
++ vgic_irq_set_phys_active(irq, false);
++ }
++}
+--- a/arch/arm64/kvm/vgic/vgic.h
++++ b/arch/arm64/kvm/vgic/vgic.h
+@@ -169,6 +169,8 @@ void vgic_irq_set_phys_active(struct vgi
+ bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
+ unsigned long flags);
+ void vgic_kick_vcpus(struct kvm *kvm);
++void vgic_irq_handle_resampling(struct vgic_irq *irq,
++ bool lr_deactivated, bool lr_pending);
+
+ int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr,
+ phys_addr_t addr, phys_addr_t alignment);
--- /dev/null
+From f7782bb8d818d8f47c26b22079db10599922787a Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Tue, 10 Aug 2021 07:45:26 -0700
+Subject: KVM: nVMX: Unconditionally clear nested.pi_pending on nested VM-Enter
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit f7782bb8d818d8f47c26b22079db10599922787a upstream.
+
+Clear nested.pi_pending on nested VM-Enter even if L2 will run without
+posted interrupts enabled. If nested.pi_pending is left set from a
+previous L2, vmx_complete_nested_posted_interrupt() will pick up the
+stale flag and exit to userspace with an "internal emulation error" due to
+the new L2 not having a valid nested.pi_desc.
+
+Arguably, vmx_complete_nested_posted_interrupt() should first check for
+posted interrupts being enabled, but it's also completely reasonable that
+KVM wouldn't screw up a fundamental flag. Not to mention that the mere
+existence of nested.pi_pending is a long-standing bug as KVM shouldn't
+move the posted interrupt out of the IRR until it's actually processed,
+e.g. KVM effectively drops an interrupt when it performs a nested VM-Exit
+with a "pending" posted interrupt. Fixing the mess is a future problem.
+
+Prior to vmx_complete_nested_posted_interrupt() interpreting a null PI
+descriptor as an error, this was a benign bug as the null PI descriptor
+effectively served as a check on PI not being enabled. Even then, the
+new flow did not become problematic until KVM started checking the result
+of kvm_check_nested_events().
+
+Fixes: 705699a13994 ("KVM: nVMX: Enable nested posted interrupt processing")
+Fixes: 966eefb89657 ("KVM: nVMX: Disable vmcs02 posted interrupts if vmcs12 PID isn't mappable")
+Fixes: 47d3530f86c0 ("KVM: x86: Exit to userspace when kvm_check_nested_events fails")
+Cc: stable@vger.kernel.org
+Cc: Jim Mattson <jmattson@google.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20210810144526.2662272-1-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx/nested.c | 7 +++----
+ 1 file changed, 3 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -2223,12 +2223,11 @@ static void prepare_vmcs02_early(struct
+ ~PIN_BASED_VMX_PREEMPTION_TIMER);
+
+ /* Posted interrupts setting is only taken from vmcs12. */
+- if (nested_cpu_has_posted_intr(vmcs12)) {
++ vmx->nested.pi_pending = false;
++ if (nested_cpu_has_posted_intr(vmcs12))
+ vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv;
+- vmx->nested.pi_pending = false;
+- } else {
++ else
+ exec_control &= ~PIN_BASED_POSTED_INTR;
+- }
+ pin_controls_set(vmx, exec_control);
+
+ /*
--- /dev/null
+From a3e03bc1368c1bc16e19b001fc96dc7430573cc8 Mon Sep 17 00:00:00 2001
+From: Halil Pasic <pasic@linux.ibm.com>
+Date: Fri, 27 Aug 2021 14:54:29 +0200
+Subject: KVM: s390: index kvm->arch.idle_mask by vcpu_idx
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Halil Pasic <pasic@linux.ibm.com>
+
+commit a3e03bc1368c1bc16e19b001fc96dc7430573cc8 upstream.
+
+While in practice vcpu->vcpu_idx == vcpu->vcpu_id is often true, it may
+not always be, and we must not rely on this. Reason is that KVM decides
+the vcpu_idx, userspace decides the vcpu_id, thus the two might not
+match.
+
+Currently kvm->arch.idle_mask is indexed by vcpu_id, which implies
+that code like
+for_each_set_bit(vcpu_id, kvm->arch.idle_mask, online_vcpus) {
+ vcpu = kvm_get_vcpu(kvm, vcpu_id);
+ do_stuff(vcpu);
+}
+is not legit. Reason is that kvm_get_vcpu expects a vcpu_idx, not a
+vcpu_id. The trouble is, we do actually use kvm->arch.idle_mask like
+this. To fix this problem we have two options. Either use
+kvm_get_vcpu_by_id(vcpu_id), which would loop to find the right vcpu_id,
+or switch to indexing via vcpu_idx. The latter is preferable for obvious
+reasons.
+
+Let us switch from indexing kvm->arch.idle_mask by vcpu_id to indexing it
+by vcpu_idx. To keep gisa_int.kicked_mask indexed by the same index as
+idle_mask, let's make the same change for it as well.
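+
+For illustration (this mirrors the change to __airqs_kick_single_vcpu()
+below; do_stuff() is a placeholder), the loop from above then becomes:
+
+for_each_set_bit(vcpu_idx, kvm->arch.idle_mask, online_vcpus) {
+	vcpu = kvm_get_vcpu(kvm, vcpu_idx);	/* kvm_get_vcpu() takes an idx */
+	do_stuff(vcpu);
+}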
+
+Fixes: 1ee0bc559dc3 ("KVM: s390: get rid of local_int array")
+Signed-off-by: Halil Pasic <pasic@linux.ibm.com>
+Reviewed-by: Christian Bornträger <borntraeger@de.ibm.com>
+Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
+Cc: <stable@vger.kernel.org> # 3.15+
+Link: https://lore.kernel.org/r/20210827125429.1912577-1-pasic@linux.ibm.com
+Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/include/asm/kvm_host.h | 1 +
+ arch/s390/kvm/interrupt.c | 12 ++++++------
+ arch/s390/kvm/kvm-s390.c | 2 +-
+ arch/s390/kvm/kvm-s390.h | 2 +-
+ 4 files changed, 9 insertions(+), 8 deletions(-)
+
+--- a/arch/s390/include/asm/kvm_host.h
++++ b/arch/s390/include/asm/kvm_host.h
+@@ -957,6 +957,7 @@ struct kvm_arch{
+ atomic64_t cmma_dirty_pages;
+ /* subset of available cpu features enabled by user space */
+ DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
++ /* indexed by vcpu_idx */
+ DECLARE_BITMAP(idle_mask, KVM_MAX_VCPUS);
+ struct kvm_s390_gisa_interrupt gisa_int;
+ struct kvm_s390_pv pv;
+--- a/arch/s390/kvm/interrupt.c
++++ b/arch/s390/kvm/interrupt.c
+@@ -419,13 +419,13 @@ static unsigned long deliverable_irqs(st
+ static void __set_cpu_idle(struct kvm_vcpu *vcpu)
+ {
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_WAIT);
+- set_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask);
++ set_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask);
+ }
+
+ static void __unset_cpu_idle(struct kvm_vcpu *vcpu)
+ {
+ kvm_s390_clear_cpuflags(vcpu, CPUSTAT_WAIT);
+- clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask);
++ clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask);
+ }
+
+ static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
+@@ -3050,18 +3050,18 @@ int kvm_s390_get_irq_state(struct kvm_vc
+
+ static void __airqs_kick_single_vcpu(struct kvm *kvm, u8 deliverable_mask)
+ {
+- int vcpu_id, online_vcpus = atomic_read(&kvm->online_vcpus);
++ int vcpu_idx, online_vcpus = atomic_read(&kvm->online_vcpus);
+ struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int;
+ struct kvm_vcpu *vcpu;
+
+- for_each_set_bit(vcpu_id, kvm->arch.idle_mask, online_vcpus) {
+- vcpu = kvm_get_vcpu(kvm, vcpu_id);
++ for_each_set_bit(vcpu_idx, kvm->arch.idle_mask, online_vcpus) {
++ vcpu = kvm_get_vcpu(kvm, vcpu_idx);
+ if (psw_ioint_disabled(vcpu))
+ continue;
+ deliverable_mask &= (u8)(vcpu->arch.sie_block->gcr[6] >> 24);
+ if (deliverable_mask) {
+ /* lately kicked but not yet running */
+- if (test_and_set_bit(vcpu_id, gi->kicked_mask))
++ if (test_and_set_bit(vcpu_idx, gi->kicked_mask))
+ return;
+ kvm_s390_vcpu_wakeup(vcpu);
+ return;
+--- a/arch/s390/kvm/kvm-s390.c
++++ b/arch/s390/kvm/kvm-s390.c
+@@ -4044,7 +4044,7 @@ static int vcpu_pre_run(struct kvm_vcpu
+ kvm_s390_patch_guest_per_regs(vcpu);
+ }
+
+- clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
++ clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.gisa_int.kicked_mask);
+
+ vcpu->arch.sie_block->icptcode = 0;
+ cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
+--- a/arch/s390/kvm/kvm-s390.h
++++ b/arch/s390/kvm/kvm-s390.h
+@@ -79,7 +79,7 @@ static inline int is_vcpu_stopped(struct
+
+ static inline int is_vcpu_idle(struct kvm_vcpu *vcpu)
+ {
+- return test_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask);
++ return test_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask);
+ }
+
+ static inline int kvm_is_ucontrol(struct kvm *kvm)
--- /dev/null
+From 81b4b56d4f8130bbb99cf4e2b48082e5b4cfccb9 Mon Sep 17 00:00:00 2001
+From: Maxim Levitsky <mlevitsk@redhat.com>
+Date: Thu, 26 Aug 2021 12:57:49 +0300
+Subject: KVM: VMX: avoid running vmx_handle_exit_irqoff in case of emulation
+
+From: Maxim Levitsky <mlevitsk@redhat.com>
+
+commit 81b4b56d4f8130bbb99cf4e2b48082e5b4cfccb9 upstream.
+
+If we are emulating an invalid guest state, we don't have a correct
+exit reason, and thus we shouldn't do anything in this function.
+
+Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
+Message-Id: <20210826095750.1650467-2-mlevitsk@redhat.com>
+Cc: stable@vger.kernel.org
+Fixes: 95b5a48c4f2b ("KVM: VMX: Handle NMIs, #MCs and async #PFs in common irqs-disabled fn", 2019-06-18)
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx/vmx.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -6368,6 +6368,9 @@ static void vmx_handle_exit_irqoff(struc
+ {
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
++ if (vmx->emulation_required)
++ return;
++
+ if (vmx->exit_reason.basic == EXIT_REASON_EXTERNAL_INTERRUPT)
+ handle_external_interrupt_irqoff(vcpu);
+ else if (vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI)
--- /dev/null
+From ec607a564f70519b340f7eb4cfc0f4a6b55285ac Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Fri, 6 Aug 2021 07:05:58 -0400
+Subject: KVM: x86: clamp host mapping level to max_level in kvm_mmu_max_mapping_level
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit ec607a564f70519b340f7eb4cfc0f4a6b55285ac upstream.
+
+This change started as a way to make kvm_mmu_hugepage_adjust a bit simpler,
+but it does fix two bugs as well.
+
+One bug is in zapping collapsible PTEs. If one large page size is
+disallowed but not all of them, kvm_mmu_max_mapping_level will return the
+host mapping level and the small PTEs will be zapped up to that level.
+However, if e.g. 1GB pages are prohibited, we can still zap 4KB mappings
+and preserve the 2MB ones. This can happen for example when NX huge pages
+are in use.
+
+The second would happen when userspace backs guest memory
+with a 1GB hugepage but only assigns a subset of the page to
+the guest. 1GB pages would be disallowed by the memslot, but
+not 2MB. kvm_mmu_max_mapping_level() would fall through to the
+host_pfn_mapping_level() logic, see the 1GB hugepage, and map the whole
+thing into the guest.
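+
+A sketch of the clamping (illustrative only; levels use the usual KVM
+convention 1 = 4KB, 2 = 2MB, 3 = 1GB):
+
+  /* e.g. the memslot or NX workaround forbids 1GB, so max_level = 2 (2MB),
+   * while the host backs the gfn with a 1GB page, so the host level is
+   * 3 (1GB).  Returning min(3, 2) = 2 maps 2MB instead of the whole 1GB. */
+  host_level = host_pfn_mapping_level(kvm, gfn, pfn, slot);
+  return min(host_level, max_level);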
+
+Fixes: 2f57b7051fe8 ("KVM: x86/mmu: Persist gfn_lpage_is_disallowed() to max_level")
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/mmu/mmu.c | 13 +++++--------
+ 1 file changed, 5 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -2846,6 +2846,7 @@ int kvm_mmu_max_mapping_level(struct kvm
+ kvm_pfn_t pfn, int max_level)
+ {
+ struct kvm_lpage_info *linfo;
++ int host_level;
+
+ max_level = min(max_level, max_huge_page_level);
+ for ( ; max_level > PG_LEVEL_4K; max_level--) {
+@@ -2857,7 +2858,8 @@ int kvm_mmu_max_mapping_level(struct kvm
+ if (max_level == PG_LEVEL_4K)
+ return PG_LEVEL_4K;
+
+- return host_pfn_mapping_level(kvm, gfn, pfn, slot);
++ host_level = host_pfn_mapping_level(kvm, gfn, pfn, slot);
++ return min(host_level, max_level);
+ }
+
+ int kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, gfn_t gfn,
+@@ -2881,17 +2883,12 @@ int kvm_mmu_hugepage_adjust(struct kvm_v
+ if (!slot)
+ return PG_LEVEL_4K;
+
+- level = kvm_mmu_max_mapping_level(vcpu->kvm, slot, gfn, pfn, max_level);
+- if (level == PG_LEVEL_4K)
+- return level;
+-
+- *req_level = level = min(level, max_level);
+-
+ /*
+ * Enforce the iTLB multihit workaround after capturing the requested
+ * level, which will be used to do precise, accurate accounting.
+ */
+- if (huge_page_disallowed)
++ *req_level = level = kvm_mmu_max_mapping_level(vcpu->kvm, slot, gfn, pfn, max_level);
++ if (level == PG_LEVEL_4K || huge_page_disallowed)
+ return PG_LEVEL_4K;
+
+ /*
--- /dev/null
+From 088acd23526647844aec1c39db4ad02552c86c7b Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Mon, 2 Aug 2021 21:46:06 -0700
+Subject: KVM: x86/mmu: Avoid collision with !PRESENT SPTEs in TDP MMU lpage stats
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 088acd23526647844aec1c39db4ad02552c86c7b upstream.
+
+Factor in whether or not the old/new SPTEs are shadow-present when
+adjusting the large page stats in the TDP MMU. A modified MMIO SPTE can
+toggle the page size bit, as bit 7 is used to store the MMIO generation,
+i.e. is_large_pte() can get a false positive when called on a MMIO SPTE.
+Ditto for nuking SPTEs with REMOVED_SPTE, which sets bit 7 in its magic
+value.
+
+Opportunistically move the logic below the check to verify at least one
+of the old/new SPTEs is shadow present.
+
+Use is/was_leaf even though is/was_present would suffice. The code
+generation is roughly equivalent since all flags need to be computed
+prior to the code in question, and using the *_leaf flags will minimize
+the diff in a future enhancement to account all pages, i.e. will change
+the check to "is_leaf != was_leaf".
+
+Reviewed-by: David Matlack <dmatlack@google.com>
+Reviewed-by: Ben Gardon <bgardon@google.com>
+
+Fixes: 1699f65c8b65 ("kvm/x86: Fix 'lpages' kvm stat for TDM MMU")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Mingwei Zhang <mizhang@google.com>
+Message-Id: <20210803044607.599629-3-mizhang@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/mmu/tdp_mmu.c | 20 +++++++++++++-------
+ 1 file changed, 13 insertions(+), 7 deletions(-)
+
+--- a/arch/x86/kvm/mmu/tdp_mmu.c
++++ b/arch/x86/kvm/mmu/tdp_mmu.c
+@@ -412,6 +412,7 @@ static void __handle_changed_spte(struct
+ bool was_leaf = was_present && is_last_spte(old_spte, level);
+ bool is_leaf = is_present && is_last_spte(new_spte, level);
+ bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte);
++ bool was_large, is_large;
+
+ WARN_ON(level > PT64_ROOT_MAX_LEVEL);
+ WARN_ON(level < PG_LEVEL_4K);
+@@ -445,13 +446,6 @@ static void __handle_changed_spte(struct
+
+ trace_kvm_tdp_mmu_spte_changed(as_id, gfn, level, old_spte, new_spte);
+
+- if (is_large_pte(old_spte) != is_large_pte(new_spte)) {
+- if (is_large_pte(old_spte))
+- atomic64_sub(1, (atomic64_t*)&kvm->stat.lpages);
+- else
+- atomic64_add(1, (atomic64_t*)&kvm->stat.lpages);
+- }
+-
+ /*
+ * The only times a SPTE should be changed from a non-present to
+ * non-present state is when an MMIO entry is installed/modified/
+@@ -477,6 +471,18 @@ static void __handle_changed_spte(struct
+ return;
+ }
+
++ /*
++ * Update large page stats if a large page is being zapped, created, or
++ * is replacing an existing shadow page.
++ */
++ was_large = was_leaf && is_large_pte(old_spte);
++ is_large = is_leaf && is_large_pte(new_spte);
++ if (was_large != is_large) {
++ if (was_large)
++ atomic64_sub(1, (atomic64_t *)&kvm->stat.lpages);
++ else
++ atomic64_add(1, (atomic64_t *)&kvm->stat.lpages);
++ }
+
+ if (was_leaf && is_dirty_spte(old_spte) &&
+ (!is_present || !is_dirty_spte(new_spte) || pfn_changed))
--- /dev/null
+From d9130a2dfdd4b21736c91b818f87dbc0ccd1e757 Mon Sep 17 00:00:00 2001
+From: Zelin Deng <zelin.deng@linux.alibaba.com>
+Date: Wed, 28 Apr 2021 10:22:01 +0800
+Subject: KVM: x86: Update vCPU's hv_clock before back to guest when tsc_offset is adjusted
+
+From: Zelin Deng <zelin.deng@linux.alibaba.com>
+
+commit d9130a2dfdd4b21736c91b818f87dbc0ccd1e757 upstream.
+
+When MSR_IA32_TSC_ADJUST is written by the guest due to the TSC ADJUST
+feature, especially when there's a big TSC warp (e.g. a new vCPU is
+hot-added into a VM which has been up for a long time), a large value is
+added to tsc_offset before going back to the guest. This causes the system
+time to jump, as tsc_timestamp is not adjusted in the meantime, breaking
+the monotonic character of pvclock. To fix this, just notify KVM to update
+the vCPU's guest time before going back to the guest.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Zelin Deng <zelin.deng@linux.alibaba.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Message-Id: <1619576521-81399-2-git-send-email-zelin.deng@linux.alibaba.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/x86.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -3316,6 +3316,10 @@ int kvm_set_msr_common(struct kvm_vcpu *
+ if (!msr_info->host_initiated) {
+ s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
+ adjust_tsc_offset_guest(vcpu, adj);
++ /* Before back to guest, tsc_timestamp must be adjusted
++ * as well, otherwise guest's percpu pvclock time could jump.
++ */
++ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
+ }
+ vcpu->arch.ia32_tsc_adjust_msr = data;
+ }
--- /dev/null
+From 46d4703b1db4c86ab5acb2331b10df999f005e8e Mon Sep 17 00:00:00 2001
+From: Xiao Ni <xni@redhat.com>
+Date: Wed, 18 Aug 2021 13:57:48 +0800
+Subject: md/raid10: Remove unnecessary rcu_dereference in raid10_handle_discard
+
+From: Xiao Ni <xni@redhat.com>
+
+commit 46d4703b1db4c86ab5acb2331b10df999f005e8e upstream.
+
+We are seeing the following warning in raid10_handle_discard.
+[ 695.110751] =============================
+[ 695.131439] WARNING: suspicious RCU usage
+[ 695.151389] 4.18.0-319.el8.x86_64+debug #1 Not tainted
+[ 695.174413] -----------------------------
+[ 695.192603] drivers/md/raid10.c:1776 suspicious
+rcu_dereference_check() usage!
+[ 695.225107] other info that might help us debug this:
+[ 695.260940] rcu_scheduler_active = 2, debug_locks = 1
+[ 695.290157] no locks held by mkfs.xfs/10186.
+
+In the first loop of raid10_handle_discard(), it already determines which
+disks need to handle the discard request and takes a reference on their
+rdev by incrementing rdev->nr_pending. So conf->mirrors will not change
+until all bios come back from the underlying disks, and the second loop
+doesn't need to use rcu_dereference to get the rdev.
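+
+The resulting pattern, sketched (simplified from the code below;
+want_discard() and issue_discard() are made-up placeholders):
+
+  rcu_read_lock();
+  for (disk = 0; disk < geo->raid_disks; disk++) {	/* first pass */
+  	struct md_rdev *rdev = rcu_dereference(conf->mirrors[disk].rdev);
+
+  	if (rdev && want_discard(rdev)) {
+  		atomic_inc(&rdev->nr_pending);		/* pins the rdev */
+  		r10_bio->devs[disk].bio = bio;
+  	}
+  }
+  rcu_read_unlock();
+
+  for (disk = 0; disk < geo->raid_disks; disk++) {	/* second pass */
+  	if (r10_bio->devs[disk].bio) {
+  		/* rdev can't go away while nr_pending is raised */
+  		struct md_rdev *rdev = conf->mirrors[disk].rdev;
+
+  		issue_discard(rdev);
+  	}
+  }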
+
+Cc: stable@vger.kernel.org
+Fixes: d30588b2731f ('md/raid10: improve raid10 discard request')
+Signed-off-by: Xiao Ni <xni@redhat.com>
+Acked-by: Guoqing Jiang <guoqing.jiang@linux.dev>
+Signed-off-by: Song Liu <songliubraving@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/raid10.c | 14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+--- a/drivers/md/raid10.c
++++ b/drivers/md/raid10.c
+@@ -1712,6 +1712,11 @@ retry_discard:
+ } else
+ r10_bio->master_bio = (struct bio *)first_r10bio;
+
++ /*
++ * first select target devices under rcu_lock and
++ * inc refcount on their rdev. Record them by setting
++ * bios[x] to bio
++ */
+ rcu_read_lock();
+ for (disk = 0; disk < geo->raid_disks; disk++) {
+ struct md_rdev *rdev = rcu_dereference(conf->mirrors[disk].rdev);
+@@ -1743,9 +1748,6 @@ retry_discard:
+ for (disk = 0; disk < geo->raid_disks; disk++) {
+ sector_t dev_start, dev_end;
+ struct bio *mbio, *rbio = NULL;
+- struct md_rdev *rdev = rcu_dereference(conf->mirrors[disk].rdev);
+- struct md_rdev *rrdev = rcu_dereference(
+- conf->mirrors[disk].replacement);
+
+ /*
+ * Now start to calculate the start and end address for each disk.
+@@ -1775,9 +1777,12 @@ retry_discard:
+
+ /*
+ * It only handles discard bio which size is >= stripe size, so
+- * dev_end > dev_start all the time
++ * dev_end > dev_start all the time.
++ * It doesn't need to use rcu lock to get rdev here. We already
++ * add rdev->nr_pending in the first loop.
+ */
+ if (r10_bio->devs[disk].bio) {
++ struct md_rdev *rdev = conf->mirrors[disk].rdev;
+ mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
+ mbio->bi_end_io = raid10_end_discard_request;
+ mbio->bi_private = r10_bio;
+@@ -1790,6 +1795,7 @@ retry_discard:
+ bio_endio(mbio);
+ }
+ if (r10_bio->devs[disk].repl_bio) {
++ struct md_rdev *rrdev = conf->mirrors[disk].replacement;
+ rbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
+ rbio->bi_end_io = raid10_end_discard_request;
+ rbio->bi_private = r10_bio;
--- /dev/null
+From 3f2cbe3810a60111a33f5f6267bd5a237b826fc9 Mon Sep 17 00:00:00 2001
+From: Alexander Antonov <alexander.antonov@linux.intel.com>
+Date: Tue, 6 Jul 2021 12:07:23 +0300
+Subject: perf/x86/intel/uncore: Fix IIO cleanup mapping procedure for SNR/ICX
+
+From: Alexander Antonov <alexander.antonov@linux.intel.com>
+
+commit 3f2cbe3810a60111a33f5f6267bd5a237b826fc9 upstream.
+
+skx_iio_cleanup_mapping() is re-used for snr and icx, but in those
+cases it fails to use the appropriate XXX_iio_mapping_group and as
+such fails to free previously allocated resources, leading to memory
+leaks.
+
+Fixes: 10337e95e04c ("perf/x86/intel/uncore: Enable I/O stacks to IIO PMON mapping on ICX")
+Signed-off-by: Alexander Antonov <alexander.antonov@linux.intel.com>
+[peterz: Changelog]
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20210706090723.41850-1-alexander.antonov@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/events/intel/uncore_snbep.c | 40 ++++++++++++++++++++++++-----------
+ 1 file changed, 28 insertions(+), 12 deletions(-)
+
+--- a/arch/x86/events/intel/uncore_snbep.c
++++ b/arch/x86/events/intel/uncore_snbep.c
+@@ -3838,26 +3838,32 @@ clear_attr_update:
+ return ret;
+ }
+
+-static int skx_iio_set_mapping(struct intel_uncore_type *type)
+-{
+- return pmu_iio_set_mapping(type, &skx_iio_mapping_group);
+-}
+-
+-static void skx_iio_cleanup_mapping(struct intel_uncore_type *type)
++static void
++pmu_iio_cleanup_mapping(struct intel_uncore_type *type, struct attribute_group *ag)
+ {
+- struct attribute **attr = skx_iio_mapping_group.attrs;
++ struct attribute **attr = ag->attrs;
+
+ if (!attr)
+ return;
+
+ for (; *attr; attr++)
+ kfree((*attr)->name);
+- kfree(attr_to_ext_attr(*skx_iio_mapping_group.attrs));
+- kfree(skx_iio_mapping_group.attrs);
+- skx_iio_mapping_group.attrs = NULL;
++ kfree(attr_to_ext_attr(*ag->attrs));
++ kfree(ag->attrs);
++ ag->attrs = NULL;
+ kfree(type->topology);
+ }
+
++static int skx_iio_set_mapping(struct intel_uncore_type *type)
++{
++ return pmu_iio_set_mapping(type, &skx_iio_mapping_group);
++}
++
++static void skx_iio_cleanup_mapping(struct intel_uncore_type *type)
++{
++ pmu_iio_cleanup_mapping(type, &skx_iio_mapping_group);
++}
++
+ static struct intel_uncore_type skx_uncore_iio = {
+ .name = "iio",
+ .num_counters = 4,
+@@ -4501,6 +4507,11 @@ static int snr_iio_set_mapping(struct in
+ return pmu_iio_set_mapping(type, &snr_iio_mapping_group);
+ }
+
++static void snr_iio_cleanup_mapping(struct intel_uncore_type *type)
++{
++ pmu_iio_cleanup_mapping(type, &snr_iio_mapping_group);
++}
++
+ static struct intel_uncore_type snr_uncore_iio = {
+ .name = "iio",
+ .num_counters = 4,
+@@ -4517,7 +4528,7 @@ static struct intel_uncore_type snr_unco
+ .attr_update = snr_iio_attr_update,
+ .get_topology = snr_iio_get_topology,
+ .set_mapping = snr_iio_set_mapping,
+- .cleanup_mapping = skx_iio_cleanup_mapping,
++ .cleanup_mapping = snr_iio_cleanup_mapping,
+ };
+
+ static struct intel_uncore_type snr_uncore_irp = {
+@@ -5092,6 +5103,11 @@ static int icx_iio_set_mapping(struct in
+ return pmu_iio_set_mapping(type, &icx_iio_mapping_group);
+ }
+
++static void icx_iio_cleanup_mapping(struct intel_uncore_type *type)
++{
++ pmu_iio_cleanup_mapping(type, &icx_iio_mapping_group);
++}
++
+ static struct intel_uncore_type icx_uncore_iio = {
+ .name = "iio",
+ .num_counters = 4,
+@@ -5109,7 +5125,7 @@ static struct intel_uncore_type icx_unco
+ .attr_update = icx_iio_attr_update,
+ .get_topology = icx_iio_get_topology,
+ .set_mapping = icx_iio_set_mapping,
+- .cleanup_mapping = skx_iio_cleanup_mapping,
++ .cleanup_mapping = icx_iio_cleanup_mapping,
+ };
+
+ static struct intel_uncore_type icx_uncore_irp = {
--- /dev/null
+From e7177339d7b5f9594b316842122b5fda9513d5e2 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Tue, 31 Aug 2021 09:42:22 -0700
+Subject: Revert "KVM: x86: mmu: Add guest physical address check in translate_gpa()"
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit e7177339d7b5f9594b316842122b5fda9513d5e2 upstream.
+
+Revert a misguided illegal GPA check when "translating" a non-nested GPA.
+The check is woefully incomplete as it does not fill in @exception as
+expected by all callers, which leads to KVM attempting to inject a bogus
+exception, potentially exposing kernel stack information in the process.
+
+ WARNING: CPU: 0 PID: 8469 at arch/x86/kvm/x86.c:525 exception_type+0x98/0xb0 arch/x86/kvm/x86.c:525
+ CPU: 1 PID: 8469 Comm: syz-executor531 Not tainted 5.14.0-rc7-syzkaller #0
+ RIP: 0010:exception_type+0x98/0xb0 arch/x86/kvm/x86.c:525
+ Call Trace:
+ x86_emulate_instruction+0xef6/0x1460 arch/x86/kvm/x86.c:7853
+ kvm_mmu_page_fault+0x2f0/0x1810 arch/x86/kvm/mmu/mmu.c:5199
+ handle_ept_misconfig+0xdf/0x3e0 arch/x86/kvm/vmx/vmx.c:5336
+ __vmx_handle_exit arch/x86/kvm/vmx/vmx.c:6021 [inline]
+ vmx_handle_exit+0x336/0x1800 arch/x86/kvm/vmx/vmx.c:6038
+ vcpu_enter_guest+0x2a1c/0x4430 arch/x86/kvm/x86.c:9712
+ vcpu_run arch/x86/kvm/x86.c:9779 [inline]
+ kvm_arch_vcpu_ioctl_run+0x47d/0x1b20 arch/x86/kvm/x86.c:10010
+ kvm_vcpu_ioctl+0x49e/0xe50 arch/x86/kvm/../../../virt/kvm/kvm_main.c:3652
+
+The bug has escaped notice because practically speaking the GPA check is
+useless. The GPA check in question only comes into play when KVM is
+walking guest page tables (or "translating" CR3), and KVM already handles
+illegal GPA checks by setting reserved bits in rsvd_bits_mask for each
+PxE, or in the case of CR3 for loading PTDPTRs, manually checks for an
+illegal CR3. This particular failure doesn't hit the existing reserved
+bits checks because syzbot sets guest.MAXPHYADDR=1, and IA32 architecture
+simply doesn't allow for such an absurd MAXPHYADDR, e.g. 32-bit paging
+doesn't define any reserved PA bits checks, which KVM emulates by only
+incorporating the reserved PA bits into the "high" bits, i.e. bits 63:32.
+
+Simply remove the bogus check. There is zero meaningful value and no
+architectural justification for supporting guest.MAXPHYADDR < 32, and
+properly filling the exception would introduce non-trivial complexity.
+
+This reverts commit ec7771ab471ba6a945350353617e2e3385d0e013.
+
+Fixes: ec7771ab471b ("KVM: x86: mmu: Add guest physical address check in translate_gpa()")
+Cc: stable@vger.kernel.org
+Reported-by: syzbot+200c08e88ae818f849ce@syzkaller.appspotmail.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20210831164224.1119728-2-seanjc@google.com>
+Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/mmu/mmu.c | 6 ------
+ 1 file changed, 6 deletions(-)
+
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -323,12 +323,6 @@ static bool check_mmio_spte(struct kvm_v
+ static gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
+ struct x86_exception *exception)
+ {
+- /* Check if guest physical address doesn't exceed guest maximum */
+- if (kvm_vcpu_is_illegal_gpa(vcpu, gpa)) {
+- exception->error_code |= PFERR_RSVD_MASK;
+- return UNMAPPED_GVA;
+- }
+-
+ return gpa;
+ }
+
cifs-do-not-leak-edeadlk-to-dgetents64-for-status_user_session_deleted.patch
smb3-fix-posix-extensions-mount-option.patch
tty-fix-data-race-between-tiocsti-and-flush_to_ldisc.patch
+perf-x86-intel-uncore-fix-iio-cleanup-mapping-procedure-for-snr-icx.patch
+revert-kvm-x86-mmu-add-guest-physical-address-check-in-translate_gpa.patch
+kvm-s390-index-kvm-arch.idle_mask-by-vcpu_idx.patch
+kvm-x86-update-vcpu-s-hv_clock-before-back-to-guest-when-tsc_offset-is-adjusted.patch
+kvm-x86-clamp-host-mapping-level-to-max_level-in-kvm_mmu_max_mapping_level.patch
+kvm-x86-mmu-avoid-collision-with-present-sptes-in-tdp-mmu-lpage-stats.patch
+kvm-vmx-avoid-running-vmx_handle_exit_irqoff-in-case-of-emulation.patch
+kvm-nvmx-unconditionally-clear-nested.pi_pending-on-nested-vm-enter.patch
+kvm-arm64-unregister-hyp-sections-from-kmemleak-in-protected-mode.patch
+kvm-arm64-vgic-resample-hw-pending-state-on-deactivation.patch
+arm-dts-at91-add-pinctrl-names-0-for-all-gpios.patch
+io-wq-check-max_worker-limits-if-a-worker-transitions-bound-state.patch
+md-raid10-remove-unnecessary-rcu_dereference-in-raid10_handle_discard.patch
+char-tpm-kconfig-remove-bad-i2c-cr50-select.patch
+fuse-truncate-pagecache-on-atomic_o_trunc.patch
+fuse-flush-extending-writes.patch
+fuse-wait-for-writepages-in-syncfs.patch
+ima-remove-wmissing-prototypes-warning.patch
+ima-remove-the-dependency-on-crypto_md5.patch