5.10-stable patches
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Sat, 21 May 2022 14:30:58 +0000 (16:30 +0200)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Sat, 21 May 2022 14:30:58 +0000 (16:30 +0200)
added patches:
arm64-mte-ensure-the-cleared-tags-are-visible-before-setting-the-pte.patch
arm64-paravirt-use-rcu-read-locks-to-guard-stolen_time.patch
crypto-qcom-rng-fix-infinite-loop-on-requests-not-multiple-of-word_sz.patch
dma-buf-fix-use-of-dma_buf_set_name_-a-b-in-userspace.patch
drm-dp-mst-fix-a-possible-memory-leak-in-fetch_monitor_name.patch
fix-double-fget-in-vhost_net_set_backend.patch
kvm-x86-mmu-update-number-of-zapped-pages-even-if-page-list-is-stable.patch
libceph-fix-potential-use-after-free-on-linger-ping-and-resends.patch
pci-pm-avoid-putting-elo-i2-pcie-ports-in-d3cold.patch
perf-fix-sys_perf_event_open-race-against-self.patch
selinux-fix-bad-cleanup-on-error-in-hashtab_duplicate.patch

12 files changed:
queue-5.10/arm64-mte-ensure-the-cleared-tags-are-visible-before-setting-the-pte.patch [new file with mode: 0644]
queue-5.10/arm64-paravirt-use-rcu-read-locks-to-guard-stolen_time.patch [new file with mode: 0644]
queue-5.10/crypto-qcom-rng-fix-infinite-loop-on-requests-not-multiple-of-word_sz.patch [new file with mode: 0644]
queue-5.10/dma-buf-fix-use-of-dma_buf_set_name_-a-b-in-userspace.patch [new file with mode: 0644]
queue-5.10/drm-dp-mst-fix-a-possible-memory-leak-in-fetch_monitor_name.patch [new file with mode: 0644]
queue-5.10/fix-double-fget-in-vhost_net_set_backend.patch [new file with mode: 0644]
queue-5.10/kvm-x86-mmu-update-number-of-zapped-pages-even-if-page-list-is-stable.patch [new file with mode: 0644]
queue-5.10/libceph-fix-potential-use-after-free-on-linger-ping-and-resends.patch [new file with mode: 0644]
queue-5.10/pci-pm-avoid-putting-elo-i2-pcie-ports-in-d3cold.patch [new file with mode: 0644]
queue-5.10/perf-fix-sys_perf_event_open-race-against-self.patch [new file with mode: 0644]
queue-5.10/selinux-fix-bad-cleanup-on-error-in-hashtab_duplicate.patch [new file with mode: 0644]
queue-5.10/series

diff --git a/queue-5.10/arm64-mte-ensure-the-cleared-tags-are-visible-before-setting-the-pte.patch b/queue-5.10/arm64-mte-ensure-the-cleared-tags-are-visible-before-setting-the-pte.patch
new file mode 100644 (file)
index 0000000..ef94584
--- /dev/null
@@ -0,0 +1,41 @@
+From 1d0cb4c8864addc362bae98e8ffa5500c87e1227 Mon Sep 17 00:00:00 2001
+From: Catalin Marinas <catalin.marinas@arm.com>
+Date: Tue, 17 May 2022 10:35:32 +0100
+Subject: arm64: mte: Ensure the cleared tags are visible before setting the PTE
+
+From: Catalin Marinas <catalin.marinas@arm.com>
+
+commit 1d0cb4c8864addc362bae98e8ffa5500c87e1227 upstream.
+
+As an optimisation, only pages mapped with PROT_MTE in user space have
+the MTE tags zeroed. This is done lazily at set_pte_at() time via
+mte_sync_tags(). However, this function is missing a barrier and another
+CPU may see the PTE updated before the zeroed tags are visible. Add an
+smp_wmb() barrier if the mapping is Normal Tagged.
+
+Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
+Fixes: 34bfeea4a9e9 ("arm64: mte: Clear the tags when a page is mapped in user-space with PROT_MTE")
+Cc: <stable@vger.kernel.org> # 5.10.x
+Reported-by: Vladimir Murzin <vladimir.murzin@arm.com>
+Cc: Will Deacon <will@kernel.org>
+Reviewed-by: Steven Price <steven.price@arm.com>
+Tested-by: Vladimir Murzin <vladimir.murzin@arm.com>
+Link: https://lore.kernel.org/r/20220517093532.127095-1-catalin.marinas@arm.com
+Signed-off-by: Will Deacon <will@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/kernel/mte.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/arm64/kernel/mte.c
++++ b/arch/arm64/kernel/mte.c
+@@ -45,6 +45,9 @@ void mte_sync_tags(pte_t *ptep, pte_t pt
+               if (!test_and_set_bit(PG_mte_tagged, &page->flags))
+                       mte_sync_page_tags(page, ptep, check_swap);
+       }
++
++      /* ensure the tags are visible before the PTE is set */
++      smp_wmb();
+ }
+ int memcmp_pages(struct page *page1, struct page *page2)
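
The bug above is the classic publish pattern: the data (the cleared tags) must
become visible before the flag that makes it reachable (the valid PTE). A
minimal userspace analogue of that ordering, assuming only C11 atomics and
pthreads - the names tags and pte_valid are invented for the sketch, and the
release store stands in for smp_wmb() paired with an acquire load on the
reader side:

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>
    #include <string.h>

    static char tags[64];              /* stands in for the MTE tag storage */
    static _Atomic int pte_valid;      /* stands in for the published PTE */

    static void *writer(void *arg)
    {
        memset(tags, 0, sizeof(tags));           /* clear the tags first */
        /* release ordering plays the role of smp_wmb(): the cleared tags
         * must be visible before the "PTE" can be observed as set */
        atomic_store_explicit(&pte_valid, 1, memory_order_release);
        return NULL;
    }

    static void *reader(void *arg)
    {
        if (atomic_load_explicit(&pte_valid, memory_order_acquire))
            printf("tags[0] = %d\n", tags[0]);   /* guaranteed to see 0 */
        return NULL;
    }

    int main(void)
    {
        pthread_t w, r;
        pthread_create(&w, NULL, writer, NULL);
        pthread_create(&r, NULL, reader, NULL);
        pthread_join(w, NULL);
        pthread_join(r, NULL);
        return 0;
    }
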
diff --git a/queue-5.10/arm64-paravirt-use-rcu-read-locks-to-guard-stolen_time.patch b/queue-5.10/arm64-paravirt-use-rcu-read-locks-to-guard-stolen_time.patch
new file mode 100644 (file)
index 0000000..d77e8de
--- /dev/null
@@ -0,0 +1,145 @@
+From 19bef63f951e47dd4ba54810e6f7c7ff9344a3ef Mon Sep 17 00:00:00 2001
+From: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
+Date: Fri, 13 May 2022 10:46:54 -0700
+Subject: arm64: paravirt: Use RCU read locks to guard stolen_time
+
+From: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
+
+commit 19bef63f951e47dd4ba54810e6f7c7ff9344a3ef upstream.
+
+During hotplug, the stolen time data structure is unmapped and zeroed
+with memset(). The timer IRQ can be triggered before the memset() while
+stolen time is being updated as part of the timer IRQ handler. This
+causes the below crash in the timer handler -
+
+  [ 3457.473139][    C5] Unable to handle kernel paging request at virtual address ffffffc03df05148
+  ...
+  [ 3458.154398][    C5] Call trace:
+  [ 3458.157648][    C5]  para_steal_clock+0x30/0x50
+  [ 3458.162319][    C5]  irqtime_account_process_tick+0x30/0x194
+  [ 3458.168148][    C5]  account_process_tick+0x3c/0x280
+  [ 3458.173274][    C5]  update_process_times+0x5c/0xf4
+  [ 3458.178311][    C5]  tick_sched_timer+0x180/0x384
+  [ 3458.183164][    C5]  __run_hrtimer+0x160/0x57c
+  [ 3458.187744][    C5]  hrtimer_interrupt+0x258/0x684
+  [ 3458.192698][    C5]  arch_timer_handler_virt+0x5c/0xa0
+  [ 3458.198002][    C5]  handle_percpu_devid_irq+0xdc/0x414
+  [ 3458.203385][    C5]  handle_domain_irq+0xa8/0x168
+  [ 3458.208241][    C5]  gic_handle_irq.34493+0x54/0x244
+  [ 3458.213359][    C5]  call_on_irq_stack+0x40/0x70
+  [ 3458.218125][    C5]  do_interrupt_handler+0x60/0x9c
+  [ 3458.223156][    C5]  el1_interrupt+0x34/0x64
+  [ 3458.227560][    C5]  el1h_64_irq_handler+0x1c/0x2c
+  [ 3458.232503][    C5]  el1h_64_irq+0x7c/0x80
+  [ 3458.236736][    C5]  free_vmap_area_noflush+0x108/0x39c
+  [ 3458.242126][    C5]  remove_vm_area+0xbc/0x118
+  [ 3458.246714][    C5]  vm_remove_mappings+0x48/0x2a4
+  [ 3458.251656][    C5]  __vunmap+0x154/0x278
+  [ 3458.255796][    C5]  stolen_time_cpu_down_prepare+0xc0/0xd8
+  [ 3458.261542][    C5]  cpuhp_invoke_callback+0x248/0xc34
+  [ 3458.266842][    C5]  cpuhp_thread_fun+0x1c4/0x248
+  [ 3458.271696][    C5]  smpboot_thread_fn+0x1b0/0x400
+  [ 3458.276638][    C5]  kthread+0x17c/0x1e0
+  [ 3458.280691][    C5]  ret_from_fork+0x10/0x20
+
+As a fix, use RCU to guard accesses to the stolen time structure.
+
+Fixes: 75df529bec91 ("arm64: paravirt: Initialize steal time when cpu is online")
+Cc: stable@vger.kernel.org
+Suggested-by: Will Deacon <will@kernel.org>
+Signed-off-by: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
+Signed-off-by: Elliot Berman <quic_eberman@quicinc.com>
+Reviewed-by: Srivatsa S. Bhat (VMware) <srivatsa@csail.mit.edu>
+Link: https://lore.kernel.org/r/20220513174654.362169-1-quic_eberman@quicinc.com
+Signed-off-by: Will Deacon <will@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/kernel/paravirt.c |   29 +++++++++++++++++++++--------
+ 1 file changed, 21 insertions(+), 8 deletions(-)
+
+--- a/arch/arm64/kernel/paravirt.c
++++ b/arch/arm64/kernel/paravirt.c
+@@ -30,7 +30,7 @@ struct paravirt_patch_template pv_ops;
+ EXPORT_SYMBOL_GPL(pv_ops);
+ struct pv_time_stolen_time_region {
+-      struct pvclock_vcpu_stolen_time *kaddr;
++      struct pvclock_vcpu_stolen_time __rcu *kaddr;
+ };
+ static DEFINE_PER_CPU(struct pv_time_stolen_time_region, stolen_time_region);
+@@ -47,7 +47,9 @@ early_param("no-steal-acc", parse_no_ste
+ /* return stolen time in ns by asking the hypervisor */
+ static u64 pv_steal_clock(int cpu)
+ {
++      struct pvclock_vcpu_stolen_time *kaddr = NULL;
+       struct pv_time_stolen_time_region *reg;
++      u64 ret = 0;
+       reg = per_cpu_ptr(&stolen_time_region, cpu);
+@@ -56,28 +58,37 @@ static u64 pv_steal_clock(int cpu)
+        * online notification callback runs. Until the callback
+        * has run we just return zero.
+        */
+-      if (!reg->kaddr)
++      rcu_read_lock();
++      kaddr = rcu_dereference(reg->kaddr);
++      if (!kaddr) {
++              rcu_read_unlock();
+               return 0;
++      }
+-      return le64_to_cpu(READ_ONCE(reg->kaddr->stolen_time));
++      ret = le64_to_cpu(READ_ONCE(kaddr->stolen_time));
++      rcu_read_unlock();
++      return ret;
+ }
+ static int stolen_time_cpu_down_prepare(unsigned int cpu)
+ {
++      struct pvclock_vcpu_stolen_time *kaddr = NULL;
+       struct pv_time_stolen_time_region *reg;
+       reg = this_cpu_ptr(&stolen_time_region);
+       if (!reg->kaddr)
+               return 0;
+-      memunmap(reg->kaddr);
+-      memset(reg, 0, sizeof(*reg));
++      kaddr = rcu_replace_pointer(reg->kaddr, NULL, true);
++      synchronize_rcu();
++      memunmap(kaddr);
+       return 0;
+ }
+ static int stolen_time_cpu_online(unsigned int cpu)
+ {
++      struct pvclock_vcpu_stolen_time *kaddr = NULL;
+       struct pv_time_stolen_time_region *reg;
+       struct arm_smccc_res res;
+@@ -88,17 +99,19 @@ static int stolen_time_cpu_online(unsign
+       if (res.a0 == SMCCC_RET_NOT_SUPPORTED)
+               return -EINVAL;
+-      reg->kaddr = memremap(res.a0,
++      kaddr = memremap(res.a0,
+                             sizeof(struct pvclock_vcpu_stolen_time),
+                             MEMREMAP_WB);
++      rcu_assign_pointer(reg->kaddr, kaddr);
++
+       if (!reg->kaddr) {
+               pr_warn("Failed to map stolen time data structure\n");
+               return -ENOMEM;
+       }
+-      if (le32_to_cpu(reg->kaddr->revision) != 0 ||
+-          le32_to_cpu(reg->kaddr->attributes) != 0) {
++      if (le32_to_cpu(kaddr->revision) != 0 ||
++          le32_to_cpu(kaddr->attributes) != 0) {
+               pr_warn_once("Unexpected revision or attributes in stolen time data\n");
+               return -ENXIO;
+       }
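
The shape of this fix - readers dereference the pointer inside an RCU
read-side section, teardown unpublishes the pointer and waits for readers to
drain before freeing - can be sketched in userspace with liburcu. This
assumes liburcu is installed (build with -lurcu); region, read_stolen() and
teardown() are invented names mirroring reg->kaddr, pv_steal_clock() and
stolen_time_cpu_down_prepare():

    /* build: gcc stolen.c -lurcu */
    #include <urcu.h>                /* userspace RCU, default flavor */
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct stolen_time { uint64_t ns; };

    static struct stolen_time *region;   /* RCU-protected, like reg->kaddr */

    static uint64_t read_stolen(void)    /* mirrors pv_steal_clock() */
    {
        struct stolen_time *p;
        uint64_t ret = 0;

        rcu_read_lock();
        p = rcu_dereference(region);
        if (p)                           /* not mapped yet: report zero */
            ret = p->ns;
        rcu_read_unlock();
        return ret;
    }

    static void teardown(void)   /* mirrors stolen_time_cpu_down_prepare() */
    {
        struct stolen_time *old = region;

        rcu_assign_pointer(region, NULL);
        synchronize_rcu();               /* wait out every reader ... */
        free(old);                       /* ... before the memory goes away */
    }

    int main(void)
    {
        rcu_register_thread();
        region = calloc(1, sizeof(*region));
        printf("stolen = %lu ns\n", (unsigned long)read_stolen());
        teardown();
        rcu_unregister_thread();
        return 0;
    }
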
diff --git a/queue-5.10/crypto-qcom-rng-fix-infinite-loop-on-requests-not-multiple-of-word_sz.patch b/queue-5.10/crypto-qcom-rng-fix-infinite-loop-on-requests-not-multiple-of-word_sz.patch
new file mode 100644 (file)
index 0000000..44f9d80
--- /dev/null
@@ -0,0 +1,40 @@
+From 16287397ec5c08aa58db6acf7dbc55470d78087d Mon Sep 17 00:00:00 2001
+From: Ondrej Mosnacek <omosnace@redhat.com>
+Date: Tue, 3 May 2022 13:50:10 +0200
+Subject: crypto: qcom-rng - fix infinite loop on requests not multiple of WORD_SZ
+
+From: Ondrej Mosnacek <omosnace@redhat.com>
+
+commit 16287397ec5c08aa58db6acf7dbc55470d78087d upstream.
+
+The commit referenced in the Fixes tag removed the 'break' from the else
+branch in qcom_rng_read(), causing an infinite loop whenever 'max' is
+not a multiple of WORD_SZ. This can be reproduced e.g. by running:
+
+    kcapi-rng -b 67 >/dev/null
+
+There are many ways to fix this without adding back the 'break', but
+they all seem more awkward than simply adding it back, so do just that.
+
+Tested on a machine with a Qualcomm Amberwing processor.
+
+Fixes: a680b1832ced ("crypto: qcom-rng - ensure buffer for generate is completely filled")
+Cc: stable@vger.kernel.org
+Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
+Reviewed-by: Brian Masney <bmasney@redhat.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/crypto/qcom-rng.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/crypto/qcom-rng.c
++++ b/drivers/crypto/qcom-rng.c
+@@ -65,6 +65,7 @@ static int qcom_rng_read(struct qcom_rng
+               } else {
+                       /* copy only remaining bytes */
+                       memcpy(data, &val, max - currsize);
++                      break;
+               }
+       } while (currsize < max);
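
Stripped of the crypto API plumbing, the loop and the reason the break
matters reduce to the sketch below; fake_hw_word() is an invented stand-in
for the device FIFO read, and 67 is the request size from the reproducer
above. Without the break, the else branch never advances currsize, so
currsize < max stays true forever:

    #include <stdio.h>
    #include <string.h>

    #define WORD_SZ 4

    static unsigned int fake_hw_word(void) { return 0xdeadbeefu; }

    static void rng_read(unsigned char *data, unsigned int max)
    {
        unsigned int currsize = 0;

        do {
            unsigned int val = fake_hw_word();

            if (max - currsize >= WORD_SZ) {
                memcpy(data + currsize, &val, WORD_SZ);
                currsize += WORD_SZ;
            } else {
                /* copy only the remaining bytes */
                memcpy(data + currsize, &val, max - currsize);
                break;           /* restored by the patch above */
            }
        } while (currsize < max);
    }

    int main(void)
    {
        unsigned char buf[67];   /* 67 is not a multiple of WORD_SZ */
        rng_read(buf, sizeof(buf));
        puts("done");            /* never reached if the break is removed */
        return 0;
    }
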
diff --git a/queue-5.10/dma-buf-fix-use-of-dma_buf_set_name_-a-b-in-userspace.patch b/queue-5.10/dma-buf-fix-use-of-dma_buf_set_name_-a-b-in-userspace.patch
new file mode 100644 (file)
index 0000000..133efc9
--- /dev/null
@@ -0,0 +1,54 @@
+From 7c3e9fcad9c7d8bb5d69a576044fb16b1d2e8a01 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Pouiller?= <jerome.pouiller@silabs.com>
+Date: Tue, 17 May 2022 09:27:08 +0200
+Subject: dma-buf: fix use of DMA_BUF_SET_NAME_{A,B} in userspace
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Jérôme Pouiller <jerome.pouiller@silabs.com>
+
+commit 7c3e9fcad9c7d8bb5d69a576044fb16b1d2e8a01 upstream.
+
+The typedefs u32 and u64 are not available in userspace. Thus users get
+an error when they try to use DMA_BUF_SET_NAME_A or DMA_BUF_SET_NAME_B:
+
+    $ gcc -Wall   -c -MMD -c -o ioctls_list.o ioctls_list.c
+    In file included from /usr/include/x86_64-linux-gnu/asm/ioctl.h:1,
+                     from /usr/include/linux/ioctl.h:5,
+                     from /usr/include/asm-generic/ioctls.h:5,
+                     from ioctls_list.c:11:
+    ioctls_list.c:463:29: error: ‘u32’ undeclared here (not in a function)
+      463 |     { "DMA_BUF_SET_NAME_A", DMA_BUF_SET_NAME_A, -1, -1 }, // linux/dma-buf.h
+          |                             ^~~~~~~~~~~~~~~~~~
+    ioctls_list.c:464:29: error: ‘u64’ undeclared here (not in a function)
+      464 |     { "DMA_BUF_SET_NAME_B", DMA_BUF_SET_NAME_B, -1, -1 }, // linux/dma-buf.h
+          |                             ^~~~~~~~~~~~~~~~~~
+
+The issue was initially reported here[1].
+
+[1]: https://github.com/jerome-pouiller/ioctl/pull/14
+
+Signed-off-by: Jérôme Pouiller <jerome.pouiller@silabs.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Fixes: a5bff92eaac4 ("dma-buf: Fix SET_NAME ioctl uapi")
+CC: stable@vger.kernel.org
+Link: https://patchwork.freedesktop.org/patch/msgid/20220517072708.245265-1-Jerome.Pouiller@silabs.com
+Signed-off-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/uapi/linux/dma-buf.h |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/include/uapi/linux/dma-buf.h
++++ b/include/uapi/linux/dma-buf.h
+@@ -44,7 +44,7 @@ struct dma_buf_sync {
+  * between them in actual uapi, they're just different numbers.
+  */
+ #define DMA_BUF_SET_NAME      _IOW(DMA_BUF_BASE, 1, const char *)
+-#define DMA_BUF_SET_NAME_A    _IOW(DMA_BUF_BASE, 1, u32)
+-#define DMA_BUF_SET_NAME_B    _IOW(DMA_BUF_BASE, 1, u64)
++#define DMA_BUF_SET_NAME_A    _IOW(DMA_BUF_BASE, 1, __u32)
++#define DMA_BUF_SET_NAME_B    _IOW(DMA_BUF_BASE, 1, __u64)
+ #endif
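
With the header fixed, the ioctl compiles in plain C. A fragment rather than
a full program - it assumes dmabuf_fd was obtained from a real exporter (a
GPU or V4L2 driver), which is out of scope here, and set_dmabuf_name() is an
invented helper:

    #include <linux/dma-buf.h>  /* DMA_BUF_SET_NAME, now __u32/__u64 based */
    #include <stdio.h>
    #include <sys/ioctl.h>

    /* attach a debug name (visible in debugfs) to a dma-buf fd */
    int set_dmabuf_name(int dmabuf_fd, const char *name)
    {
        if (ioctl(dmabuf_fd, DMA_BUF_SET_NAME, name) < 0) {
            perror("DMA_BUF_SET_NAME");
            return -1;
        }
        return 0;
    }
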
diff --git a/queue-5.10/drm-dp-mst-fix-a-possible-memory-leak-in-fetch_monitor_name.patch b/queue-5.10/drm-dp-mst-fix-a-possible-memory-leak-in-fetch_monitor_name.patch
new file mode 100644 (file)
index 0000000..d8e1c4c
--- /dev/null
@@ -0,0 +1,32 @@
+From 6e03b13cc7d9427c2c77feed1549191015615202 Mon Sep 17 00:00:00 2001
+From: Hangyu Hua <hbh25y@gmail.com>
+Date: Mon, 16 May 2022 11:20:42 +0800
+Subject: drm/dp/mst: fix a possible memory leak in fetch_monitor_name()
+
+From: Hangyu Hua <hbh25y@gmail.com>
+
+commit 6e03b13cc7d9427c2c77feed1549191015615202 upstream.
+
+drm_dp_mst_get_edid() calls kmemdup() to create mst_edid, so mst_edid
+needs to be freed after use.
+
+Signed-off-by: Hangyu Hua <hbh25y@gmail.com>
+Reviewed-by: Lyude Paul <lyude@redhat.com>
+Signed-off-by: Lyude Paul <lyude@redhat.com>
+Cc: stable@vger.kernel.org
+Link: https://patchwork.freedesktop.org/patch/msgid/20220516032042.13166-1-hbh25y@gmail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/drm_dp_mst_topology.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/gpu/drm/drm_dp_mst_topology.c
++++ b/drivers/gpu/drm/drm_dp_mst_topology.c
+@@ -4792,6 +4792,7 @@ static void fetch_monitor_name(struct dr
+       mst_edid = drm_dp_mst_get_edid(port->connector, mgr, port);
+       drm_edid_get_monitor_name(mst_edid, name, namelen);
++      kfree(mst_edid);
+ }
+ /**
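
The underlying contract is the usual kmemdup() one: the helper hands back a
caller-owned copy, so every call site must free it. A tiny userspace
analogue with an invented get_edid_copy() helper:

    #include <stdlib.h>
    #include <string.h>

    /* like drm_dp_mst_get_edid(): returns a heap copy the caller owns */
    static char *get_edid_copy(const char *src, size_t len)
    {
        char *p = malloc(len);
        if (p)
            memcpy(p, src, len);
        return p;
    }

    int main(void)
    {
        const char edid[] = "FAKE-EDID-BLOB";
        char name[8];
        char *mst_edid = get_edid_copy(edid, sizeof(edid));

        if (mst_edid) {
            memcpy(name, mst_edid, sizeof(name));   /* "parse" the name */
            free(mst_edid);     /* the one-line fix: don't leak the copy */
        }
        return 0;
    }
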
diff --git a/queue-5.10/fix-double-fget-in-vhost_net_set_backend.patch b/queue-5.10/fix-double-fget-in-vhost_net_set_backend.patch
new file mode 100644 (file)
index 0000000..e67dbef
--- /dev/null
@@ -0,0 +1,69 @@
+From fb4554c2232e44d595920f4d5c66cf8f7d13f9bc Mon Sep 17 00:00:00 2001
+From: Al Viro <viro@zeniv.linux.org.uk>
+Date: Mon, 16 May 2022 16:42:13 +0800
+Subject: Fix double fget() in vhost_net_set_backend()
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+commit fb4554c2232e44d595920f4d5c66cf8f7d13f9bc upstream.
+
+Descriptor table is a shared resource; two fget() on the same descriptor
+may return different struct file references.  get_tap_ptr_ring() is
+called after we'd found (and pinned) the socket we'll be using and it
+tries to find the private tun/tap data structures associated with it.
+Redoing the lookup by the same file descriptor we'd used to get the
+socket is racy - we need to use the same struct file.
+
+Thanks to Jason for spotting a braino in the original variant of the patch -
+I'd missed the use of fd == -1 for disabling backend, and in that case
+we can end up with sock == NULL and sock != oldsock.
+
+Cc: stable@kernel.org
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/vhost/net.c |   15 +++++++--------
+ 1 file changed, 7 insertions(+), 8 deletions(-)
+
+--- a/drivers/vhost/net.c
++++ b/drivers/vhost/net.c
+@@ -1450,13 +1450,9 @@ err:
+       return ERR_PTR(r);
+ }
+-static struct ptr_ring *get_tap_ptr_ring(int fd)
++static struct ptr_ring *get_tap_ptr_ring(struct file *file)
+ {
+       struct ptr_ring *ring;
+-      struct file *file = fget(fd);
+-
+-      if (!file)
+-              return NULL;
+       ring = tun_get_tx_ring(file);
+       if (!IS_ERR(ring))
+               goto out;
+@@ -1465,7 +1461,6 @@ static struct ptr_ring *get_tap_ptr_ring
+               goto out;
+       ring = NULL;
+ out:
+-      fput(file);
+       return ring;
+ }
+@@ -1552,8 +1547,12 @@ static long vhost_net_set_backend(struct
+               r = vhost_net_enable_vq(n, vq);
+               if (r)
+                       goto err_used;
+-              if (index == VHOST_NET_VQ_RX)
+-                      nvq->rx_ring = get_tap_ptr_ring(fd);
++              if (index == VHOST_NET_VQ_RX) {
++                      if (sock)
++                              nvq->rx_ring = get_tap_ptr_ring(sock->file);
++                      else
++                              nvq->rx_ring = NULL;
++              }
+               oldubufs = nvq->ubufs;
+               nvq->ubufs = ubufs;
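
The race is easy to reproduce in userspace, where the descriptor table is
shared between threads exactly as it is in the kernel: two consecutive
lookups of the same fd can resolve to different files if another thread
retargets the descriptor in between. The sketch below is an analogue of the
bug, not of the kernel code - the fix above instead reuses the struct file
that was already pinned for the socket:

    #include <fcntl.h>
    #include <pthread.h>
    #include <stdio.h>
    #include <sys/stat.h>
    #include <unistd.h>

    #define SHARED_FD 100            /* descriptor both threads use */

    static int a, b;                 /* two distinct open files */

    static void *flipper(void *arg)  /* keeps retargeting SHARED_FD */
    {
        for (;;) {
            dup2(a, SHARED_FD);
            dup2(b, SHARED_FD);
        }
        return NULL;
    }

    int main(void)
    {
        pthread_t t;
        unsigned long i;

        a = open("/dev/null", O_RDONLY);
        b = open("/dev/zero", O_RDONLY);
        dup2(a, SHARED_FD);
        pthread_create(&t, NULL, flipper, NULL);

        for (i = 0; ; i++) {
            struct stat s1, s2;

            fstat(SHARED_FD, &s1);   /* first lookup ("fget") */
            fstat(SHARED_FD, &s2);   /* second lookup, a moment later */
            if (s1.st_ino != s2.st_ino) {
                printf("iteration %lu: fd %d named two different files\n",
                       i, SHARED_FD);
                return 0;
            }
        }
    }
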
diff --git a/queue-5.10/kvm-x86-mmu-update-number-of-zapped-pages-even-if-page-list-is-stable.patch b/queue-5.10/kvm-x86-mmu-update-number-of-zapped-pages-even-if-page-list-is-stable.patch
new file mode 100644 (file)
index 0000000..106ffee
--- /dev/null
@@ -0,0 +1,71 @@
+From b28cb0cd2c5e80a8c0feb408a0e4b0dbb6d132c5 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Wed, 11 May 2022 14:51:22 +0000
+Subject: KVM: x86/mmu: Update number of zapped pages even if page list is stable
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit b28cb0cd2c5e80a8c0feb408a0e4b0dbb6d132c5 upstream.
+
+When zapping obsolete pages, update the running count of zapped pages
+regardless of whether or not the list has become unstable due to zapping
+a shadow page with its own child shadow pages.  If the VM is backed by
+mostly 4kb pages, KVM can zap an absurd number of SPTEs without bumping
+the batch count and thus without yielding.  In the worst case scenario,
+this can cause a soft lockup.
+
+ watchdog: BUG: soft lockup - CPU#12 stuck for 22s! [dirty_log_perf_:13020]
+   RIP: 0010:workingset_activation+0x19/0x130
+   mark_page_accessed+0x266/0x2e0
+   kvm_set_pfn_accessed+0x31/0x40
+   mmu_spte_clear_track_bits+0x136/0x1c0
+   drop_spte+0x1a/0xc0
+   mmu_page_zap_pte+0xef/0x120
+   __kvm_mmu_prepare_zap_page+0x205/0x5e0
+   kvm_mmu_zap_all_fast+0xd7/0x190
+   kvm_mmu_invalidate_zap_pages_in_memslot+0xe/0x10
+   kvm_page_track_flush_slot+0x5c/0x80
+   kvm_arch_flush_shadow_memslot+0xe/0x10
+   kvm_set_memslot+0x1a8/0x5d0
+   __kvm_set_memory_region+0x337/0x590
+   kvm_vm_ioctl+0xb08/0x1040
+
+Fixes: fbb158cb88b6 ("KVM: x86/mmu: Revert "Revert "KVM: MMU: zap pages in batch""")
+Reported-by: David Matlack <dmatlack@google.com>
+Reviewed-by: Ben Gardon <bgardon@google.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20220511145122.3133334-1-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/mmu/mmu.c |   10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -5375,6 +5375,7 @@ static void kvm_zap_obsolete_pages(struc
+ {
+       struct kvm_mmu_page *sp, *node;
+       int nr_zapped, batch = 0;
++      bool unstable;
+ restart:
+       list_for_each_entry_safe_reverse(sp, node,
+@@ -5406,11 +5407,12 @@ restart:
+                       goto restart;
+               }
+-              if (__kvm_mmu_prepare_zap_page(kvm, sp,
+-                              &kvm->arch.zapped_obsolete_pages, &nr_zapped)) {
+-                      batch += nr_zapped;
++              unstable = __kvm_mmu_prepare_zap_page(kvm, sp,
++                              &kvm->arch.zapped_obsolete_pages, &nr_zapped);
++              batch += nr_zapped;
++
++              if (unstable)
+                       goto restart;
+-              }
+       }
+       /*
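
The control flow of the fix, reduced to a toy loop: the batch counter is
bumped on every iteration, whether or not the walk has to restart, so the
yield point is always reached eventually. zap_one(), BATCH_ZAP_LIMIT and
sched_yield() are invented stand-ins for __kvm_mmu_prepare_zap_page(), the
kernel's batch threshold and cond_resched():

    #include <sched.h>
    #include <stdio.h>

    #define BATCH_ZAP_LIMIT 10

    static int zap_one(int *nr_zapped)
    {
        *nr_zapped = 1;          /* pretend one shadow page went away */
        return 0;                /* 0: the page list stayed stable */
    }

    int main(void)
    {
        int batch = 0;

        for (int i = 0; i < 1000; i++) {
            int nr_zapped;
            int unstable = zap_one(&nr_zapped);

            batch += nr_zapped;  /* the fix: count even when stable */

            if (unstable)
                continue;        /* the kernel restarts its walk here */

            if (batch >= BATCH_ZAP_LIMIT) {
                batch = 0;
                sched_yield();   /* the yield the bug kept skipping */
            }
        }
        puts("done");
        return 0;
    }
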
diff --git a/queue-5.10/libceph-fix-potential-use-after-free-on-linger-ping-and-resends.patch b/queue-5.10/libceph-fix-potential-use-after-free-on-linger-ping-and-resends.patch
new file mode 100644 (file)
index 0000000..576761f
--- /dev/null
@@ -0,0 +1,566 @@
+From 75dbb685f4e8786c33ddef8279bab0eadfb0731f Mon Sep 17 00:00:00 2001
+From: Ilya Dryomov <idryomov@gmail.com>
+Date: Sat, 14 May 2022 12:16:47 +0200
+Subject: libceph: fix potential use-after-free on linger ping and resends
+
+From: Ilya Dryomov <idryomov@gmail.com>
+
+commit 75dbb685f4e8786c33ddef8279bab0eadfb0731f upstream.
+
+request_reinit() is not only ugly as the comment rightfully suggests,
+but also unsafe.  Even though it is called with osdc->lock held for
+write in all cases, resetting the OSD request refcount can still race
+with handle_reply() and result in use-after-free.  Taking linger ping
+as an example:
+
+    handle_timeout thread                     handle_reply thread
+
+                                              down_read(&osdc->lock)
+                                              req = lookup_request(...)
+                                              ...
+                                              finish_request(req)  # unregisters
+                                              up_read(&osdc->lock)
+                                              __complete_request(req)
+                                                linger_ping_cb(req)
+
+      # req->r_kref == 2 because handle_reply still holds its ref
+
+    down_write(&osdc->lock)
+    send_linger_ping(lreq)
+      req = lreq->ping_req  # same req
+      # cancel_linger_request is NOT
+      # called - handle_reply already
+      # unregistered
+      request_reinit(req)
+        WARN_ON(req->r_kref != 1)  # fires
+        request_init(req)
+          kref_init(req->r_kref)
+
+                   # req->r_kref == 1 after kref_init
+
+                                              ceph_osdc_put_request(req)
+                                                kref_put(req->r_kref)
+
+            # req->r_kref == 0 after kref_put, req is freed
+
+        <further req initialization/use> !!!
+
+This happens because send_linger_ping() always (re)uses the same OSD
+request for watch ping requests, relying on cancel_linger_request() to
+unregister it from the OSD client and rip its messages out from the
+messenger.  send_linger() does the same for watch/notify registration
+and watch reconnect requests.  Unfortunately cancel_request() doesn't
+guarantee that after it returns the OSD client would be completely done
+with the OSD request -- a ref could still be held and the callback (if
+specified) could still be invoked too.
+
+The original motivation for request_reinit() was the inability to deal with
+allocation failures in send_linger() and send_linger_ping().  Switching
+to using osdc->req_mempool (currently only used by CephFS) respects that
+and allows us to get rid of request_reinit().
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Reviewed-by: Xiubo Li <xiubli@redhat.com>
+Acked-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/ceph/osd_client.h |    3 +
+ net/ceph/osd_client.c           |  302 +++++++++++++++-------------------------
+ 2 files changed, 122 insertions(+), 183 deletions(-)
+
+--- a/include/linux/ceph/osd_client.h
++++ b/include/linux/ceph/osd_client.h
+@@ -287,6 +287,9 @@ struct ceph_osd_linger_request {
+       rados_watcherrcb_t errcb;
+       void *data;
++      struct ceph_pagelist *request_pl;
++      struct page **notify_id_pages;
++
+       struct page ***preply_pages;
+       size_t *preply_len;
+ };
+--- a/net/ceph/osd_client.c
++++ b/net/ceph/osd_client.c
+@@ -537,43 +537,6 @@ static void request_init(struct ceph_osd
+       target_init(&req->r_t);
+ }
+-/*
+- * This is ugly, but it allows us to reuse linger registration and ping
+- * requests, keeping the structure of the code around send_linger{_ping}()
+- * reasonable.  Setting up a min_nr=2 mempool for each linger request
+- * and dealing with copying ops (this blasts req only, watch op remains
+- * intact) isn't any better.
+- */
+-static void request_reinit(struct ceph_osd_request *req)
+-{
+-      struct ceph_osd_client *osdc = req->r_osdc;
+-      bool mempool = req->r_mempool;
+-      unsigned int num_ops = req->r_num_ops;
+-      u64 snapid = req->r_snapid;
+-      struct ceph_snap_context *snapc = req->r_snapc;
+-      bool linger = req->r_linger;
+-      struct ceph_msg *request_msg = req->r_request;
+-      struct ceph_msg *reply_msg = req->r_reply;
+-
+-      dout("%s req %p\n", __func__, req);
+-      WARN_ON(kref_read(&req->r_kref) != 1);
+-      request_release_checks(req);
+-
+-      WARN_ON(kref_read(&request_msg->kref) != 1);
+-      WARN_ON(kref_read(&reply_msg->kref) != 1);
+-      target_destroy(&req->r_t);
+-
+-      request_init(req);
+-      req->r_osdc = osdc;
+-      req->r_mempool = mempool;
+-      req->r_num_ops = num_ops;
+-      req->r_snapid = snapid;
+-      req->r_snapc = snapc;
+-      req->r_linger = linger;
+-      req->r_request = request_msg;
+-      req->r_reply = reply_msg;
+-}
+-
+ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
+                                              struct ceph_snap_context *snapc,
+                                              unsigned int num_ops,
+@@ -918,14 +881,30 @@ EXPORT_SYMBOL(osd_req_op_xattr_init);
+  * @watch_opcode: CEPH_OSD_WATCH_OP_*
+  */
+ static void osd_req_op_watch_init(struct ceph_osd_request *req, int which,
+-                                u64 cookie, u8 watch_opcode)
++                                u8 watch_opcode, u64 cookie, u32 gen)
+ {
+       struct ceph_osd_req_op *op;
+       op = osd_req_op_init(req, which, CEPH_OSD_OP_WATCH, 0);
+       op->watch.cookie = cookie;
+       op->watch.op = watch_opcode;
+-      op->watch.gen = 0;
++      op->watch.gen = gen;
++}
++
++/*
++ * prot_ver, timeout and notify payload (may be empty) should already be
++ * encoded in @request_pl
++ */
++static void osd_req_op_notify_init(struct ceph_osd_request *req, int which,
++                                 u64 cookie, struct ceph_pagelist *request_pl)
++{
++      struct ceph_osd_req_op *op;
++
++      op = osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0);
++      op->notify.cookie = cookie;
++
++      ceph_osd_data_pagelist_init(&op->notify.request_data, request_pl);
++      op->indata_len = request_pl->length;
+ }
+ /*
+@@ -2727,10 +2706,13 @@ static void linger_release(struct kref *
+       WARN_ON(!list_empty(&lreq->pending_lworks));
+       WARN_ON(lreq->osd);
+-      if (lreq->reg_req)
+-              ceph_osdc_put_request(lreq->reg_req);
+-      if (lreq->ping_req)
+-              ceph_osdc_put_request(lreq->ping_req);
++      if (lreq->request_pl)
++              ceph_pagelist_release(lreq->request_pl);
++      if (lreq->notify_id_pages)
++              ceph_release_page_vector(lreq->notify_id_pages, 1);
++
++      ceph_osdc_put_request(lreq->reg_req);
++      ceph_osdc_put_request(lreq->ping_req);
+       target_destroy(&lreq->t);
+       kfree(lreq);
+ }
+@@ -2999,6 +2981,12 @@ static void linger_commit_cb(struct ceph
+       struct ceph_osd_linger_request *lreq = req->r_priv;
+       mutex_lock(&lreq->lock);
++      if (req != lreq->reg_req) {
++              dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n",
++                   __func__, lreq, lreq->linger_id, req, lreq->reg_req);
++              goto out;
++      }
++
+       dout("%s lreq %p linger_id %llu result %d\n", __func__, lreq,
+            lreq->linger_id, req->r_result);
+       linger_reg_commit_complete(lreq, req->r_result);
+@@ -3022,6 +3010,7 @@ static void linger_commit_cb(struct ceph
+               }
+       }
++out:
+       mutex_unlock(&lreq->lock);
+       linger_put(lreq);
+ }
+@@ -3044,6 +3033,12 @@ static void linger_reconnect_cb(struct c
+       struct ceph_osd_linger_request *lreq = req->r_priv;
+       mutex_lock(&lreq->lock);
++      if (req != lreq->reg_req) {
++              dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n",
++                   __func__, lreq, lreq->linger_id, req, lreq->reg_req);
++              goto out;
++      }
++
+       dout("%s lreq %p linger_id %llu result %d last_error %d\n", __func__,
+            lreq, lreq->linger_id, req->r_result, lreq->last_error);
+       if (req->r_result < 0) {
+@@ -3053,46 +3048,64 @@ static void linger_reconnect_cb(struct c
+               }
+       }
++out:
+       mutex_unlock(&lreq->lock);
+       linger_put(lreq);
+ }
+ static void send_linger(struct ceph_osd_linger_request *lreq)
+ {
+-      struct ceph_osd_request *req = lreq->reg_req;
+-      struct ceph_osd_req_op *op = &req->r_ops[0];
++      struct ceph_osd_client *osdc = lreq->osdc;
++      struct ceph_osd_request *req;
++      int ret;
+-      verify_osdc_wrlocked(req->r_osdc);
++      verify_osdc_wrlocked(osdc);
++      mutex_lock(&lreq->lock);
+       dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id);
+-      if (req->r_osd)
+-              cancel_linger_request(req);
++      if (lreq->reg_req) {
++              if (lreq->reg_req->r_osd)
++                      cancel_linger_request(lreq->reg_req);
++              ceph_osdc_put_request(lreq->reg_req);
++      }
++
++      req = ceph_osdc_alloc_request(osdc, NULL, 1, true, GFP_NOIO);
++      BUG_ON(!req);
+-      request_reinit(req);
+       target_copy(&req->r_t, &lreq->t);
+       req->r_mtime = lreq->mtime;
+-      mutex_lock(&lreq->lock);
+       if (lreq->is_watch && lreq->committed) {
+-              WARN_ON(op->op != CEPH_OSD_OP_WATCH ||
+-                      op->watch.cookie != lreq->linger_id);
+-              op->watch.op = CEPH_OSD_WATCH_OP_RECONNECT;
+-              op->watch.gen = ++lreq->register_gen;
++              osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_RECONNECT,
++                                    lreq->linger_id, ++lreq->register_gen);
+               dout("lreq %p reconnect register_gen %u\n", lreq,
+-                   op->watch.gen);
++                   req->r_ops[0].watch.gen);
+               req->r_callback = linger_reconnect_cb;
+       } else {
+-              if (!lreq->is_watch)
++              if (lreq->is_watch) {
++                      osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_WATCH,
++                                            lreq->linger_id, 0);
++              } else {
+                       lreq->notify_id = 0;
+-              else
+-                      WARN_ON(op->watch.op != CEPH_OSD_WATCH_OP_WATCH);
++
++                      refcount_inc(&lreq->request_pl->refcnt);
++                      osd_req_op_notify_init(req, 0, lreq->linger_id,
++                                             lreq->request_pl);
++                      ceph_osd_data_pages_init(
++                          osd_req_op_data(req, 0, notify, response_data),
++                          lreq->notify_id_pages, PAGE_SIZE, 0, false, false);
++              }
+               dout("lreq %p register\n", lreq);
+               req->r_callback = linger_commit_cb;
+       }
+-      mutex_unlock(&lreq->lock);
++
++      ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
++      BUG_ON(ret);
+       req->r_priv = linger_get(lreq);
+       req->r_linger = true;
++      lreq->reg_req = req;
++      mutex_unlock(&lreq->lock);
+       submit_request(req, true);
+ }
+@@ -3102,6 +3115,12 @@ static void linger_ping_cb(struct ceph_o
+       struct ceph_osd_linger_request *lreq = req->r_priv;
+       mutex_lock(&lreq->lock);
++      if (req != lreq->ping_req) {
++              dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n",
++                   __func__, lreq, lreq->linger_id, req, lreq->ping_req);
++              goto out;
++      }
++
+       dout("%s lreq %p linger_id %llu result %d ping_sent %lu last_error %d\n",
+            __func__, lreq, lreq->linger_id, req->r_result, lreq->ping_sent,
+            lreq->last_error);
+@@ -3117,6 +3136,7 @@ static void linger_ping_cb(struct ceph_o
+                    lreq->register_gen, req->r_ops[0].watch.gen);
+       }
++out:
+       mutex_unlock(&lreq->lock);
+       linger_put(lreq);
+ }
+@@ -3124,8 +3144,8 @@ static void linger_ping_cb(struct ceph_o
+ static void send_linger_ping(struct ceph_osd_linger_request *lreq)
+ {
+       struct ceph_osd_client *osdc = lreq->osdc;
+-      struct ceph_osd_request *req = lreq->ping_req;
+-      struct ceph_osd_req_op *op = &req->r_ops[0];
++      struct ceph_osd_request *req;
++      int ret;
+       if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSERD)) {
+               dout("%s PAUSERD\n", __func__);
+@@ -3137,19 +3157,26 @@ static void send_linger_ping(struct ceph
+            __func__, lreq, lreq->linger_id, lreq->ping_sent,
+            lreq->register_gen);
+-      if (req->r_osd)
+-              cancel_linger_request(req);
++      if (lreq->ping_req) {
++              if (lreq->ping_req->r_osd)
++                      cancel_linger_request(lreq->ping_req);
++              ceph_osdc_put_request(lreq->ping_req);
++      }
+-      request_reinit(req);
+-      target_copy(&req->r_t, &lreq->t);
++      req = ceph_osdc_alloc_request(osdc, NULL, 1, true, GFP_NOIO);
++      BUG_ON(!req);
+-      WARN_ON(op->op != CEPH_OSD_OP_WATCH ||
+-              op->watch.cookie != lreq->linger_id ||
+-              op->watch.op != CEPH_OSD_WATCH_OP_PING);
+-      op->watch.gen = lreq->register_gen;
++      target_copy(&req->r_t, &lreq->t);
++      osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_PING, lreq->linger_id,
++                            lreq->register_gen);
+       req->r_callback = linger_ping_cb;
++
++      ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
++      BUG_ON(ret);
++
+       req->r_priv = linger_get(lreq);
+       req->r_linger = true;
++      lreq->ping_req = req;
+       ceph_osdc_get_request(req);
+       account_request(req);
+@@ -3165,12 +3192,6 @@ static void linger_submit(struct ceph_os
+       down_write(&osdc->lock);
+       linger_register(lreq);
+-      if (lreq->is_watch) {
+-              lreq->reg_req->r_ops[0].watch.cookie = lreq->linger_id;
+-              lreq->ping_req->r_ops[0].watch.cookie = lreq->linger_id;
+-      } else {
+-              lreq->reg_req->r_ops[0].notify.cookie = lreq->linger_id;
+-      }
+       calc_target(osdc, &lreq->t, false);
+       osd = lookup_create_osd(osdc, lreq->t.osd, true);
+@@ -3202,9 +3223,9 @@ static void cancel_linger_map_check(stru
+  */
+ static void __linger_cancel(struct ceph_osd_linger_request *lreq)
+ {
+-      if (lreq->is_watch && lreq->ping_req->r_osd)
++      if (lreq->ping_req && lreq->ping_req->r_osd)
+               cancel_linger_request(lreq->ping_req);
+-      if (lreq->reg_req->r_osd)
++      if (lreq->reg_req && lreq->reg_req->r_osd)
+               cancel_linger_request(lreq->reg_req);
+       cancel_linger_map_check(lreq);
+       unlink_linger(lreq->osd, lreq);
+@@ -4651,43 +4672,6 @@ again:
+ }
+ EXPORT_SYMBOL(ceph_osdc_sync);
+-static struct ceph_osd_request *
+-alloc_linger_request(struct ceph_osd_linger_request *lreq)
+-{
+-      struct ceph_osd_request *req;
+-
+-      req = ceph_osdc_alloc_request(lreq->osdc, NULL, 1, false, GFP_NOIO);
+-      if (!req)
+-              return NULL;
+-
+-      ceph_oid_copy(&req->r_base_oid, &lreq->t.base_oid);
+-      ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc);
+-      return req;
+-}
+-
+-static struct ceph_osd_request *
+-alloc_watch_request(struct ceph_osd_linger_request *lreq, u8 watch_opcode)
+-{
+-      struct ceph_osd_request *req;
+-
+-      req = alloc_linger_request(lreq);
+-      if (!req)
+-              return NULL;
+-
+-      /*
+-       * Pass 0 for cookie because we don't know it yet, it will be
+-       * filled in by linger_submit().
+-       */
+-      osd_req_op_watch_init(req, 0, 0, watch_opcode);
+-
+-      if (ceph_osdc_alloc_messages(req, GFP_NOIO)) {
+-              ceph_osdc_put_request(req);
+-              return NULL;
+-      }
+-
+-      return req;
+-}
+-
+ /*
+  * Returns a handle, caller owns a ref.
+  */
+@@ -4717,18 +4701,6 @@ ceph_osdc_watch(struct ceph_osd_client *
+       lreq->t.flags = CEPH_OSD_FLAG_WRITE;
+       ktime_get_real_ts64(&lreq->mtime);
+-      lreq->reg_req = alloc_watch_request(lreq, CEPH_OSD_WATCH_OP_WATCH);
+-      if (!lreq->reg_req) {
+-              ret = -ENOMEM;
+-              goto err_put_lreq;
+-      }
+-
+-      lreq->ping_req = alloc_watch_request(lreq, CEPH_OSD_WATCH_OP_PING);
+-      if (!lreq->ping_req) {
+-              ret = -ENOMEM;
+-              goto err_put_lreq;
+-      }
+-
+       linger_submit(lreq);
+       ret = linger_reg_commit_wait(lreq);
+       if (ret) {
+@@ -4766,8 +4738,8 @@ int ceph_osdc_unwatch(struct ceph_osd_cl
+       ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc);
+       req->r_flags = CEPH_OSD_FLAG_WRITE;
+       ktime_get_real_ts64(&req->r_mtime);
+-      osd_req_op_watch_init(req, 0, lreq->linger_id,
+-                            CEPH_OSD_WATCH_OP_UNWATCH);
++      osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_UNWATCH,
++                            lreq->linger_id, 0);
+       ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
+       if (ret)
+@@ -4853,35 +4825,6 @@ out_put_req:
+ }
+ EXPORT_SYMBOL(ceph_osdc_notify_ack);
+-static int osd_req_op_notify_init(struct ceph_osd_request *req, int which,
+-                                u64 cookie, u32 prot_ver, u32 timeout,
+-                                void *payload, u32 payload_len)
+-{
+-      struct ceph_osd_req_op *op;
+-      struct ceph_pagelist *pl;
+-      int ret;
+-
+-      op = osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0);
+-      op->notify.cookie = cookie;
+-
+-      pl = ceph_pagelist_alloc(GFP_NOIO);
+-      if (!pl)
+-              return -ENOMEM;
+-
+-      ret = ceph_pagelist_encode_32(pl, 1); /* prot_ver */
+-      ret |= ceph_pagelist_encode_32(pl, timeout);
+-      ret |= ceph_pagelist_encode_32(pl, payload_len);
+-      ret |= ceph_pagelist_append(pl, payload, payload_len);
+-      if (ret) {
+-              ceph_pagelist_release(pl);
+-              return -ENOMEM;
+-      }
+-
+-      ceph_osd_data_pagelist_init(&op->notify.request_data, pl);
+-      op->indata_len = pl->length;
+-      return 0;
+-}
+-
+ /*
+  * @timeout: in seconds
+  *
+@@ -4900,7 +4843,6 @@ int ceph_osdc_notify(struct ceph_osd_cli
+                    size_t *preply_len)
+ {
+       struct ceph_osd_linger_request *lreq;
+-      struct page **pages;
+       int ret;
+       WARN_ON(!timeout);
+@@ -4913,41 +4855,35 @@ int ceph_osdc_notify(struct ceph_osd_cli
+       if (!lreq)
+               return -ENOMEM;
+-      lreq->preply_pages = preply_pages;
+-      lreq->preply_len = preply_len;
+-
+-      ceph_oid_copy(&lreq->t.base_oid, oid);
+-      ceph_oloc_copy(&lreq->t.base_oloc, oloc);
+-      lreq->t.flags = CEPH_OSD_FLAG_READ;
+-
+-      lreq->reg_req = alloc_linger_request(lreq);
+-      if (!lreq->reg_req) {
++      lreq->request_pl = ceph_pagelist_alloc(GFP_NOIO);
++      if (!lreq->request_pl) {
+               ret = -ENOMEM;
+               goto out_put_lreq;
+       }
+-      /*
+-       * Pass 0 for cookie because we don't know it yet, it will be
+-       * filled in by linger_submit().
+-       */
+-      ret = osd_req_op_notify_init(lreq->reg_req, 0, 0, 1, timeout,
+-                                   payload, payload_len);
+-      if (ret)
++      ret = ceph_pagelist_encode_32(lreq->request_pl, 1); /* prot_ver */
++      ret |= ceph_pagelist_encode_32(lreq->request_pl, timeout);
++      ret |= ceph_pagelist_encode_32(lreq->request_pl, payload_len);
++      ret |= ceph_pagelist_append(lreq->request_pl, payload, payload_len);
++      if (ret) {
++              ret = -ENOMEM;
+               goto out_put_lreq;
++      }
+       /* for notify_id */
+-      pages = ceph_alloc_page_vector(1, GFP_NOIO);
+-      if (IS_ERR(pages)) {
+-              ret = PTR_ERR(pages);
++      lreq->notify_id_pages = ceph_alloc_page_vector(1, GFP_NOIO);
++      if (IS_ERR(lreq->notify_id_pages)) {
++              ret = PTR_ERR(lreq->notify_id_pages);
++              lreq->notify_id_pages = NULL;
+               goto out_put_lreq;
+       }
+-      ceph_osd_data_pages_init(osd_req_op_data(lreq->reg_req, 0, notify,
+-                                               response_data),
+-                               pages, PAGE_SIZE, 0, false, true);
+-      ret = ceph_osdc_alloc_messages(lreq->reg_req, GFP_NOIO);
+-      if (ret)
+-              goto out_put_lreq;
++      lreq->preply_pages = preply_pages;
++      lreq->preply_len = preply_len;
++
++      ceph_oid_copy(&lreq->t.base_oid, oid);
++      ceph_oloc_copy(&lreq->t.base_oloc, oloc);
++      lreq->t.flags = CEPH_OSD_FLAG_READ;
+       linger_submit(lreq);
+       ret = linger_reg_commit_wait(lreq);
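
The pattern the rework lands on condenses well: every (re)send allocates a
fresh request instead of re-initializing a live one, and the completion
callback discards any request that is no longer current. A userspace
condensation with invented names (request, ping_req, send_ping) - the kernel
version additionally refcounts and puts the old request rather than keeping
it around as done here for simplicity:

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct request { int id; };

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static struct request *ping_req;   /* the current in-flight ping */

    static void ping_cb(struct request *req)   /* completion callback */
    {
        pthread_mutex_lock(&lock);
        if (req != ping_req) {         /* stale: a resend replaced us */
            pthread_mutex_unlock(&lock);
            return;
        }
        printf("ping %d completed\n", req->id);
        pthread_mutex_unlock(&lock);
    }

    static void send_ping(int id)
    {
        struct request *req = malloc(sizeof(*req));  /* fresh, no reinit */

        req->id = id;
        pthread_mutex_lock(&lock);
        ping_req = req;
        pthread_mutex_unlock(&lock);
        /* ... submit req to the OSD here ... */
    }

    int main(void)
    {
        send_ping(1);
        struct request *old = ping_req;
        send_ping(2);          /* a resend replaces the first request */
        ping_cb(old);          /* late callback for #1: ignored */
        ping_cb(ping_req);     /* current request: handled */
        free(old);
        free(ping_req);
        return 0;
    }
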
diff --git a/queue-5.10/pci-pm-avoid-putting-elo-i2-pcie-ports-in-d3cold.patch b/queue-5.10/pci-pm-avoid-putting-elo-i2-pcie-ports-in-d3cold.patch
new file mode 100644 (file)
index 0000000..8e72e5b
--- /dev/null
@@ -0,0 +1,51 @@
+From 92597f97a40bf661bebceb92e26ff87c76d562d4 Mon Sep 17 00:00:00 2001
+From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
+Date: Thu, 31 Mar 2022 19:38:51 +0200
+Subject: PCI/PM: Avoid putting Elo i2 PCIe Ports in D3cold
+
+From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+commit 92597f97a40bf661bebceb92e26ff87c76d562d4 upstream.
+
+If a Root Port on Elo i2 is put into D3cold and then back into D0, the
+downstream device becomes permanently inaccessible, so add a bridge D3 DMI
+quirk for that system.
+
+This was exposed by 14858dcc3b35 ("PCI: Use pci_update_current_state() in
+pci_enable_device_flags()"), but before that commit the Root Port in
+question had never been put into D3cold for real due to a mismatch between
+its power state retrieved from the PCI_PM_CTRL register (which was
+accessible even though the platform firmware indicated that the port was in
+D3cold) and the state of an ACPI power resource involved in its power
+management.
+
+BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=215715
+Link: https://lore.kernel.org/r/11980172.O9o76ZdvQC@kreacher
+Reported-by: Stefan Gottwald <gottwald@igel.com>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Cc: stable@vger.kernel.org     # v5.15+
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/pci/pci.c |   10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+--- a/drivers/pci/pci.c
++++ b/drivers/pci/pci.c
+@@ -2829,6 +2829,16 @@ static const struct dmi_system_id bridge
+                       DMI_MATCH(DMI_BOARD_VENDOR, "Gigabyte Technology Co., Ltd."),
+                       DMI_MATCH(DMI_BOARD_NAME, "X299 DESIGNARE EX-CF"),
+               },
++              /*
++               * Downstream device is not accessible after putting a root port
++               * into D3cold and back into D0 on Elo i2.
++               */
++              .ident = "Elo i2",
++              .matches = {
++                      DMI_MATCH(DMI_SYS_VENDOR, "Elo Touch Solutions"),
++                      DMI_MATCH(DMI_PRODUCT_NAME, "Elo i2"),
++                      DMI_MATCH(DMI_PRODUCT_VERSION, "RevB"),
++              },
+       },
+ #endif
+       { }
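
For context, tables like the one extended above are consumed through
dmi_check_system(). A kernel-style sketch, not a standalone program -
platform_allows_bridge_d3() is an invented wrapper; in the real code the
check sits in pci_bridge_d3_possible() in drivers/pci/pci.c:

    #include <linux/dmi.h>

    static const struct dmi_system_id bridge_d3_blacklist[] = {
            {
                    .ident = "Elo i2",
                    .matches = {
                            DMI_MATCH(DMI_SYS_VENDOR, "Elo Touch Solutions"),
                            DMI_MATCH(DMI_PRODUCT_NAME, "Elo i2"),
                            DMI_MATCH(DMI_PRODUCT_VERSION, "RevB"),
                    },
            },
            { }     /* terminator */
    };

    static bool platform_allows_bridge_d3(void)
    {
            /* dmi_check_system() returns the number of matching entries */
            return !dmi_check_system(bridge_d3_blacklist);
    }
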
diff --git a/queue-5.10/perf-fix-sys_perf_event_open-race-against-self.patch b/queue-5.10/perf-fix-sys_perf_event_open-race-against-self.patch
new file mode 100644 (file)
index 0000000..7c0f5af
--- /dev/null
@@ -0,0 +1,71 @@
+From 3ac6487e584a1eb54071dbe1212e05b884136704 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 20 May 2022 20:38:06 +0200
+Subject: perf: Fix sys_perf_event_open() race against self
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 3ac6487e584a1eb54071dbe1212e05b884136704 upstream.
+
+Norbert reported that it's possible to race sys_perf_event_open() such
+that the loser ends up in a different context from the group leader,
+triggering many WARNs.
+
+The move_group case checks for races against itself, but the
+!move_group case doesn't, seemingly relying on the previous
+group_leader->ctx == ctx check. However, that check is racy due to not
+holding any locks at that time.
+
+Therefore, re-check the result after acquiring locks and bailing
+if they no longer match.
+
+Additionally, clarify the not_move_group case from the
+move_group-vs-move_group race.
+
+Fixes: f63a8daa5812 ("perf: Fix event->ctx locking")
+Reported-by: Norbert Slusarek <nslusarek@gmx.net>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/events/core.c |   14 ++++++++++++++
+ 1 file changed, 14 insertions(+)
+
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -11946,6 +11946,9 @@ SYSCALL_DEFINE5(perf_event_open,
+                * Do not allow to attach to a group in a different task
+                * or CPU context. If we're moving SW events, we'll fix
+                * this up later, so allow that.
++               *
++               * Racy, not holding group_leader->ctx->mutex, see comment with
++               * perf_event_ctx_lock().
+                */
+               if (!move_group && group_leader->ctx != ctx)
+                       goto err_context;
+@@ -12013,6 +12016,7 @@ SYSCALL_DEFINE5(perf_event_open,
+                       } else {
+                               perf_event_ctx_unlock(group_leader, gctx);
+                               move_group = 0;
++                              goto not_move_group;
+                       }
+               }
+@@ -12029,7 +12033,17 @@ SYSCALL_DEFINE5(perf_event_open,
+               }
+       } else {
+               mutex_lock(&ctx->mutex);
++
++              /*
++               * Now that we hold ctx->lock, (re)validate group_leader->ctx == ctx,
++               * see the group_leader && !move_group test earlier.
++               */
++              if (group_leader && group_leader->ctx != ctx) {
++                      err = -EINVAL;
++                      goto err_locked;
++              }
+       }
++not_move_group:
+       if (ctx->task == TASK_TOMBSTONE) {
+               err = -ESRCH;
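
The fix is an instance of the check / lock / re-check idiom: any condition
tested before taking the lock must be validated again once the lock is held.
A runnable userspace distillation - context, event and attach_to_group() are
invented names, not the perf internals:

    #include <pthread.h>
    #include <stdio.h>

    struct context { pthread_mutex_t mutex; };
    struct event   { struct context *ctx; };

    static int attach_to_group(struct event *leader, struct context *ctx)
    {
        if (leader->ctx != ctx)      /* racy pre-check, no lock held */
            return -1;

        pthread_mutex_lock(&ctx->mutex);
        if (leader->ctx != ctx) {    /* re-validate under the lock */
            pthread_mutex_unlock(&ctx->mutex);
            return -1;
        }
        /* ... safe to install the event into the group here ... */
        pthread_mutex_unlock(&ctx->mutex);
        return 0;
    }

    int main(void)
    {
        struct context ctx = { PTHREAD_MUTEX_INITIALIZER };
        struct event leader = { &ctx };

        printf("attach: %d\n", attach_to_group(&leader, &ctx));
        return 0;
    }
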
diff --git a/queue-5.10/selinux-fix-bad-cleanup-on-error-in-hashtab_duplicate.patch b/queue-5.10/selinux-fix-bad-cleanup-on-error-in-hashtab_duplicate.patch
new file mode 100644 (file)
index 0000000..d7a68bf
--- /dev/null
@@ -0,0 +1,36 @@
+From 6254bd3db316c9ccb3b05caa8b438be63245466f Mon Sep 17 00:00:00 2001
+From: Ondrej Mosnacek <omosnace@redhat.com>
+Date: Tue, 17 May 2022 14:08:16 +0200
+Subject: selinux: fix bad cleanup on error in hashtab_duplicate()
+
+From: Ondrej Mosnacek <omosnace@redhat.com>
+
+commit 6254bd3db316c9ccb3b05caa8b438be63245466f upstream.
+
+The code attempts to free the 'new' pointer using kmem_cache_free(),
+which is wrong because this function isn't responsible for freeing it.
+Instead, the function should free new->htable and clear the contents of
+*new (to prevent double-free).
+
+Cc: stable@vger.kernel.org
+Fixes: c7c556f1e81b ("selinux: refactor changing booleans")
+Reported-by: Wander Lairson Costa <wander@redhat.com>
+Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
+Signed-off-by: Paul Moore <paul@paul-moore.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ security/selinux/ss/hashtab.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/security/selinux/ss/hashtab.c
++++ b/security/selinux/ss/hashtab.c
+@@ -178,7 +178,8 @@ int hashtab_duplicate(struct hashtab *ne
+                       kmem_cache_free(hashtab_node_cachep, cur);
+               }
+       }
+-      kmem_cache_free(hashtab_node_cachep, new);
++      kfree(new->htable);
++      memset(new, 0, sizeof(*new));
+       return -ENOMEM;
+ }
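
The corrected error path follows the usual ownership rule: free only what
the function itself allocated, and reset the caller-owned struct so a later
destroy cannot double-free. A userspace sketch with invented names mirroring
hashtab_duplicate():

    #include <stdlib.h>
    #include <string.h>

    struct hashtab {
        void **htable;               /* bucket array, allocated below */
        size_t size;
    };

    static int duplicate(struct hashtab *new, size_t size)
    {
        int copy_failed = 1;         /* pretend copying an entry failed */

        new->htable = calloc(size, sizeof(*new->htable));
        if (!new->htable)
            return -1;
        new->size = size;

        if (copy_failed) {
            free(new->htable);            /* ours to free ... */
            memset(new, 0, sizeof(*new)); /* ... but *new is the caller's:
                                             leave it inert, not dangling */
            return -1;
        }
        return 0;
    }

    int main(void)
    {
        struct hashtab h;

        (void)duplicate(&h, 16);
        return 0;
    }
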
diff --git a/queue-5.10/series b/queue-5.10/series
index d1b1e81e4b6e8342976d67b150294cdd94c9c696..9e42bfafd40c85afe68e1742a30e79f49083ee41 100644 (file)
--- a/queue-5.10/series
+++ b/queue-5.10/series
@@ -33,3 +33,14 @@ reinstate-some-of-swiotlb-rework-fix-info-leak-with-.patch
 alsa-usb-audio-restore-rane-sl-1-quirk.patch
 alsa-wavefront-proper-check-of-get_user-error.patch
 alsa-hda-realtek-add-quirk-for-tongfang-devices-with-pop-noise.patch
+perf-fix-sys_perf_event_open-race-against-self.patch
+selinux-fix-bad-cleanup-on-error-in-hashtab_duplicate.patch
+fix-double-fget-in-vhost_net_set_backend.patch
+pci-pm-avoid-putting-elo-i2-pcie-ports-in-d3cold.patch
+kvm-x86-mmu-update-number-of-zapped-pages-even-if-page-list-is-stable.patch
+arm64-paravirt-use-rcu-read-locks-to-guard-stolen_time.patch
+arm64-mte-ensure-the-cleared-tags-are-visible-before-setting-the-pte.patch
+crypto-qcom-rng-fix-infinite-loop-on-requests-not-multiple-of-word_sz.patch
+libceph-fix-potential-use-after-free-on-linger-ping-and-resends.patch
+drm-dp-mst-fix-a-possible-memory-leak-in-fetch_monitor_name.patch
+dma-buf-fix-use-of-dma_buf_set_name_-a-b-in-userspace.patch