5.15-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Thu, 6 Jun 2024 13:12:24 +0000 (15:12 +0200)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Thu, 6 Jun 2024 13:12:24 +0000 (15:12 +0200)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 6 Jun 2024 13:12:24 +0000 (15:12 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 6 Jun 2024 13:12:24 +0000 (15:12 +0200)
diff --git a/queue-5.15/genirq-cpuhotplug-x86-vector-prevent-vector-leak-during-cpu-offline.patch b/queue-5.15/genirq-cpuhotplug-x86-vector-prevent-vector-leak-during-cpu-offline.patch

new file mode 100644 (file)

index 0000000..5a8ef5f
--- /dev/null
+++ b/queue-5.15/genirq-cpuhotplug-x86-vector-prevent-vector-leak-during-cpu-offline.patch
@@ -0,0 +1,123 @@
+From a6c11c0a5235fb144a65e0cb2ffd360ddc1f6c32 Mon Sep 17 00:00:00 2001
+From: Dongli Zhang <dongli.zhang@oracle.com>
+Date: Wed, 22 May 2024 15:02:18 -0700
+Subject: genirq/cpuhotplug, x86/vector: Prevent vector leak during CPU offline
+
+From: Dongli Zhang <dongli.zhang@oracle.com>
+
+commit a6c11c0a5235fb144a65e0cb2ffd360ddc1f6c32 upstream.
+
+Without IRQD_MOVE_PCNTXT, an interrupt affinity change made via
+procfs does not take effect immediately. Instead, the change is
+deferred until the next time the interrupt is triggered on the
+original CPU.
+
+When the interrupt next triggers on the original CPU, the new affinity is
+enforced within __irq_move_irq(). A vector is allocated from the new CPU,
+but the old vector on the original CPU remains and is not immediately
+reclaimed. Instead, apicd->move_in_progress is flagged, and the reclaiming
+process is delayed until the next trigger of the interrupt on the new CPU.
+
+Upon the subsequent triggering of the interrupt on the new CPU,
+irq_complete_move() adds a task to the old CPU's vector_cleanup list if it
+remains online. Subsequently, the timer on the old CPU iterates over its
+vector_cleanup list, reclaiming old vectors.
+
+However, a rare scenario arises if the old CPU is outgoing before the
+interrupt triggers again on the new CPU.
+
+In that case irq_force_complete_move() is not invoked on the outgoing CPU
+to reclaim the old apicd->prev_vector because the interrupt isn't currently
+affine to the outgoing CPU, and irq_needs_fixup() returns false. Even
+though __vector_schedule_cleanup() is later called on the new CPU, it
+doesn't reclaim apicd->prev_vector; instead, it simply resets both
+apicd->move_in_progress and apicd->prev_vector to 0.
+
+As a result, the vector remains unreclaimed in vector_matrix, leading to a
+CPU vector leak.
+
+To address this issue, move the invocation of irq_force_complete_move()
+before the irq_needs_fixup() call to reclaim apicd->prev_vector, if the
+interrupt is currently or used to be affine to the outgoing CPU.
+
+Additionally, reclaim the vector in __vector_schedule_cleanup(),
+following a warning message, although theoretically it should never see
+apicd->move_in_progress with apicd->prev_cpu pointing to an offline CPU.
+
+Fixes: f0383c24b485 ("genirq/cpuhotplug: Add support for cleaning up move in progress")
+Signed-off-by: Dongli Zhang <dongli.zhang@oracle.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20240522220218.162423-1-dongli.zhang@oracle.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/apic/vector.c |    9 ++++++---
+ kernel/irq/cpuhotplug.c       |   16 ++++++++--------
+ 2 files changed, 14 insertions(+), 11 deletions(-)
+
+--- a/arch/x86/kernel/apic/vector.c
++++ b/arch/x86/kernel/apic/vector.c
+@@ -982,7 +982,8 @@ static void __send_cleanup_vector(struct
+               hlist_add_head(&apicd->clist, per_cpu_ptr(&cleanup_list, cpu));
+               apic->send_IPI(cpu, IRQ_MOVE_CLEANUP_VECTOR);
+       } else {
+-              apicd->prev_vector = 0;
++              pr_warn("IRQ %u schedule cleanup for offline CPU %u\n", apicd->irq, cpu);
++              free_moved_vector(apicd);
+       }
+       raw_spin_unlock(&vector_lock);
+ }
+@@ -1019,6 +1020,7 @@ void irq_complete_move(struct irq_cfg *c
+  */
+ void irq_force_complete_move(struct irq_desc *desc)
+ {
++      unsigned int cpu = smp_processor_id();
+       struct apic_chip_data *apicd;
+       struct irq_data *irqd;
+       unsigned int vector;
+@@ -1043,10 +1045,11 @@ void irq_force_complete_move(struct irq_
+               goto unlock;
+ 
+       /*
+-       * If prev_vector is empty, no action required.
++       * If prev_vector is empty or the descriptor is neither currently
++       * nor previously on the outgoing CPU no action required.
+        */
+       vector = apicd->prev_vector;
+-      if (!vector)
++      if (!vector || (apicd->cpu != cpu && apicd->prev_cpu != cpu))
+               goto unlock;
+ 
+       /*
+--- a/kernel/irq/cpuhotplug.c
++++ b/kernel/irq/cpuhotplug.c
+@@ -70,6 +70,14 @@ static bool migrate_one_irq(struct irq_d
+       }
+ 
+       /*
++       * Complete an eventually pending irq move cleanup. If this
++       * interrupt was moved in hard irq context, then the vectors need
++       * to be cleaned up. It can't wait until this interrupt actually
++       * happens and this CPU was involved.
++       */
++      irq_force_complete_move(desc);
++
++      /*
+        * No move required, if:
+        * - Interrupt is per cpu
+        * - Interrupt is not started
+@@ -88,14 +96,6 @@ static bool migrate_one_irq(struct irq_d
+       }
+ 
+       /*
+-       * Complete an eventually pending irq move cleanup. If this
+-       * interrupt was moved in hard irq context, then the vectors need
+-       * to be cleaned up. It can't wait until this interrupt actually
+-       * happens and this CPU was involved.
+-       */
+-      irq_force_complete_move(desc);
+-
+-      /*
+        * If there is a setaffinity pending, then try to reuse the pending
+        * mask, so the last change of the affinity does not get lost. If
+        * there is no move pending or the pending mask does not contain
diff --git a/queue-5.15/kvm-x86-don-t-advertise-guest.maxphyaddr-as-host.maxphyaddr-in-cpuid.patch b/queue-5.15/kvm-x86-don-t-advertise-guest.maxphyaddr-as-host.maxphyaddr-in-cpuid.patch

new file mode 100644 (file)

index 0000000..3ce82ec
--- /dev/null
+++ b/queue-5.15/kvm-x86-don-t-advertise-guest.maxphyaddr-as-host.maxphyaddr-in-cpuid.patch
@@ -0,0 +1,74 @@
+From 6f5c9600621b4efb5c61b482d767432eb1ad3a9c Mon Sep 17 00:00:00 2001
+From: Gerd Hoffmann <kraxel@redhat.com>
+Date: Wed, 13 Mar 2024 13:58:42 +0100
+Subject: KVM: x86: Don't advertise guest.MAXPHYADDR as host.MAXPHYADDR in CPUID
+
+From: Gerd Hoffmann <kraxel@redhat.com>
+
+commit 6f5c9600621b4efb5c61b482d767432eb1ad3a9c upstream.
+
+Drop KVM's propagation of GuestPhysBits (CPUID leaf 80000008, EAX[23:16])
+to HostPhysBits (same leaf, EAX[7:0]) when advertising the address widths
+to userspace via KVM_GET_SUPPORTED_CPUID.
+
+Per AMD, GuestPhysBits is intended for software use, and physical CPUs do
+not set that field.  I.e. GuestPhysBits will be non-zero if and only if
+KVM is running as a nested hypervisor, and in that case, GuestPhysBits is
+NOT guaranteed to capture the CPU's effective MAXPHYADDR when running with
+TDP enabled.
+
+E.g. KVM will soon use GuestPhysBits to communicate the CPU's maximum
+*addressable* guest physical address, which would result in KVM
+under-reporting PhysBits when running as an L1 on a CPU with
+MAXPHYADDR=52, but without 5-level paging.
+
+Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
+Cc: stable@vger.kernel.org
+Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
+Link: https://lore.kernel.org/r/20240313125844.912415-2-kraxel@redhat.com
+[sean: rewrite changelog with --verbose, Cc stable@]
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/cpuid.c |   21 ++++++++++-----------
+ 1 file changed, 10 insertions(+), 11 deletions(-)
+
+--- a/arch/x86/kvm/cpuid.c
++++ b/arch/x86/kvm/cpuid.c
+@@ -934,9 +934,8 @@ static inline int __do_cpuid_func(struct
+               entry->eax = entry->ebx = entry->ecx = 0;
+               break;
+       case 0x80000008: {
+-              unsigned g_phys_as = (entry->eax >> 16) & 0xff;
+-              unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U);
+-              unsigned phys_as = entry->eax & 0xff;
++              unsigned int virt_as = max((entry->eax >> 8) & 0xff, 48U);
++              unsigned int phys_as;
+ 
+               /*
+                * If TDP (NPT) is disabled use the adjusted host MAXPHYADDR as
+@@ -944,16 +943,16 @@ static inline int __do_cpuid_func(struct
+                * reductions in MAXPHYADDR for memory encryption affect shadow
+                * paging, too.
+                *
+-               * If TDP is enabled but an explicit guest MAXPHYADDR is not
+-               * provided, use the raw bare metal MAXPHYADDR as reductions to
+-               * the HPAs do not affect GPAs.
++               * If TDP is enabled, use the raw bare metal MAXPHYADDR as
++               * reductions to the HPAs do not affect GPAs.
+                */
+-              if (!tdp_enabled)
+-                      g_phys_as = boot_cpu_data.x86_phys_bits;
+-              else if (!g_phys_as)
+-                      g_phys_as = phys_as;
++              if (!tdp_enabled) {
++                      phys_as = boot_cpu_data.x86_phys_bits;
++              } else {
++                      phys_as = entry->eax & 0xff;
++              }
+ 
+-              entry->eax = g_phys_as | (virt_as << 8);
++              entry->eax = phys_as | (virt_as << 8);
+               entry->ecx &= ~(GENMASK(31, 16) | GENMASK(11, 8));
+               entry->edx = 0;
+               cpuid_entry_override(entry, CPUID_8000_0008_EBX);
diff --git a/queue-5.15/series b/queue-5.15/series

index aa2a7c44a779bc7b01d529c2aa0bec022402c543..762c0d8b739831dc0e7538efc03adca806439122 100644 (file)
--- a/queue-5.15/series
+++ b/queue-5.15/series
@@ -335,3 +335,5 @@ ipvlan-dont-use-skb-sk-in-ipvlan_process_v-4-6-_outb.patch
  powerpc-uaccess-use-yz-asm-constraint-for-ld.patch
  hwmon-shtc1-fix-property-misspelling.patch
  alsa-timer-set-lower-bound-of-start-tick-time.patch
+kvm-x86-don-t-advertise-guest.maxphyaddr-as-host.maxphyaddr-in-cpuid.patch
+genirq-cpuhotplug-x86-vector-prevent-vector-leak-during-cpu-offline.patch
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Thu, 6 Jun 2024 13:12:24 +0000 (15:12 +0200)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Thu, 6 Jun 2024 13:12:24 +0000 (15:12 +0200)
queue-5.15/genirq-cpuhotplug-x86-vector-prevent-vector-leak-during-cpu-offline.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/kvm-x86-don-t-advertise-guest.maxphyaddr-as-host.maxphyaddr-in-cpuid.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/series		patch \| blob \| blame \| history