From: Greg Kroah-Hartman
Date: Mon, 20 Feb 2023 11:21:11 +0000 (+0100)
Subject: 6.1-stable patches
X-Git-Tag: v4.14.306~11
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=847d81b7277add8ec4e23c4a5b1e218a2214ac89;p=thirdparty%2Fkernel%2Fstable-queue.git

6.1-stable patches

added patches:
alarmtimer-prevent-starvation-by-small-intervals-and-sig_ign.patch
kvm-initialize-all-of-the-kvm_debugregs-structure-before-sending-it-to-userspace.patch
kvm-x86-pmu-disable-vpmu-support-on-hybrid-cpus-host-pmus.patch
nvme-pci-refresh-visible-attrs-for-cmb-attributes.patch
perf-x86-refuse-to-export-capabilities-for-hybrid-pmus.patch
---

diff --git a/queue-6.1/alarmtimer-prevent-starvation-by-small-intervals-and-sig_ign.patch b/queue-6.1/alarmtimer-prevent-starvation-by-small-intervals-and-sig_ign.patch
new file mode 100644
index 00000000000..bc89e500310
--- /dev/null
+++ b/queue-6.1/alarmtimer-prevent-starvation-by-small-intervals-and-sig_ign.patch
@@ -0,0 +1,132 @@
+From d125d1349abeb46945dc5e98f7824bf688266f13 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner
+Date: Thu, 9 Feb 2023 23:25:49 +0100
+Subject: alarmtimer: Prevent starvation by small intervals and SIG_IGN
+
+From: Thomas Gleixner
+
+commit d125d1349abeb46945dc5e98f7824bf688266f13 upstream.
+
+syzbot reported an RCU stall which is caused by setting up an alarmtimer
+with a very small interval and ignoring the signal. The reproducer arms the
+alarm timer with a relative expiry of 8ns and an interval of 9ns. Not a
+problem per se, but that's an issue when the signal is ignored, because then
+the timer is immediately rearmed, as there is no way to delay that
+rearming to the signal delivery path. See posix_timer_fn() and commit
+58229a189942 ("posix-timers: Prevent softirq starvation by small intervals
+and SIG_IGN") for details.
+
+The reproducer does not set SIG_IGN explicitly, but it sets up the timer's
+signal with SIGCONT. That has the same effect as explicitly setting
+SIG_IGN for a signal, as SIGCONT is ignored if there is no handler set and
+the task is not ptraced.
+
+The log clearly shows that:
+
+  [pid 5102] --- SIGCONT {si_signo=SIGCONT, si_code=SI_TIMER, si_timerid=0, si_overrun=316014, si_int=0, si_ptr=NULL} ---
+
+It works because the tasks are traced and therefore the signal is queued so
+the tracer can see it, which delays the restart of the timer to the signal
+delivery path. But then the tracer is killed:
+
+  [pid 5087] kill(-5102, SIGKILL <unfinished ...>
+  ...
+  ./strace-static-x86_64: Process 5107 detached
+
+and after it's gone the stall can be observed:
+
+  syzkaller login: [   79.439102][   C0] hrtimer: interrupt took 68471 ns
+  [  184.460538][   C1] rcu: INFO: rcu_preempt detected stalls on CPUs/tasks:
+  ...
+  [  184.658237][   C1] rcu: Stack dump where RCU GP kthread last ran:
+  [  184.664574][   C1] Sending NMI from CPU 1 to CPUs 0:
+  [  184.669821][   C0] NMI backtrace for cpu 0
+  [  184.669831][   C0] CPU: 0 PID: 5108 Comm: syz-executor192 Not tainted 6.2.0-rc6-next-20230203-syzkaller #0
+  ...
+  [  184.670036][   C0] Call Trace:
+  [  184.670041][   C0]  <IRQ>
+  [  184.670045][   C0]  alarmtimer_fired+0x327/0x670
+
+posix_timer_fn() prevents that by checking whether the interval for
+timers which have the signal ignored is smaller than a jiffie and
+artificially delaying it by shifting the next expiry out by a jiffie. That's
+accurate vs. the overrun accounting, but slightly inaccurate
+vs. timer_gettime(2).
+
+The comment in that function says what needs to be done and there was a fix
+available for the regular userspace induced SIG_IGN mechanism, but that did
+not work due to the implicit ignore for SIGCONT and similar signals. This
+needs to be worked on, but for now the only available workaround is to do
+exactly what posix_timer_fn() does:
+
+Increase the interval of self-rearming timers, which have their signal
+ignored, to at least a jiffie.
+
+Interestingly this has been fixed before via commit ff86bf0c65f1
+("alarmtimer: Rate limit periodic intervals") already, but that fix got
+lost in a later rework.
+
+Reported-by: syzbot+b9564ba6e8e00694511b@syzkaller.appspotmail.com
+Fixes: f2c45807d399 ("alarmtimer: Switch over to generic set/get/rearm routine")
+Signed-off-by: Thomas Gleixner
+Acked-by: John Stultz
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/87k00q1no2.ffs@tglx
+Signed-off-by: Greg Kroah-Hartman
+---
+ kernel/time/alarmtimer.c |   33 +++++++++++++++++++++++++++++----
+ 1 file changed, 29 insertions(+), 4 deletions(-)
+
+--- a/kernel/time/alarmtimer.c
++++ b/kernel/time/alarmtimer.c
+@@ -470,11 +470,35 @@ u64 alarm_forward(struct alarm *alarm, k
+ }
+ EXPORT_SYMBOL_GPL(alarm_forward);
+
+-u64 alarm_forward_now(struct alarm *alarm, ktime_t interval)
++static u64 __alarm_forward_now(struct alarm *alarm, ktime_t interval, bool throttle)
+ {
+ 	struct alarm_base *base = &alarm_bases[alarm->type];
++	ktime_t now = base->get_ktime();
++
++	if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS) && throttle) {
++		/*
++		 * Same issue as with posix_timer_fn(). Timers which are
++		 * periodic but the signal is ignored can starve the system
++		 * with a very small interval. The real fix which was
++		 * promised in the context of posix_timer_fn() never
++		 * materialized, but someone should really work on it.
++		 *
++		 * To prevent DOS fake @now to be 1 jiffie out which keeps
++		 * the overrun accounting correct but creates an
++		 * inconsistency vs. timer_gettime(2).
++		 */
++		ktime_t kj = NSEC_PER_SEC / HZ;
++
++		if (interval < kj)
++			now = ktime_add(now, kj);
++	}
++
++	return alarm_forward(alarm, now, interval);
++}
+
+-	return alarm_forward(alarm, base->get_ktime(), interval);
++u64 alarm_forward_now(struct alarm *alarm, ktime_t interval)
++{
++	return __alarm_forward_now(alarm, interval, false);
+ }
+ EXPORT_SYMBOL_GPL(alarm_forward_now);
+
+@@ -551,9 +575,10 @@ static enum alarmtimer_restart alarm_han
+ 	if (posix_timer_event(ptr, si_private) && ptr->it_interval) {
+ 		/*
+ 		 * Handle ignored signals and rearm the timer. This will go
+-		 * away once we handle ignored signals proper.
++		 * away once we handle ignored signals proper. Ensure that
++		 * small intervals cannot starve the system.
+		 */
+-		ptr->it_overrun += alarm_forward_now(alarm, ptr->it_interval);
++		ptr->it_overrun += __alarm_forward_now(alarm, ptr->it_interval, true);
+ 		++ptr->it_requeue_pending;
+ 		ptr->it_active = 1;
+ 		result = ALARMTIMER_RESTART;
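For illustration, the failure mode this patch throttles can be sketched from
userspace roughly as follows. This is a minimal, hypothetical program built
from the values quoted in the commit message, not syzbot's actual reproducer;
creating CLOCK_REALTIME_ALARM timers requires CAP_WAKE_ALARM, and error
handling is omitted:

	#include <signal.h>
	#include <time.h>
	#include <unistd.h>

	int main(void)
	{
		timer_t id;
		struct sigevent sev = {
			.sigev_notify = SIGEV_SIGNAL,
			/* no handler, not ptraced -> implicitly ignored */
			.sigev_signo  = SIGCONT,
		};
		struct itimerspec its = {
			.it_value    = { .tv_sec = 0, .tv_nsec = 8 }, /* 8ns expiry */
			.it_interval = { .tv_sec = 0, .tv_nsec = 9 }, /* 9ns period */
		};

		timer_create(CLOCK_REALTIME_ALARM, &sev, &id);
		timer_settime(id, 0, &its, NULL);
		pause(); /* the timer now self-rearms; the signal is never delivered */
		return 0;
	}

Before the change, each expiry rearms the timer 9ns out, so the hrtimer fires
back-to-back from its own expiry path; with the change, the effective rearm
interval is raised to at least one jiffie while the overrun accounting still
reflects the requested period.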
+ */ +- ptr->it_overrun += alarm_forward_now(alarm, ptr->it_interval); ++ ptr->it_overrun += __alarm_forward_now(alarm, ptr->it_interval, true); + ++ptr->it_requeue_pending; + ptr->it_active = 1; + result = ALARMTIMER_RESTART; diff --git a/queue-6.1/kvm-initialize-all-of-the-kvm_debugregs-structure-before-sending-it-to-userspace.patch b/queue-6.1/kvm-initialize-all-of-the-kvm_debugregs-structure-before-sending-it-to-userspace.patch new file mode 100644 index 00000000000..87886ed01e0 --- /dev/null +++ b/queue-6.1/kvm-initialize-all-of-the-kvm_debugregs-structure-before-sending-it-to-userspace.patch @@ -0,0 +1,53 @@ +From 2c10b61421a28e95a46ab489fd56c0f442ff6952 Mon Sep 17 00:00:00 2001 +From: Greg Kroah-Hartman +Date: Tue, 14 Feb 2023 11:33:04 +0100 +Subject: kvm: initialize all of the kvm_debugregs structure before sending it to userspace + +From: Greg Kroah-Hartman + +commit 2c10b61421a28e95a46ab489fd56c0f442ff6952 upstream. + +When calling the KVM_GET_DEBUGREGS ioctl, on some configurations, there +might be some unitialized portions of the kvm_debugregs structure that +could be copied to userspace. Prevent this as is done in the other kvm +ioctls, by setting the whole structure to 0 before copying anything into +it. + +Bonus is that this reduces the lines of code as the explicit flag +setting and reserved space zeroing out can be removed. + +Cc: Sean Christopherson +Cc: Paolo Bonzini +Cc: Thomas Gleixner +Cc: Ingo Molnar +Cc: Borislav Petkov +Cc: Dave Hansen +Cc: +Cc: "H. Peter Anvin" +Cc: stable +Reported-by: Xingyuan Mo +Signed-off-by: Greg Kroah-Hartman +Message-Id: <20230214103304.3689213-1-gregkh@linuxfoundation.org> +Tested-by: Xingyuan Mo +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/x86.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -5250,12 +5250,11 @@ static void kvm_vcpu_ioctl_x86_get_debug + { + unsigned long val; + ++ memset(dbgregs, 0, sizeof(*dbgregs)); + memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db)); + kvm_get_dr(vcpu, 6, &val); + dbgregs->dr6 = val; + dbgregs->dr7 = vcpu->arch.dr7; +- dbgregs->flags = 0; +- memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved)); + } + + static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, diff --git a/queue-6.1/kvm-x86-pmu-disable-vpmu-support-on-hybrid-cpus-host-pmus.patch b/queue-6.1/kvm-x86-pmu-disable-vpmu-support-on-hybrid-cpus-host-pmus.patch new file mode 100644 index 00000000000..3850328f759 --- /dev/null +++ b/queue-6.1/kvm-x86-pmu-disable-vpmu-support-on-hybrid-cpus-host-pmus.patch @@ -0,0 +1,87 @@ +From 4d7404e5ee0066e9a9e8268675de8a273b568b08 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Wed, 8 Feb 2023 20:42:29 +0000 +Subject: KVM: x86/pmu: Disable vPMU support on hybrid CPUs (host PMUs) + +From: Sean Christopherson + +commit 4d7404e5ee0066e9a9e8268675de8a273b568b08 upstream. + +Disable KVM support for virtualizing PMUs on hosts with hybrid PMUs until +KVM gains a sane way to enumeration the hybrid vPMU to userspace and/or +gains a mechanism to let userspace opt-in to the dangers of exposing a +hybrid vPMU to KVM guests. Virtualizing a hybrid PMU, or at least part of +a hybrid PMU, is possible, but it requires careful, deliberate +configuration from userspace. + +E.g. 
diff --git a/queue-6.1/kvm-x86-pmu-disable-vpmu-support-on-hybrid-cpus-host-pmus.patch b/queue-6.1/kvm-x86-pmu-disable-vpmu-support-on-hybrid-cpus-host-pmus.patch
new file mode 100644
index 00000000000..3850328f759
--- /dev/null
+++ b/queue-6.1/kvm-x86-pmu-disable-vpmu-support-on-hybrid-cpus-host-pmus.patch
@@ -0,0 +1,87 @@
+From 4d7404e5ee0066e9a9e8268675de8a273b568b08 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson
+Date: Wed, 8 Feb 2023 20:42:29 +0000
+Subject: KVM: x86/pmu: Disable vPMU support on hybrid CPUs (host PMUs)
+
+From: Sean Christopherson
+
+commit 4d7404e5ee0066e9a9e8268675de8a273b568b08 upstream.
+
+Disable KVM support for virtualizing PMUs on hosts with hybrid PMUs until
+KVM gains a sane way to enumerate the hybrid vPMU to userspace and/or
+gains a mechanism to let userspace opt in to the dangers of exposing a
+hybrid vPMU to KVM guests. Virtualizing a hybrid PMU, or at least part of
+a hybrid PMU, is possible, but it requires careful, deliberate
+configuration from userspace.
+
+E.g. to expose full functionality, vCPUs need to be pinned to pCPUs to
+prevent migrating a vCPU between a big core and a little core, userspace
+must enumerate a reasonable topology to the guest, and guest CPUID must be
+curated per vCPU to enumerate accurate vPMU capabilities.
+
+The last point is especially problematic, as KVM doesn't control which
+pCPU it runs on when enumerating KVM's vPMU capabilities to userspace,
+i.e. userspace can't rely on KVM_GET_SUPPORTED_CPUID in its current form.
+
+Alternatively, userspace could enable vPMU support by enumerating the
+set of features that are common and coherent across all cores, e.g. by
+filtering PMU events and restricting guest capabilities. But again, that
+requires userspace to take action far beyond reflecting KVM's supported
+feature set into the guest.
+
+For now, simply disable vPMU support on hybrid CPUs to avoid inducing
+seemingly random #GPs in guests, and punt support for hybrid CPUs to a
+future enabling effort.
+
+Reported-by: Jianfeng Gao
+Cc: stable@vger.kernel.org
+Cc: Andrew Cooper
+Cc: Peter Zijlstra
+Cc: Kan Liang
+Cc: Andi Kleen
+Link: https://lore.kernel.org/all/20220818181530.2355034-1-kan.liang@linux.intel.com
+Signed-off-by: Sean Christopherson
+Message-Id: <20230208204230.1360502-2-seanjc@google.com>
+Signed-off-by: Paolo Bonzini
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/x86/kvm/pmu.h |   26 +++++++++++++++++++-------
+ 1 file changed, 19 insertions(+), 7 deletions(-)
+
+--- a/arch/x86/kvm/pmu.h
++++ b/arch/x86/kvm/pmu.h
+@@ -164,15 +164,27 @@ static inline void kvm_init_pmu_capabili
+ {
+ 	bool is_intel = boot_cpu_data.x86_vendor == X86_VENDOR_INTEL;
+
+-	perf_get_x86_pmu_capability(&kvm_pmu_cap);
+-
+-	/*
+-	 * For Intel, only support guest architectural pmu
+-	 * on a host with architectural pmu.
+-	 */
+-	if ((is_intel && !kvm_pmu_cap.version) || !kvm_pmu_cap.num_counters_gp)
++	/*
++	 * Hybrid PMUs don't play nice with virtualization without careful
++	 * configuration by userspace, and KVM's APIs for reporting supported
++	 * vPMU features do not account for hybrid PMUs. Disable vPMU support
++	 * for hybrid PMUs until KVM gains a way to let userspace opt-in.
++	 */
++	if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU))
+ 		enable_pmu = false;
+
++	if (enable_pmu) {
++		perf_get_x86_pmu_capability(&kvm_pmu_cap);
++
++		/*
++		 * For Intel, only support guest architectural pmu
++		 * on a host with architectural pmu.
++		 */
++		if ((is_intel && !kvm_pmu_cap.version) ||
++		    !kvm_pmu_cap.num_counters_gp)
++			enable_pmu = false;
++	}
++
+ 	if (!enable_pmu) {
+ 		memset(&kvm_pmu_cap, 0, sizeof(kvm_pmu_cap));
+ 		return;
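On the userspace side, the "careful, deliberate configuration" described in
the commit message would start with pinning each vCPU thread to a single core
type. A sketch, assuming the sysfs core-type lists hybrid Intel parts expose
(/sys/devices/cpu_core/cpus for P-cores, /sys/devices/cpu_atom/cpus for
E-cores); error handling omitted:

	#define _GNU_SOURCE
	#include <pthread.h>
	#include <sched.h>

	/* Pin one vCPU thread to P-cores only, so the PMU underneath it
	 * never changes. pcores[] would be parsed from the sysfs lists
	 * mentioned above. */
	static int pin_vcpu_thread(pthread_t vcpu, const int *pcores, int n)
	{
		cpu_set_t set;

		CPU_ZERO(&set);
		for (int i = 0; i < n; i++)
			CPU_SET(pcores[i], &set);
		return pthread_setaffinity_np(vcpu, sizeof(set), &set);
	}

Pinning alone is not sufficient, per the commit message: topology and
per-vCPU CPUID curation are still on userspace as well.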
diff --git a/queue-6.1/nvme-pci-refresh-visible-attrs-for-cmb-attributes.patch b/queue-6.1/nvme-pci-refresh-visible-attrs-for-cmb-attributes.patch
new file mode 100644
index 00000000000..19ba2e2bdea
--- /dev/null
+++ b/queue-6.1/nvme-pci-refresh-visible-attrs-for-cmb-attributes.patch
@@ -0,0 +1,54 @@
+From e917a849c3fc317c4a5f82bb18726000173d39e6 Mon Sep 17 00:00:00 2001
+From: Keith Busch
+Date: Thu, 16 Feb 2023 08:44:03 -0800
+Subject: nvme-pci: refresh visible attrs for cmb attributes
+
+From: Keith Busch
+
+commit e917a849c3fc317c4a5f82bb18726000173d39e6 upstream.
+
+The sysfs group containing the cmb attributes is registered before the
+driver knows if they need to be visible or not. Update the group when
+cmb attributes are known to exist so the visibility setting is correct.
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=217037
+Fixes: 86adbf0cdb9ec65 ("nvme: simplify transport specific device attribute handling")
+Signed-off-by: Keith Busch
+Reviewed-by: Sagi Grimberg
+Signed-off-by: Christoph Hellwig
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/nvme/host/pci.c |    8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/drivers/nvme/host/pci.c
++++ b/drivers/nvme/host/pci.c
+@@ -109,6 +109,7 @@ struct nvme_queue;
+
+ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
+ static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode);
++static void nvme_update_attrs(struct nvme_dev *dev);
+
+ /*
+  * Represents an NVM Express device.  Each nvme_dev is a PCI function.
+@@ -1967,6 +1968,8 @@ static void nvme_map_cmb(struct nvme_dev
+ 	if ((dev->cmbsz & (NVME_CMBSZ_WDS | NVME_CMBSZ_RDS)) ==
+ 			(NVME_CMBSZ_WDS | NVME_CMBSZ_RDS))
+ 		pci_p2pmem_publish(pdev, true);
++
++	nvme_update_attrs(dev);
+ }
+
+ static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
+@@ -2250,6 +2253,11 @@ static const struct attribute_group *nvm
+ 	NULL,
+ };
+
++static void nvme_update_attrs(struct nvme_dev *dev)
++{
++	sysfs_update_group(&dev->ctrl.device->kobj, &nvme_pci_dev_attrs_group);
++}
++
+ /*
+  * nirqs is the number of interrupts available for write and read
+  * queues. The core already reserved an interrupt for the admin queue.
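The mechanism this patch leans on is that sysfs_update_group() re-runs the
attribute group's .is_visible callback. A sketch of such a callback for the
CMB attributes (illustrative only; the real driver's callback and the
kobj_to_nvme_dev() helper here are hypothetical):

	/* Report the attribute's normal mode only once the CMB has been
	 * mapped; before nvme_map_cmb() runs, dev->cmbsz is still 0 and
	 * the attributes stay hidden. */
	static umode_t cmb_attrs_are_visible(struct kobject *kobj,
					     struct attribute *attr, int n)
	{
		struct nvme_dev *dev = kobj_to_nvme_dev(kobj); /* hypothetical */

		return dev->cmbsz ? attr->mode : 0;
	}

Since the group is registered before the CMB is probed, the callback's first
pass hides the attributes; the added nvme_update_attrs() call re-evaluates it
once dev->cmbsz is known.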
diff --git a/queue-6.1/perf-x86-refuse-to-export-capabilities-for-hybrid-pmus.patch b/queue-6.1/perf-x86-refuse-to-export-capabilities-for-hybrid-pmus.patch
new file mode 100644
index 00000000000..4a8430090bd
--- /dev/null
+++ b/queue-6.1/perf-x86-refuse-to-export-capabilities-for-hybrid-pmus.patch
@@ -0,0 +1,56 @@
+From 4b4191b8ae1278bde3642acaaef8f92810ed111a Mon Sep 17 00:00:00 2001
+From: Sean Christopherson
+Date: Wed, 8 Feb 2023 20:42:30 +0000
+Subject: perf/x86: Refuse to export capabilities for hybrid PMUs
+
+From: Sean Christopherson
+
+commit 4b4191b8ae1278bde3642acaaef8f92810ed111a upstream.
+
+Now that KVM disables vPMU support on hybrid CPUs, WARN and return zeros
+if perf_get_x86_pmu_capability() is invoked on a hybrid CPU. The helper
+doesn't provide an accurate accounting of the PMU capabilities for hybrid
+CPUs and needs to be enhanced if KVM, or anything else outside of perf,
+wants to act on the PMU capabilities.
+
+Cc: stable@vger.kernel.org
+Cc: Andrew Cooper
+Cc: Peter Zijlstra
+Cc: Kan Liang
+Cc: Andi Kleen
+Link: https://lore.kernel.org/all/20220818181530.2355034-1-kan.liang@linux.intel.com
+Signed-off-by: Sean Christopherson
+Message-Id: <20230208204230.1360502-3-seanjc@google.com>
+Signed-off-by: Paolo Bonzini
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/x86/events/core.c |   12 +++++++-----
+ 1 file changed, 7 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/events/core.c
++++ b/arch/x86/events/core.c
+@@ -2994,17 +2994,19 @@ unsigned long perf_misc_flags(struct pt_
+
+ void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
+ {
+-	if (!x86_pmu_initialized()) {
++	/* This API doesn't currently support enumerating hybrid PMUs. */
++	if (WARN_ON_ONCE(cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) ||
++	    !x86_pmu_initialized()) {
+ 		memset(cap, 0, sizeof(*cap));
+ 		return;
+ 	}
+
+-	cap->version = x86_pmu.version;
+ 	/*
+-	 * KVM doesn't support the hybrid PMU yet.
+-	 * Return the common value in global x86_pmu,
+-	 * which available for all cores.
++	 * Note, hybrid CPU models get tracked as having hybrid PMUs even when
++	 * all E-cores are disabled via BIOS. When E-cores are disabled, the
++	 * base PMU holds the correct number of counters for P-cores.
+	 */
++	cap->version = x86_pmu.version;
+ 	cap->num_counters_gp = x86_pmu.num_counters;
+ 	cap->num_counters_fixed = x86_pmu.num_counters_fixed;
+ 	cap->bit_width_gp = x86_pmu.cntval_bits;
diff --git a/queue-6.1/series b/queue-6.1/series
index 9c31c2aae67..dd8377373bd 100644
--- a/queue-6.1/series
+++ b/queue-6.1/series
@@ -109,3 +109,8 @@ net-sched-tcindex-search-key-must-be-16-bits.patch
 nvme-tcp-stop-auth-work-after-tearing-down-queues-in.patch
 nvme-rdma-stop-auth-work-after-tearing-down-queues-i.patch
 nvme-apple-fix-controller-shutdown-in-apple_nvme_dis.patch
+kvm-x86-pmu-disable-vpmu-support-on-hybrid-cpus-host-pmus.patch
+kvm-initialize-all-of-the-kvm_debugregs-structure-before-sending-it-to-userspace.patch
+perf-x86-refuse-to-export-capabilities-for-hybrid-pmus.patch
+alarmtimer-prevent-starvation-by-small-intervals-and-sig_ign.patch
+nvme-pci-refresh-visible-attrs-for-cmb-attributes.patch
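One closing note on the two hybrid-PMU patches queued above: after them,
perf_get_x86_pmu_capability() yields an all-zero structure on hybrid hosts,
so consumers outside perf must treat zeroed capabilities as "no usable PMU".
A caller-side sketch, loosely mirroring KVM's own check in
kvm_init_pmu_capability() (illustrative, not kernel code as-is):

	struct x86_pmu_capability cap;

	perf_get_x86_pmu_capability(&cap);
	if (!cap.version || !cap.num_counters_gp)
		return; /* hybrid or uninitialized PMU: leave the feature off */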