git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.10-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 3 Feb 2022 18:08:07 +0000 (19:08 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 3 Feb 2022 18:08:07 +0000 (19:08 +0100)
added patches:
drm-vc4-hdmi-make-sure-the-device-is-powered-with-cec.patch
kvm-x86-forcibly-leave-nested-virt-when-smm-state-is-toggled.patch
net-ipa-fix-atomic-update-in-ipa_endpoint_replenish.patch
net-ipa-prevent-concurrent-replenish.patch
net-ipa-use-a-bitmap-for-endpoint-replenish_enabled.patch
perf-core-fix-cgroup-event-list-management.patch
perf-rework-perf_event_exit_event.patch
psi-fix-uaf-issue-when-psi-trigger-is-destroyed-while-being-polled.patch
revert-drivers-bus-simple-pm-bus-add-support-for-probing-simple-bus-only-devices.patch
x86-cpu-add-xeon-icelake-d-to-list-of-cpus-that-support-ppin.patch
x86-mce-add-xeon-sapphire-rapids-to-list-of-cpus-that-support-ppin.patch

12 files changed:
queue-5.10/drm-vc4-hdmi-make-sure-the-device-is-powered-with-cec.patch [new file with mode: 0644]
queue-5.10/kvm-x86-forcibly-leave-nested-virt-when-smm-state-is-toggled.patch [new file with mode: 0644]
queue-5.10/net-ipa-fix-atomic-update-in-ipa_endpoint_replenish.patch [new file with mode: 0644]
queue-5.10/net-ipa-prevent-concurrent-replenish.patch [new file with mode: 0644]
queue-5.10/net-ipa-use-a-bitmap-for-endpoint-replenish_enabled.patch [new file with mode: 0644]
queue-5.10/perf-core-fix-cgroup-event-list-management.patch [new file with mode: 0644]
queue-5.10/perf-rework-perf_event_exit_event.patch [new file with mode: 0644]
queue-5.10/psi-fix-uaf-issue-when-psi-trigger-is-destroyed-while-being-polled.patch [new file with mode: 0644]
queue-5.10/revert-drivers-bus-simple-pm-bus-add-support-for-probing-simple-bus-only-devices.patch [new file with mode: 0644]
queue-5.10/series
queue-5.10/x86-cpu-add-xeon-icelake-d-to-list-of-cpus-that-support-ppin.patch [new file with mode: 0644]
queue-5.10/x86-mce-add-xeon-sapphire-rapids-to-list-of-cpus-that-support-ppin.patch [new file with mode: 0644]

diff --git a/queue-5.10/drm-vc4-hdmi-make-sure-the-device-is-powered-with-cec.patch b/queue-5.10/drm-vc4-hdmi-make-sure-the-device-is-powered-with-cec.patch
new file mode 100644 (file)
index 0000000..1c69327
--- /dev/null
@@ -0,0 +1,96 @@
+From 20b0dfa86bef0e80b41b0e5ac38b92f23b6f27f9 Mon Sep 17 00:00:00 2001
+From: Maxime Ripard <maxime@cerno.tech>
+Date: Thu, 19 Aug 2021 15:59:30 +0200
+Subject: drm/vc4: hdmi: Make sure the device is powered with CEC
+
+From: Maxime Ripard <maxime@cerno.tech>
+
+Commit 20b0dfa86bef0e80b41b0e5ac38b92f23b6f27f9 upstream.
+
+The original commit depended on a rework commit (724fc856c09e ("drm/vc4:
+hdmi: Split the CEC disable / enable functions in two")) that
+(rightfully) didn't reach stable.
+
+However, probably because the context changed, when the patch was
+applied to stable the pm_runtime_put call got moved from the end of the
+vc4_hdmi_cec_adap_enable function (that would have become
+vc4_hdmi_cec_disable with the rework) to vc4_hdmi_cec_init.
+
+This means that at probe time, we now drop our reference to the clocks
+and power domains and thus end up with a CPU hang when the CPU tries to
+access registers.
+
+The call to pm_runtime_resume_and_get() is also problematic since the
+.adap_enable CEC hook is called both to enable and to disable the
+controller. That means that we'll now call pm_runtime_resume_and_get()
+at disable time as well, messing with the reference counting.
+
+The behaviour we want instead is to call pm_runtime_resume_and_get()
+when the CEC controller is enabled, and pm_runtime_put() when it's
+disabled.
+
+We need to move things around a bit to behave that way, but it aligns
+stable with upstream.
+
+Cc: <stable@vger.kernel.org> # 5.10.x
+Cc: <stable@vger.kernel.org> # 5.15.x
+Cc: <stable@vger.kernel.org> # 5.16.x
+Reported-by: Michael Stapelberg <michael+drm@stapelberg.ch>
+Signed-off-by: Maxime Ripard <maxime@cerno.tech>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/vc4/vc4_hdmi.c |   27 ++++++++++++++-------------
+ 1 file changed, 14 insertions(+), 13 deletions(-)
+
+--- a/drivers/gpu/drm/vc4/vc4_hdmi.c
++++ b/drivers/gpu/drm/vc4/vc4_hdmi.c
+@@ -1402,18 +1402,18 @@ static int vc4_hdmi_cec_adap_enable(stru
+       u32 val;
+       int ret;
+-      ret = pm_runtime_resume_and_get(&vc4_hdmi->pdev->dev);
+-      if (ret)
+-              return ret;
+-
+-      val = HDMI_READ(HDMI_CEC_CNTRL_5);
+-      val &= ~(VC4_HDMI_CEC_TX_SW_RESET | VC4_HDMI_CEC_RX_SW_RESET |
+-               VC4_HDMI_CEC_CNT_TO_4700_US_MASK |
+-               VC4_HDMI_CEC_CNT_TO_4500_US_MASK);
+-      val |= ((4700 / usecs) << VC4_HDMI_CEC_CNT_TO_4700_US_SHIFT) |
+-             ((4500 / usecs) << VC4_HDMI_CEC_CNT_TO_4500_US_SHIFT);
+-
+       if (enable) {
++              ret = pm_runtime_resume_and_get(&vc4_hdmi->pdev->dev);
++              if (ret)
++                      return ret;
++
++              val = HDMI_READ(HDMI_CEC_CNTRL_5);
++              val &= ~(VC4_HDMI_CEC_TX_SW_RESET | VC4_HDMI_CEC_RX_SW_RESET |
++                       VC4_HDMI_CEC_CNT_TO_4700_US_MASK |
++                       VC4_HDMI_CEC_CNT_TO_4500_US_MASK);
++              val |= ((4700 / usecs) << VC4_HDMI_CEC_CNT_TO_4700_US_SHIFT) |
++                      ((4500 / usecs) << VC4_HDMI_CEC_CNT_TO_4500_US_SHIFT);
++
+               HDMI_WRITE(HDMI_CEC_CNTRL_5, val |
+                          VC4_HDMI_CEC_TX_SW_RESET | VC4_HDMI_CEC_RX_SW_RESET);
+               HDMI_WRITE(HDMI_CEC_CNTRL_5, val);
+@@ -1439,7 +1439,10 @@ static int vc4_hdmi_cec_adap_enable(stru
+               HDMI_WRITE(HDMI_CEC_CPU_MASK_SET, VC4_HDMI_CPU_CEC);
+               HDMI_WRITE(HDMI_CEC_CNTRL_5, val |
+                          VC4_HDMI_CEC_TX_SW_RESET | VC4_HDMI_CEC_RX_SW_RESET);
++
++              pm_runtime_put(&vc4_hdmi->pdev->dev);
+       }
++
+       return 0;
+ }
+@@ -1531,8 +1534,6 @@ static int vc4_hdmi_cec_init(struct vc4_
+       if (ret < 0)
+               goto err_delete_cec_adap;
+-      pm_runtime_put(&vc4_hdmi->pdev->dev);
+-
+       return 0;
+ err_delete_cec_adap:
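
The patch above moves the pm_runtime handling so the reference is taken only in the enable path and dropped only in the disable path, instead of being dropped at init time. As a rough, hypothetical sketch of that balanced pattern (illustrative names, not the vc4 driver code):

#include <linux/pm_runtime.h>
#include <media/cec.h>

/* Hypothetical driver state -- not the vc4 structures. */
struct foo_hdmi {
        struct device *dev;
};

static int foo_cec_adap_enable(struct cec_adapter *adap, bool enable)
{
        struct foo_hdmi *hdmi = cec_get_drvdata(adap);
        int ret;

        if (enable) {
                /* Hold clocks/power domains only while CEC is in use. */
                ret = pm_runtime_resume_and_get(hdmi->dev);
                if (ret)
                        return ret;
                /* ... program the CEC block here ... */
        } else {
                /* ... quiesce the CEC block here ... */
                pm_runtime_put(hdmi->dev);
        }

        return 0;
}

Every successful pm_runtime_resume_and_get() in the enable branch is paired with exactly one pm_runtime_put() in the disable branch, so the probe path no longer needs (or drops) a reference of its own.
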
diff --git a/queue-5.10/kvm-x86-forcibly-leave-nested-virt-when-smm-state-is-toggled.patch b/queue-5.10/kvm-x86-forcibly-leave-nested-virt-when-smm-state-is-toggled.patch
new file mode 100644 (file)
index 0000000..83a8be0
--- /dev/null
@@ -0,0 +1,174 @@
+From f7e570780efc5cec9b2ed1e0472a7da14e864fdb Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Tue, 25 Jan 2022 22:03:58 +0000
+Subject: KVM: x86: Forcibly leave nested virt when SMM state is toggled
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit f7e570780efc5cec9b2ed1e0472a7da14e864fdb upstream.
+
+Forcibly leave nested virtualization operation if userspace toggles SMM
+state via KVM_SET_VCPU_EVENTS or KVM_SYNC_X86_EVENTS.  If userspace
+forces the vCPU out of SMM while it's post-VMXON and then injects an SMI,
+vmx_enter_smm() will overwrite vmx->nested.smm.vmxon and end up with both
+vmxon=false and smm.vmxon=false, but all other nVMX state allocated.
+
+Don't attempt to gracefully handle the transition as (a) most transitions
+are nonsensical, e.g. forcing SMM while L2 is running, (b) there isn't
+sufficient information to handle all transitions, e.g. SVM wants access
+to the SMRAM save state, and (c) KVM_SET_VCPU_EVENTS must precede
+KVM_SET_NESTED_STATE during state restore as the latter disallows putting
+the vCPU into L2 if SMM is active, and disallows tagging the vCPU as
+being post-VMXON in SMM if SMM is not active.
+
+Abuse of KVM_SET_VCPU_EVENTS manifests as a WARN and memory leak in nVMX
+due to failure to free vmcs01's shadow VMCS, but the bug goes far beyond
+just a memory leak, e.g. toggling SMM on while L2 is active puts the vCPU
+in an architecturally impossible state.
+
+  WARNING: CPU: 0 PID: 3606 at free_loaded_vmcs arch/x86/kvm/vmx/vmx.c:2665 [inline]
+  WARNING: CPU: 0 PID: 3606 at free_loaded_vmcs+0x158/0x1a0 arch/x86/kvm/vmx/vmx.c:2656
+  Modules linked in:
+  CPU: 1 PID: 3606 Comm: syz-executor725 Not tainted 5.17.0-rc1-syzkaller #0
+  Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+  RIP: 0010:free_loaded_vmcs arch/x86/kvm/vmx/vmx.c:2665 [inline]
+  RIP: 0010:free_loaded_vmcs+0x158/0x1a0 arch/x86/kvm/vmx/vmx.c:2656
+  Code: <0f> 0b eb b3 e8 8f 4d 9f 00 e9 f7 fe ff ff 48 89 df e8 92 4d 9f 00
+  Call Trace:
+   <TASK>
+   kvm_arch_vcpu_destroy+0x72/0x2f0 arch/x86/kvm/x86.c:11123
+   kvm_vcpu_destroy arch/x86/kvm/../../../virt/kvm/kvm_main.c:441 [inline]
+   kvm_destroy_vcpus+0x11f/0x290 arch/x86/kvm/../../../virt/kvm/kvm_main.c:460
+   kvm_free_vcpus arch/x86/kvm/x86.c:11564 [inline]
+   kvm_arch_destroy_vm+0x2e8/0x470 arch/x86/kvm/x86.c:11676
+   kvm_destroy_vm arch/x86/kvm/../../../virt/kvm/kvm_main.c:1217 [inline]
+   kvm_put_kvm+0x4fa/0xb00 arch/x86/kvm/../../../virt/kvm/kvm_main.c:1250
+   kvm_vm_release+0x3f/0x50 arch/x86/kvm/../../../virt/kvm/kvm_main.c:1273
+   __fput+0x286/0x9f0 fs/file_table.c:311
+   task_work_run+0xdd/0x1a0 kernel/task_work.c:164
+   exit_task_work include/linux/task_work.h:32 [inline]
+   do_exit+0xb29/0x2a30 kernel/exit.c:806
+   do_group_exit+0xd2/0x2f0 kernel/exit.c:935
+   get_signal+0x4b0/0x28c0 kernel/signal.c:2862
+   arch_do_signal_or_restart+0x2a9/0x1c40 arch/x86/kernel/signal.c:868
+   handle_signal_work kernel/entry/common.c:148 [inline]
+   exit_to_user_mode_loop kernel/entry/common.c:172 [inline]
+   exit_to_user_mode_prepare+0x17d/0x290 kernel/entry/common.c:207
+   __syscall_exit_to_user_mode_work kernel/entry/common.c:289 [inline]
+   syscall_exit_to_user_mode+0x19/0x60 kernel/entry/common.c:300
+   do_syscall_64+0x42/0xb0 arch/x86/entry/common.c:86
+   entry_SYSCALL_64_after_hwframe+0x44/0xae
+   </TASK>
+
+Cc: stable@vger.kernel.org
+Reported-by: syzbot+8112db3ab20e70d50c31@syzkaller.appspotmail.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20220125220358.2091737-1-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Backported-by: Tadeusz Struk <tadeusz.struk@linaro.org>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/kvm_host.h |    1 +
+ arch/x86/kvm/svm/nested.c       |   10 ++++++++--
+ arch/x86/kvm/svm/svm.c          |    2 +-
+ arch/x86/kvm/svm/svm.h          |    2 +-
+ arch/x86/kvm/vmx/nested.c       |    1 +
+ arch/x86/kvm/x86.c              |    2 ++
+ 6 files changed, 14 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -1285,6 +1285,7 @@ struct kvm_x86_ops {
+ };
+ struct kvm_x86_nested_ops {
++      void (*leave_nested)(struct kvm_vcpu *vcpu);
+       int (*check_events)(struct kvm_vcpu *vcpu);
+       bool (*hv_timer_pending)(struct kvm_vcpu *vcpu);
+       int (*get_state)(struct kvm_vcpu *vcpu,
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -783,8 +783,10 @@ void svm_free_nested(struct vcpu_svm *sv
+ /*
+  * Forcibly leave nested mode in order to be able to reset the VCPU later on.
+  */
+-void svm_leave_nested(struct vcpu_svm *svm)
++void svm_leave_nested(struct kvm_vcpu *vcpu)
+ {
++      struct vcpu_svm *svm = to_svm(vcpu);
++
+       if (is_guest_mode(&svm->vcpu)) {
+               struct vmcb *hsave = svm->nested.hsave;
+               struct vmcb *vmcb = svm->vmcb;
+@@ -1185,7 +1187,7 @@ static int svm_set_nested_state(struct k
+               return -EINVAL;
+       if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) {
+-              svm_leave_nested(svm);
++              svm_leave_nested(vcpu);
+               svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));
+               return 0;
+       }
+@@ -1238,6 +1240,9 @@ static int svm_set_nested_state(struct k
+       copy_vmcb_control_area(&hsave->control, &svm->vmcb->control);
+       hsave->save = *save;
++      if (is_guest_mode(vcpu))
++              svm_leave_nested(vcpu);
++
+       svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa;
+       load_nested_vmcb_control(svm, ctl);
+       nested_prepare_vmcb_control(svm);
+@@ -1252,6 +1257,7 @@ out_free:
+ }
+ struct kvm_x86_nested_ops svm_nested_ops = {
++      .leave_nested = svm_leave_nested,
+       .check_events = svm_check_nested_events,
+       .get_nested_state_pages = svm_get_nested_state_pages,
+       .get_state = svm_get_nested_state,
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -279,7 +279,7 @@ int svm_set_efer(struct kvm_vcpu *vcpu,
+       if ((old_efer & EFER_SVME) != (efer & EFER_SVME)) {
+               if (!(efer & EFER_SVME)) {
+-                      svm_leave_nested(svm);
++                      svm_leave_nested(vcpu);
+                       svm_set_gif(svm, true);
+                       /*
+--- a/arch/x86/kvm/svm/svm.h
++++ b/arch/x86/kvm/svm/svm.h
+@@ -393,7 +393,7 @@ static inline bool nested_exit_on_nmi(st
+ int enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
+                        struct vmcb *nested_vmcb);
+-void svm_leave_nested(struct vcpu_svm *svm);
++void svm_leave_nested(struct kvm_vcpu *vcpu);
+ void svm_free_nested(struct vcpu_svm *svm);
+ int svm_allocate_nested(struct vcpu_svm *svm);
+ int nested_svm_vmrun(struct vcpu_svm *svm);
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -6628,6 +6628,7 @@ __init int nested_vmx_hardware_setup(int
+ }
+ struct kvm_x86_nested_ops vmx_nested_ops = {
++      .leave_nested = vmx_leave_nested,
+       .check_events = vmx_check_nested_events,
+       .hv_timer_pending = nested_vmx_preemption_timer_pending,
+       .get_state = vmx_get_nested_state,
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -4391,6 +4391,8 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_e
+                               vcpu->arch.hflags |= HF_SMM_MASK;
+                       else
+                               vcpu->arch.hflags &= ~HF_SMM_MASK;
++
++                      kvm_x86_ops.nested_ops->leave_nested(vcpu);
+                       kvm_smm_changed(vcpu);
+               }
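
The backport above wires a new leave_nested callback into kvm_x86_nested_ops and invokes it whenever userspace flips the SMM flag, so stale nested state cannot survive the toggle. The following is a minimal userspace analogue of that callback pattern, with made-up names and none of the real KVM data structures:

#include <stdbool.h>
#include <stdio.h>

struct vcpu {
        bool smm;
        bool guest_mode;        /* "running L2" */
};

struct nested_ops {
        void (*leave_nested)(struct vcpu *v);
};

static void demo_leave_nested(struct vcpu *v)
{
        if (v->guest_mode) {
                v->guest_mode = false;
                puts("forcibly left nested mode");
        }
}

static const struct nested_ops ops = { .leave_nested = demo_leave_nested };

static void set_smm(struct vcpu *v, bool smm)
{
        if (v->smm != smm) {
                /* Mirrors the kvm_x86_ops.nested_ops->leave_nested(vcpu)
                 * call added above: tear down nested state before the
                 * SMM flag changes underneath it. */
                ops.leave_nested(v);
                v->smm = smm;
        }
}

int main(void)
{
        struct vcpu v = { .smm = false, .guest_mode = true };

        set_smm(&v, true);      /* prints "forcibly left nested mode" */
        return 0;
}
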
diff --git a/queue-5.10/net-ipa-fix-atomic-update-in-ipa_endpoint_replenish.patch b/queue-5.10/net-ipa-fix-atomic-update-in-ipa_endpoint_replenish.patch
new file mode 100644 (file)
index 0000000..80a9f8c
--- /dev/null
@@ -0,0 +1,45 @@
+From 6c0e3b5ce94947b311348c367db9e11dcb2ccc93 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@linaro.org>
+Date: Wed, 12 Jan 2022 07:30:10 -0600
+Subject: net: ipa: fix atomic update in ipa_endpoint_replenish()
+
+From: Alex Elder <elder@linaro.org>
+
+commit 6c0e3b5ce94947b311348c367db9e11dcb2ccc93 upstream.
+
+In ipa_endpoint_replenish(), if an error occurs when attempting to
+replenish a receive buffer, we just quit and try again later.  In
+that case we increment the backlog count to reflect that the attempt
+was unsuccessful.  Then, if the add_one flag was true we increment
+the backlog again.
+
+This second increment is not included in the backlog local variable
+though, and its value determines whether delayed work should be
+scheduled.  This is a bug.
+
+Fix this by determining whether 1 or 2 should be added to the
+backlog before adding it in an atomic_add_return() call.
+
+Reviewed-by: Matthias Kaehlcke <mka@chromium.org>
+Fixes: 84f9bd12d46db ("soc: qcom: ipa: IPA endpoints")
+Signed-off-by: Alex Elder <elder@linaro.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ipa/ipa_endpoint.c |    5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+--- a/drivers/net/ipa/ipa_endpoint.c
++++ b/drivers/net/ipa/ipa_endpoint.c
+@@ -918,10 +918,7 @@ static void ipa_endpoint_replenish(struc
+ try_again_later:
+       /* The last one didn't succeed, so fix the backlog */
+-      backlog = atomic_inc_return(&endpoint->replenish_backlog);
+-
+-      if (count)
+-              atomic_add(count, &endpoint->replenish_backlog);
++      backlog = atomic_add_return(count + 1, &endpoint->replenish_backlog);
+       /* Whenever a receive buffer transaction completes we'll try to
+        * replenish again.  It's unlikely, but if we fail to supply even
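
The one-line fix above replaces an increment plus a separate add with a single atomic_add_return(count + 1, ...), so the value used to decide whether delayed work should be scheduled reflects both updates. A small userspace sketch with C11 atomics (hypothetical, not the IPA driver) shows the equivalent pattern:

#include <stdatomic.h>
#include <stdio.h>

static atomic_uint replenish_backlog;

static unsigned int fix_backlog(unsigned int count)
{
        /* One atomic update covers both the failed attempt (+1) and the
         * caller-requested credit (+count); the value returned here is
         * what the "schedule delayed work?" check should be based on. */
        return atomic_fetch_add(&replenish_backlog, count + 1) + count + 1;
}

int main(void)
{
        atomic_store(&replenish_backlog, 0);
        printf("backlog after a failed attempt: %u\n", fix_backlog(1)); /* 2 */
        return 0;
}
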
diff --git a/queue-5.10/net-ipa-prevent-concurrent-replenish.patch b/queue-5.10/net-ipa-prevent-concurrent-replenish.patch
new file mode 100644 (file)
index 0000000..5b94c1d
--- /dev/null
@@ -0,0 +1,79 @@
+From 998c0bd2b3715244da7639cc4e6a2062cb79c3f4 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@linaro.org>
+Date: Wed, 12 Jan 2022 07:30:12 -0600
+Subject: net: ipa: prevent concurrent replenish
+
+From: Alex Elder <elder@linaro.org>
+
+commit 998c0bd2b3715244da7639cc4e6a2062cb79c3f4 upstream.
+
+We have seen cases where an endpoint RX completion interrupt arrives
+while replenishing for the endpoint is underway.  This causes another
+instance of replenishing to begin as part of completing the receive
+transaction.  If this occurs it can lead to transaction corruption.
+
+Use a new flag to ensure only one replenish instance for an endpoint
+executes at a time.
+
+Fixes: 84f9bd12d46db ("soc: qcom: ipa: IPA endpoints")
+Signed-off-by: Alex Elder <elder@linaro.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ipa/ipa_endpoint.c |   12 ++++++++++++
+ drivers/net/ipa/ipa_endpoint.h |    2 ++
+ 2 files changed, 14 insertions(+)
+
+--- a/drivers/net/ipa/ipa_endpoint.c
++++ b/drivers/net/ipa/ipa_endpoint.c
+@@ -907,16 +907,27 @@ static void ipa_endpoint_replenish(struc
+               return;
+       }
++      /* If already active, just update the backlog */
++      if (test_and_set_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags)) {
++              if (count)
++                      atomic_add(count, &endpoint->replenish_backlog);
++              return;
++      }
+       while (atomic_dec_not_zero(&endpoint->replenish_backlog))
+               if (ipa_endpoint_replenish_one(endpoint))
+                       goto try_again_later;
++
++      clear_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags);
++
+       if (count)
+               atomic_add(count, &endpoint->replenish_backlog);
+       return;
+ try_again_later:
++      clear_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags);
++
+       /* The last one didn't succeed, so fix the backlog */
+       backlog = atomic_add_return(count + 1, &endpoint->replenish_backlog);
+@@ -1470,6 +1481,7 @@ static void ipa_endpoint_setup_one(struc
+                * backlog is the same as the maximum outstanding TREs.
+                */
+               clear_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags);
++              clear_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags);
+               atomic_set(&endpoint->replenish_saved,
+                          gsi_channel_tre_max(gsi, endpoint->channel_id));
+               atomic_set(&endpoint->replenish_backlog, 0);
+--- a/drivers/net/ipa/ipa_endpoint.h
++++ b/drivers/net/ipa/ipa_endpoint.h
+@@ -43,10 +43,12 @@ enum ipa_endpoint_name {
+  * enum ipa_replenish_flag:   RX buffer replenish flags
+  *
+  * @IPA_REPLENISH_ENABLED:    Whether receive buffer replenishing is enabled
++ * @IPA_REPLENISH_ACTIVE:     Whether replenishing is underway
+  * @IPA_REPLENISH_COUNT:      Number of defined replenish flags
+  */
+ enum ipa_replenish_flag {
+       IPA_REPLENISH_ENABLED,
++      IPA_REPLENISH_ACTIVE,
+       IPA_REPLENISH_COUNT,    /* Number of flags (must be last) */
+ };
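
The new IPA_REPLENISH_ACTIVE bit acts as a simple mutual-exclusion flag: test_and_set_bit() lets exactly one caller run the replenish loop while any concurrent caller just records its work and returns. A userspace analogue of that guard, using C11 atomic_flag in place of the kernel bit operations (illustrative only):

#include <stdatomic.h>
#include <stdio.h>

static atomic_flag replenish_active = ATOMIC_FLAG_INIT;

static void replenish(void)
{
        /* If another caller already owns the loop, just record the work
         * (the real driver bumps the backlog) and let that caller do it. */
        if (atomic_flag_test_and_set(&replenish_active)) {
                puts("already active, deferring");
                return;
        }

        puts("replenishing receive buffers...");

        atomic_flag_clear(&replenish_active);
}

int main(void)
{
        replenish();
        return 0;
}
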
diff --git a/queue-5.10/net-ipa-use-a-bitmap-for-endpoint-replenish_enabled.patch b/queue-5.10/net-ipa-use-a-bitmap-for-endpoint-replenish_enabled.patch
new file mode 100644 (file)
index 0000000..6133148
--- /dev/null
@@ -0,0 +1,89 @@
+From c1aaa01dbf4cef95af3e04a5a43986c290e06ea3 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@linaro.org>
+Date: Wed, 12 Jan 2022 07:30:11 -0600
+Subject: net: ipa: use a bitmap for endpoint replenish_enabled
+
+From: Alex Elder <elder@linaro.org>
+
+commit c1aaa01dbf4cef95af3e04a5a43986c290e06ea3 upstream.
+
+Define a new replenish_flags bitmap to contain Boolean flags
+associated with an endpoint's replenishing state.  Replace the
+replenish_enabled field with a flag in that bitmap.  This is to
+prepare for the next patch, which adds another flag.
+
+Signed-off-by: Alex Elder <elder@linaro.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ipa/ipa_endpoint.c |    8 ++++----
+ drivers/net/ipa/ipa_endpoint.h |   13 ++++++++++++-
+ 2 files changed, 16 insertions(+), 5 deletions(-)
+
+--- a/drivers/net/ipa/ipa_endpoint.c
++++ b/drivers/net/ipa/ipa_endpoint.c
+@@ -901,7 +901,7 @@ static void ipa_endpoint_replenish(struc
+       struct gsi *gsi;
+       u32 backlog;
+-      if (!endpoint->replenish_enabled) {
++      if (!test_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags)) {
+               if (count)
+                       atomic_add(count, &endpoint->replenish_saved);
+               return;
+@@ -938,7 +938,7 @@ static void ipa_endpoint_replenish_enabl
+       u32 max_backlog;
+       u32 saved;
+-      endpoint->replenish_enabled = true;
++      set_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags);
+       while ((saved = atomic_xchg(&endpoint->replenish_saved, 0)))
+               atomic_add(saved, &endpoint->replenish_backlog);
+@@ -952,7 +952,7 @@ static void ipa_endpoint_replenish_disab
+ {
+       u32 backlog;
+-      endpoint->replenish_enabled = false;
++      clear_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags);
+       while ((backlog = atomic_xchg(&endpoint->replenish_backlog, 0)))
+               atomic_add(backlog, &endpoint->replenish_saved);
+ }
+@@ -1469,7 +1469,7 @@ static void ipa_endpoint_setup_one(struc
+               /* RX transactions require a single TRE, so the maximum
+                * backlog is the same as the maximum outstanding TREs.
+                */
+-              endpoint->replenish_enabled = false;
++              clear_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags);
+               atomic_set(&endpoint->replenish_saved,
+                          gsi_channel_tre_max(gsi, endpoint->channel_id));
+               atomic_set(&endpoint->replenish_backlog, 0);
+--- a/drivers/net/ipa/ipa_endpoint.h
++++ b/drivers/net/ipa/ipa_endpoint.h
+@@ -40,6 +40,17 @@ enum ipa_endpoint_name {
+ #define IPA_ENDPOINT_MAX              32      /* Max supported by driver */
+ /**
++ * enum ipa_replenish_flag:   RX buffer replenish flags
++ *
++ * @IPA_REPLENISH_ENABLED:    Whether receive buffer replenishing is enabled
++ * @IPA_REPLENISH_COUNT:      Number of defined replenish flags
++ */
++enum ipa_replenish_flag {
++      IPA_REPLENISH_ENABLED,
++      IPA_REPLENISH_COUNT,    /* Number of flags (must be last) */
++};
++
++/**
+  * struct ipa_endpoint - IPA endpoint information
+  * @channel_id:       EP's GSI channel
+  * @evt_ring_id: EP's GSI channel event ring
+@@ -60,7 +71,7 @@ struct ipa_endpoint {
+       struct net_device *netdev;
+       /* Receive buffer replenishing for RX endpoints */
+-      bool replenish_enabled;
++      DECLARE_BITMAP(replenish_flags, IPA_REPLENISH_COUNT);
+       u32 replenish_ready;
+       atomic_t replenish_saved;
+       atomic_t replenish_backlog;
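
The patch above turns the single replenish_enabled bool into a bitmap indexed by an enum, which is what lets the follow-up patch add IPA_REPLENISH_ACTIVE without growing the structure. A generic sketch of that enum + DECLARE_BITMAP() idiom, with placeholder names rather than the IPA ones:

#include <linux/bitmap.h>
#include <linux/bitops.h>

enum foo_flag {
        FOO_ENABLED,
        FOO_ACTIVE,
        FOO_FLAG_COUNT,         /* number of flags, must be last */
};

struct foo_endpoint {
        DECLARE_BITMAP(flags, FOO_FLAG_COUNT);
};

static void foo_enable(struct foo_endpoint *ep)
{
        set_bit(FOO_ENABLED, ep->flags);
}

static bool foo_is_enabled(struct foo_endpoint *ep)
{
        return test_bit(FOO_ENABLED, ep->flags);
}
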
diff --git a/queue-5.10/perf-core-fix-cgroup-event-list-management.patch b/queue-5.10/perf-core-fix-cgroup-event-list-management.patch
new file mode 100644 (file)
index 0000000..5c4c893
--- /dev/null
@@ -0,0 +1,72 @@
+From c5de60cd622a2607c043ba65e25a6e9998a369f9 Mon Sep 17 00:00:00 2001
+From: Namhyung Kim <namhyung@kernel.org>
+Date: Mon, 24 Jan 2022 11:58:08 -0800
+Subject: perf/core: Fix cgroup event list management
+
+From: Namhyung Kim <namhyung@kernel.org>
+
+commit c5de60cd622a2607c043ba65e25a6e9998a369f9 upstream.
+
+The active cgroup events are managed in the per-cpu cgrp_cpuctx_list.
+This list is only accessed from current cpu and not protected by any
+locks.  But from the commit ef54c1a476ae ("perf: Rework
+perf_event_exit_event()"), it's possible to access (actually modify)
+the list from another cpu.
+
+In the perf_remove_from_context(), it can remove an event from the
+context without an IPI when the context is not active.  This is not
+safe with cgroup events which can have some active events in the
+context even if ctx->is_active is 0 at the moment.  The target cpu
+might be in the middle of list iteration at the same time.
+
+If the event is enabled when it's about to be closed, it might call
+perf_cgroup_event_disable() and list_del() with the cgrp_cpuctx_list
+on a different cpu.
+
+This resulted in a crash due to an invalid list pointer access during
+the cgroup list traversal on the cpu which the event belongs to.
+
+Let's fall back to IPI to access the cgrp_cpuctx_list from that cpu.
+Similarly, perf_install_in_context() should use IPI for the cgroup
+events too.
+
+Fixes: ef54c1a476ae ("perf: Rework perf_event_exit_event()")
+Signed-off-by: Namhyung Kim <namhyung@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/20220124195808.2252071-1-namhyung@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/events/core.c |   11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -2466,7 +2466,11 @@ static void perf_remove_from_context(str
+        * event_function_call() user.
+        */
+       raw_spin_lock_irq(&ctx->lock);
+-      if (!ctx->is_active) {
++      /*
++       * Cgroup events are per-cpu events, and must IPI because of
++       * cgrp_cpuctx_list.
++       */
++      if (!ctx->is_active && !is_cgroup_event(event)) {
+               __perf_remove_from_context(event, __get_cpu_context(ctx),
+                                          ctx, (void *)flags);
+               raw_spin_unlock_irq(&ctx->lock);
+@@ -2899,11 +2903,14 @@ perf_install_in_context(struct perf_even
+        * perf_event_attr::disabled events will not run and can be initialized
+        * without IPI. Except when this is the first event for the context, in
+        * that case we need the magic of the IPI to set ctx->is_active.
++       * Similarly, cgroup events for the context also needs the IPI to
++       * manipulate the cgrp_cpuctx_list.
+        *
+        * The IOC_ENABLE that is sure to follow the creation of a disabled
+        * event will issue the IPI and reprogram the hardware.
+        */
+-      if (__perf_effective_state(event) == PERF_EVENT_STATE_OFF && ctx->nr_events) {
++      if (__perf_effective_state(event) == PERF_EVENT_STATE_OFF &&
++          ctx->nr_events && !is_cgroup_event(event)) {
+               raw_spin_lock_irq(&ctx->lock);
+               if (ctx->task == TASK_TOMBSTONE) {
+                       raw_spin_unlock_irq(&ctx->lock);
diff --git a/queue-5.10/perf-rework-perf_event_exit_event.patch b/queue-5.10/perf-rework-perf_event_exit_event.patch
new file mode 100644 (file)
index 0000000..b5c467c
--- /dev/null
@@ -0,0 +1,265 @@
+From ef54c1a476aef7eef26fe13ea10dc090952c00f8 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Thu, 8 Apr 2021 12:35:56 +0200
+Subject: perf: Rework perf_event_exit_event()
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit ef54c1a476aef7eef26fe13ea10dc090952c00f8 upstream.
+
+Make perf_event_exit_event() more robust, such that we can use it from
+other contexts. Specifically the up and coming remove_on_exec.
+
+For this to work we need to address a few issues. Remove_on_exec will
+not destroy the entire context, so we cannot rely on TASK_TOMBSTONE to
+disable event_function_call() and we thus have to use
+perf_remove_from_context().
+
+When using perf_remove_from_context(), there are two races to consider.
+The first is against close(), where we can have concurrent tear-down
+of the event. The second is against child_list iteration, which should
+not find a half baked event.
+
+To address this, teach perf_remove_from_context() to special case
+!ctx->is_active and about DETACH_CHILD.
+
+[ elver@google.com: fix racing parent/child exit in sync_child_event(). ]
+Signed-off-by: Marco Elver <elver@google.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/20210408103605.1676875-2-elver@google.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/perf_event.h |    1 
+ kernel/events/core.c       |  144 +++++++++++++++++++++++++--------------------
+ 2 files changed, 81 insertions(+), 64 deletions(-)
+
+--- a/include/linux/perf_event.h
++++ b/include/linux/perf_event.h
+@@ -607,6 +607,7 @@ struct swevent_hlist {
+ #define PERF_ATTACH_TASK_DATA 0x08
+ #define PERF_ATTACH_ITRACE    0x10
+ #define PERF_ATTACH_SCHED_CB  0x20
++#define PERF_ATTACH_CHILD     0x40
+ struct perf_cgroup;
+ struct perf_buffer;
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -2276,6 +2276,26 @@ out:
+       perf_event__header_size(leader);
+ }
++static void sync_child_event(struct perf_event *child_event);
++
++static void perf_child_detach(struct perf_event *event)
++{
++      struct perf_event *parent_event = event->parent;
++
++      if (!(event->attach_state & PERF_ATTACH_CHILD))
++              return;
++
++      event->attach_state &= ~PERF_ATTACH_CHILD;
++
++      if (WARN_ON_ONCE(!parent_event))
++              return;
++
++      lockdep_assert_held(&parent_event->child_mutex);
++
++      sync_child_event(event);
++      list_del_init(&event->child_list);
++}
++
+ static bool is_orphaned_event(struct perf_event *event)
+ {
+       return event->state == PERF_EVENT_STATE_DEAD;
+@@ -2383,6 +2403,7 @@ group_sched_out(struct perf_event *group
+ }
+ #define DETACH_GROUP  0x01UL
++#define DETACH_CHILD  0x02UL
+ /*
+  * Cross CPU call to remove a performance event
+@@ -2406,6 +2427,8 @@ __perf_remove_from_context(struct perf_e
+       event_sched_out(event, cpuctx, ctx);
+       if (flags & DETACH_GROUP)
+               perf_group_detach(event);
++      if (flags & DETACH_CHILD)
++              perf_child_detach(event);
+       list_del_event(event, ctx);
+       if (!ctx->nr_events && ctx->is_active) {
+@@ -2437,25 +2460,21 @@ static void perf_remove_from_context(str
+       lockdep_assert_held(&ctx->mutex);
+-      event_function_call(event, __perf_remove_from_context, (void *)flags);
+-
+       /*
+-       * The above event_function_call() can NO-OP when it hits
+-       * TASK_TOMBSTONE. In that case we must already have been detached
+-       * from the context (by perf_event_exit_event()) but the grouping
+-       * might still be in-tact.
+-       */
+-      WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
+-      if ((flags & DETACH_GROUP) &&
+-          (event->attach_state & PERF_ATTACH_GROUP)) {
+-              /*
+-               * Since in that case we cannot possibly be scheduled, simply
+-               * detach now.
+-               */
+-              raw_spin_lock_irq(&ctx->lock);
+-              perf_group_detach(event);
++       * Because of perf_event_exit_task(), perf_remove_from_context() ought
++       * to work in the face of TASK_TOMBSTONE, unlike every other
++       * event_function_call() user.
++       */
++      raw_spin_lock_irq(&ctx->lock);
++      if (!ctx->is_active) {
++              __perf_remove_from_context(event, __get_cpu_context(ctx),
++                                         ctx, (void *)flags);
+               raw_spin_unlock_irq(&ctx->lock);
++              return;
+       }
++      raw_spin_unlock_irq(&ctx->lock);
++
++      event_function_call(event, __perf_remove_from_context, (void *)flags);
+ }
+ /*
+@@ -12330,14 +12349,17 @@ void perf_pmu_migrate_context(struct pmu
+ }
+ EXPORT_SYMBOL_GPL(perf_pmu_migrate_context);
+-static void sync_child_event(struct perf_event *child_event,
+-                             struct task_struct *child)
++static void sync_child_event(struct perf_event *child_event)
+ {
+       struct perf_event *parent_event = child_event->parent;
+       u64 child_val;
+-      if (child_event->attr.inherit_stat)
+-              perf_event_read_event(child_event, child);
++      if (child_event->attr.inherit_stat) {
++              struct task_struct *task = child_event->ctx->task;
++
++              if (task && task != TASK_TOMBSTONE)
++                      perf_event_read_event(child_event, task);
++      }
+       child_val = perf_event_count(child_event);
+@@ -12352,60 +12374,53 @@ static void sync_child_event(struct perf
+ }
+ static void
+-perf_event_exit_event(struct perf_event *child_event,
+-                    struct perf_event_context *child_ctx,
+-                    struct task_struct *child)
++perf_event_exit_event(struct perf_event *event, struct perf_event_context *ctx)
+ {
+-      struct perf_event *parent_event = child_event->parent;
++      struct perf_event *parent_event = event->parent;
++      unsigned long detach_flags = 0;
+-      /*
+-       * Do not destroy the 'original' grouping; because of the context
+-       * switch optimization the original events could've ended up in a
+-       * random child task.
+-       *
+-       * If we were to destroy the original group, all group related
+-       * operations would cease to function properly after this random
+-       * child dies.
+-       *
+-       * Do destroy all inherited groups, we don't care about those
+-       * and being thorough is better.
+-       */
+-      raw_spin_lock_irq(&child_ctx->lock);
+-      WARN_ON_ONCE(child_ctx->is_active);
++      if (parent_event) {
++              /*
++               * Do not destroy the 'original' grouping; because of the
++               * context switch optimization the original events could've
++               * ended up in a random child task.
++               *
++               * If we were to destroy the original group, all group related
++               * operations would cease to function properly after this
++               * random child dies.
++               *
++               * Do destroy all inherited groups, we don't care about those
++               * and being thorough is better.
++               */
++              detach_flags = DETACH_GROUP | DETACH_CHILD;
++              mutex_lock(&parent_event->child_mutex);
++      }
+-      if (parent_event)
+-              perf_group_detach(child_event);
+-      list_del_event(child_event, child_ctx);
+-      perf_event_set_state(child_event, PERF_EVENT_STATE_EXIT); /* is_event_hup() */
+-      raw_spin_unlock_irq(&child_ctx->lock);
++      perf_remove_from_context(event, detach_flags);
++
++      raw_spin_lock_irq(&ctx->lock);
++      if (event->state > PERF_EVENT_STATE_EXIT)
++              perf_event_set_state(event, PERF_EVENT_STATE_EXIT);
++      raw_spin_unlock_irq(&ctx->lock);
+       /*
+-       * Parent events are governed by their filedesc, retain them.
++       * Child events can be freed.
+        */
+-      if (!parent_event) {
+-              perf_event_wakeup(child_event);
++      if (parent_event) {
++              mutex_unlock(&parent_event->child_mutex);
++              /*
++               * Kick perf_poll() for is_event_hup();
++               */
++              perf_event_wakeup(parent_event);
++              free_event(event);
++              put_event(parent_event);
+               return;
+       }
+-      /*
+-       * Child events can be cleaned up.
+-       */
+-
+-      sync_child_event(child_event, child);
+       /*
+-       * Remove this event from the parent's list
+-       */
+-      WARN_ON_ONCE(parent_event->ctx->parent_ctx);
+-      mutex_lock(&parent_event->child_mutex);
+-      list_del_init(&child_event->child_list);
+-      mutex_unlock(&parent_event->child_mutex);
+-
+-      /*
+-       * Kick perf_poll() for is_event_hup().
++       * Parent events are governed by their filedesc, retain them.
+        */
+-      perf_event_wakeup(parent_event);
+-      free_event(child_event);
+-      put_event(parent_event);
++      perf_event_wakeup(event);
+ }
+ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
+@@ -12462,7 +12477,7 @@ static void perf_event_exit_task_context
+       perf_event_task(child, child_ctx, 0);
+       list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry)
+-              perf_event_exit_event(child_event, child_ctx, child);
++              perf_event_exit_event(child_event, child_ctx);
+       mutex_unlock(&child_ctx->mutex);
+@@ -12722,6 +12737,7 @@ inherit_event(struct perf_event *parent_
+        */
+       raw_spin_lock_irqsave(&child_ctx->lock, flags);
+       add_event_to_ctx(child_event, child_ctx);
++      child_event->attach_state |= PERF_ATTACH_CHILD;
+       raw_spin_unlock_irqrestore(&child_ctx->lock, flags);
+       /*
diff --git a/queue-5.10/psi-fix-uaf-issue-when-psi-trigger-is-destroyed-while-being-polled.patch b/queue-5.10/psi-fix-uaf-issue-when-psi-trigger-is-destroyed-while-being-polled.patch
new file mode 100644 (file)
index 0000000..f853f24
--- /dev/null
@@ -0,0 +1,243 @@
+From a06247c6804f1a7c86a2e5398a4c1f1db1471848 Mon Sep 17 00:00:00 2001
+From: Suren Baghdasaryan <surenb@google.com>
+Date: Tue, 11 Jan 2022 15:23:09 -0800
+Subject: psi: Fix uaf issue when psi trigger is destroyed while being polled
+
+From: Suren Baghdasaryan <surenb@google.com>
+
+commit a06247c6804f1a7c86a2e5398a4c1f1db1471848 upstream.
+
+With a write operation on psi files replacing an old trigger with a new
+one, the lifetime of its waitqueue is totally arbitrary. Overwriting an
+existing trigger causes its waitqueue to be freed and a pending poll()
+will stumble on trigger->event_wait, which was destroyed.
+Fix this by disallowing redefinition of an existing psi trigger. If a
+write operation is used on a file descriptor with an already existing
+psi trigger, the operation will fail with an EBUSY error.
+Also bypass the check for psi_disabled in psi_trigger_destroy(), as the
+flag can be flipped after the trigger is created, leading to a memory
+leak.
+
+Fixes: 0e94682b73bf ("psi: introduce psi monitor")
+Reported-by: syzbot+cdb5dd11c97cc532efad@syzkaller.appspotmail.com
+Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
+Analyzed-by: Eric Biggers <ebiggers@kernel.org>
+Signed-off-by: Suren Baghdasaryan <surenb@google.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Eric Biggers <ebiggers@google.com>
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20220111232309.1786347-1-surenb@google.com
+[surenb: backported to 5.10 kernel]
+CC: stable@vger.kernel.org # 5.10
+Signed-off-by: Suren Baghdasaryan <surenb@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/accounting/psi.rst |    3 +
+ include/linux/psi.h              |    2 -
+ include/linux/psi_types.h        |    3 -
+ kernel/cgroup/cgroup.c           |   11 ++++--
+ kernel/sched/psi.c               |   66 +++++++++++++++++----------------------
+ 5 files changed, 40 insertions(+), 45 deletions(-)
+
+--- a/Documentation/accounting/psi.rst
++++ b/Documentation/accounting/psi.rst
+@@ -92,7 +92,8 @@ Triggers can be set on more than one psi
+ for the same psi metric can be specified. However for each trigger a separate
+ file descriptor is required to be able to poll it separately from others,
+ therefore for each trigger a separate open() syscall should be made even
+-when opening the same psi interface file.
++when opening the same psi interface file. Write operations to a file descriptor
++with an already existing psi trigger will fail with EBUSY.
+ Monitors activate only when system enters stall state for the monitored
+ psi metric and deactivates upon exit from the stall state. While system is
+--- a/include/linux/psi.h
++++ b/include/linux/psi.h
+@@ -33,7 +33,7 @@ void cgroup_move_task(struct task_struct
+ struct psi_trigger *psi_trigger_create(struct psi_group *group,
+                       char *buf, size_t nbytes, enum psi_res res);
+-void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *t);
++void psi_trigger_destroy(struct psi_trigger *t);
+ __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file,
+                       poll_table *wait);
+--- a/include/linux/psi_types.h
++++ b/include/linux/psi_types.h
+@@ -128,9 +128,6 @@ struct psi_trigger {
+        * events to one per window
+        */
+       u64 last_event_time;
+-
+-      /* Refcounting to prevent premature destruction */
+-      struct kref refcount;
+ };
+ struct psi_group {
+--- a/kernel/cgroup/cgroup.c
++++ b/kernel/cgroup/cgroup.c
+@@ -3601,6 +3601,12 @@ static ssize_t cgroup_pressure_write(str
+       cgroup_get(cgrp);
+       cgroup_kn_unlock(of->kn);
++      /* Allow only one trigger per file descriptor */
++      if (of->priv) {
++              cgroup_put(cgrp);
++              return -EBUSY;
++      }
++
+       psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;
+       new = psi_trigger_create(psi, buf, nbytes, res);
+       if (IS_ERR(new)) {
+@@ -3608,8 +3614,7 @@ static ssize_t cgroup_pressure_write(str
+               return PTR_ERR(new);
+       }
+-      psi_trigger_replace(&of->priv, new);
+-
++      smp_store_release(&of->priv, new);
+       cgroup_put(cgrp);
+       return nbytes;
+@@ -3644,7 +3649,7 @@ static __poll_t cgroup_pressure_poll(str
+ static void cgroup_pressure_release(struct kernfs_open_file *of)
+ {
+-      psi_trigger_replace(&of->priv, NULL);
++      psi_trigger_destroy(of->priv);
+ }
+ #endif /* CONFIG_PSI */
+--- a/kernel/sched/psi.c
++++ b/kernel/sched/psi.c
+@@ -1116,7 +1116,6 @@ struct psi_trigger *psi_trigger_create(s
+       t->event = 0;
+       t->last_event_time = 0;
+       init_waitqueue_head(&t->event_wait);
+-      kref_init(&t->refcount);
+       mutex_lock(&group->trigger_lock);
+@@ -1145,15 +1144,19 @@ struct psi_trigger *psi_trigger_create(s
+       return t;
+ }
+-static void psi_trigger_destroy(struct kref *ref)
++void psi_trigger_destroy(struct psi_trigger *t)
+ {
+-      struct psi_trigger *t = container_of(ref, struct psi_trigger, refcount);
+-      struct psi_group *group = t->group;
++      struct psi_group *group;
+       struct task_struct *task_to_destroy = NULL;
+-      if (static_branch_likely(&psi_disabled))
++      /*
++       * We do not check psi_disabled since it might have been disabled after
++       * the trigger got created.
++       */
++      if (!t)
+               return;
++      group = t->group;
+       /*
+        * Wakeup waiters to stop polling. Can happen if cgroup is deleted
+        * from under a polling process.
+@@ -1189,9 +1192,9 @@ static void psi_trigger_destroy(struct k
+       mutex_unlock(&group->trigger_lock);
+       /*
+-       * Wait for both *trigger_ptr from psi_trigger_replace and
+-       * poll_task RCUs to complete their read-side critical sections
+-       * before destroying the trigger and optionally the poll_task
++       * Wait for psi_schedule_poll_work RCU to complete its read-side
++       * critical section before destroying the trigger and optionally the
++       * poll_task.
+        */
+       synchronize_rcu();
+       /*
+@@ -1208,18 +1211,6 @@ static void psi_trigger_destroy(struct k
+       kfree(t);
+ }
+-void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *new)
+-{
+-      struct psi_trigger *old = *trigger_ptr;
+-
+-      if (static_branch_likely(&psi_disabled))
+-              return;
+-
+-      rcu_assign_pointer(*trigger_ptr, new);
+-      if (old)
+-              kref_put(&old->refcount, psi_trigger_destroy);
+-}
+-
+ __poll_t psi_trigger_poll(void **trigger_ptr,
+                               struct file *file, poll_table *wait)
+ {
+@@ -1229,24 +1220,15 @@ __poll_t psi_trigger_poll(void **trigger
+       if (static_branch_likely(&psi_disabled))
+               return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;
+-      rcu_read_lock();
+-
+-      t = rcu_dereference(*(void __rcu __force **)trigger_ptr);
+-      if (!t) {
+-              rcu_read_unlock();
++      t = smp_load_acquire(trigger_ptr);
++      if (!t)
+               return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;
+-      }
+-      kref_get(&t->refcount);
+-
+-      rcu_read_unlock();
+       poll_wait(file, &t->event_wait, wait);
+       if (cmpxchg(&t->event, 1, 0) == 1)
+               ret |= EPOLLPRI;
+-      kref_put(&t->refcount, psi_trigger_destroy);
+-
+       return ret;
+ }
+@@ -1270,14 +1252,24 @@ static ssize_t psi_write(struct file *fi
+       buf[buf_size - 1] = '\0';
+-      new = psi_trigger_create(&psi_system, buf, nbytes, res);
+-      if (IS_ERR(new))
+-              return PTR_ERR(new);
+-
+       seq = file->private_data;
++
+       /* Take seq->lock to protect seq->private from concurrent writes */
+       mutex_lock(&seq->lock);
+-      psi_trigger_replace(&seq->private, new);
++
++      /* Allow only one trigger per file descriptor */
++      if (seq->private) {
++              mutex_unlock(&seq->lock);
++              return -EBUSY;
++      }
++
++      new = psi_trigger_create(&psi_system, buf, nbytes, res);
++      if (IS_ERR(new)) {
++              mutex_unlock(&seq->lock);
++              return PTR_ERR(new);
++      }
++
++      smp_store_release(&seq->private, new);
+       mutex_unlock(&seq->lock);
+       return nbytes;
+@@ -1312,7 +1304,7 @@ static int psi_fop_release(struct inode
+ {
+       struct seq_file *seq = file->private_data;
+-      psi_trigger_replace(&seq->private, NULL);
++      psi_trigger_destroy(seq->private);
+       return single_release(inode, file);
+ }
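
In this backport the kref/RCU lifetime scheme is replaced by a write-once pointer: the trigger is published with smp_store_release(), readers use smp_load_acquire(), and a second write to the same file descriptor fails with EBUSY. A userspace analogue using C11 atomics (hypothetical names; note the real kernel code performs the EBUSY check under seq->lock, so the check-then-store below is only race-free under that kind of external lock):

#include <errno.h>
#include <stdatomic.h>
#include <stdio.h>

struct trigger {
        int threshold_us;
};

static _Atomic(struct trigger *) seq_private;

static int publish_trigger(struct trigger *t)
{
        /* The real code does this check under seq->lock; here it is only
         * safe because the demo is single-threaded. */
        if (atomic_load_explicit(&seq_private, memory_order_acquire))
                return -EBUSY;          /* only one trigger per descriptor */

        atomic_store_explicit(&seq_private, t, memory_order_release);
        return 0;
}

static struct trigger *current_trigger(void)
{
        return atomic_load_explicit(&seq_private, memory_order_acquire);
}

int main(void)
{
        static struct trigger t = { .threshold_us = 150000 };

        printf("first write:  %d\n", publish_trigger(&t));     /* 0 */
        printf("second write: %d\n", publish_trigger(&t));     /* -16 (EBUSY) */
        printf("threshold:    %d\n", current_trigger()->threshold_us);
        return 0;
}
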
diff --git a/queue-5.10/revert-drivers-bus-simple-pm-bus-add-support-for-probing-simple-bus-only-devices.patch b/queue-5.10/revert-drivers-bus-simple-pm-bus-add-support-for-probing-simple-bus-only-devices.patch
new file mode 100644 (file)
index 0000000..813e0a7
--- /dev/null
@@ -0,0 +1,88 @@
+From khilman@baylibre.com  Thu Feb  3 18:02:09 2022
+From: Kevin Hilman <khilman@baylibre.com>
+Date: Wed,  2 Feb 2022 11:57:05 -0800
+Subject: Revert "drivers: bus: simple-pm-bus: Add support for probing simple bus only devices"
+To: stable@vger.kernel.org
+Cc: Saravana Kannan <saravanak@google.com>
+Message-ID: <20220202195705.3598798-1-khilman@baylibre.com>
+
+From: Kevin Hilman <khilman@baylibre.com>
+
+This reverts commit d5f13bbb51046537b2c2b9868177fb8fe8a6a6e9 which is
+commit 98e96cf80045a383fcc47c58dd4e87b3ae587b3e upstream.
+
+This change related to fw_devlink was backported to v5.10 but has
+severaly other dependencies that were not backported.  As discussed
+with the original author, the best approach for v5.10 is to revert.
+
+Link: https://lore.kernel.org/linux-omap/7hk0efmfzo.fsf@baylibre.com
+Acked-by: Saravana Kannan <saravanak@google.com>
+Signed-off-by: Kevin Hilman <khilman@baylibre.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/bus/simple-pm-bus.c |   39 +--------------------------------------
+ 1 file changed, 1 insertion(+), 38 deletions(-)
+
+--- a/drivers/bus/simple-pm-bus.c
++++ b/drivers/bus/simple-pm-bus.c
+@@ -16,33 +16,7 @@
+ static int simple_pm_bus_probe(struct platform_device *pdev)
+ {
+-      const struct device *dev = &pdev->dev;
+-      struct device_node *np = dev->of_node;
+-      const struct of_device_id *match;
+-
+-      /*
+-       * Allow user to use driver_override to bind this driver to a
+-       * transparent bus device which has a different compatible string
+-       * that's not listed in simple_pm_bus_of_match. We don't want to do any
+-       * of the simple-pm-bus tasks for these devices, so return early.
+-       */
+-      if (pdev->driver_override)
+-              return 0;
+-
+-      match = of_match_device(dev->driver->of_match_table, dev);
+-      /*
+-       * These are transparent bus devices (not simple-pm-bus matches) that
+-       * have their child nodes populated automatically.  So, don't need to
+-       * do anything more. We only match with the device if this driver is
+-       * the most specific match because we don't want to incorrectly bind to
+-       * a device that has a more specific driver.
+-       */
+-      if (match && match->data) {
+-              if (of_property_match_string(np, "compatible", match->compatible) == 0)
+-                      return 0;
+-              else
+-                      return -ENODEV;
+-      }
++      struct device_node *np = pdev->dev.of_node;
+       dev_dbg(&pdev->dev, "%s\n", __func__);
+@@ -56,25 +30,14 @@ static int simple_pm_bus_probe(struct pl
+ static int simple_pm_bus_remove(struct platform_device *pdev)
+ {
+-      const void *data = of_device_get_match_data(&pdev->dev);
+-
+-      if (pdev->driver_override || data)
+-              return 0;
+-
+       dev_dbg(&pdev->dev, "%s\n", __func__);
+       pm_runtime_disable(&pdev->dev);
+       return 0;
+ }
+-#define ONLY_BUS      ((void *) 1) /* Match if the device is only a bus. */
+-
+ static const struct of_device_id simple_pm_bus_of_match[] = {
+       { .compatible = "simple-pm-bus", },
+-      { .compatible = "simple-bus",   .data = ONLY_BUS },
+-      { .compatible = "simple-mfd",   .data = ONLY_BUS },
+-      { .compatible = "isa",          .data = ONLY_BUS },
+-      { .compatible = "arm,amba-bus", .data = ONLY_BUS },
+       { /* sentinel */ }
+ };
+ MODULE_DEVICE_TABLE(of, simple_pm_bus_of_match);
index 1985dac73b79d70b35d4c32742472150908e0c09..80303d82becd070684ad0740f0a6bb7200ad72e3 100644 (file)
@@ -1 +1,12 @@
 pci-pciehp-fix-infinite-loop-in-irq-handler-upon-power-fault.patch
+net-ipa-fix-atomic-update-in-ipa_endpoint_replenish.patch
+net-ipa-use-a-bitmap-for-endpoint-replenish_enabled.patch
+net-ipa-prevent-concurrent-replenish.patch
+revert-drivers-bus-simple-pm-bus-add-support-for-probing-simple-bus-only-devices.patch
+kvm-x86-forcibly-leave-nested-virt-when-smm-state-is-toggled.patch
+psi-fix-uaf-issue-when-psi-trigger-is-destroyed-while-being-polled.patch
+perf-rework-perf_event_exit_event.patch
+perf-core-fix-cgroup-event-list-management.patch
+x86-mce-add-xeon-sapphire-rapids-to-list-of-cpus-that-support-ppin.patch
+x86-cpu-add-xeon-icelake-d-to-list-of-cpus-that-support-ppin.patch
+drm-vc4-hdmi-make-sure-the-device-is-powered-with-cec.patch
diff --git a/queue-5.10/x86-cpu-add-xeon-icelake-d-to-list-of-cpus-that-support-ppin.patch b/queue-5.10/x86-cpu-add-xeon-icelake-d-to-list-of-cpus-that-support-ppin.patch
new file mode 100644 (file)
index 0000000..96b6959
--- /dev/null
@@ -0,0 +1,33 @@
+From e464121f2d40eabc7d11823fb26db807ce945df4 Mon Sep 17 00:00:00 2001
+From: Tony Luck <tony.luck@intel.com>
+Date: Fri, 21 Jan 2022 09:47:38 -0800
+Subject: x86/cpu: Add Xeon Icelake-D to list of CPUs that support PPIN
+
+From: Tony Luck <tony.luck@intel.com>
+
+commit e464121f2d40eabc7d11823fb26db807ce945df4 upstream.
+
+Missed adding the Icelake-D CPU to the list. It uses the same MSRs
+to control and read the inventory number as all the other models.
+
+Fixes: dc6b025de95b ("x86/mce: Add Xeon Icelake to list of CPUs that support PPIN")
+Reported-by: Ailin Xu <ailin.xu@intel.com>
+Signed-off-by: Tony Luck <tony.luck@intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Cc: <stable@vger.kernel.org>
+Link: https://lore.kernel.org/r/20220121174743.1875294-2-tony.luck@intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/mce/intel.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/x86/kernel/cpu/mce/intel.c
++++ b/arch/x86/kernel/cpu/mce/intel.c
+@@ -486,6 +486,7 @@ static void intel_ppin_init(struct cpuin
+       case INTEL_FAM6_BROADWELL_X:
+       case INTEL_FAM6_SKYLAKE_X:
+       case INTEL_FAM6_ICELAKE_X:
++      case INTEL_FAM6_ICELAKE_D:
+       case INTEL_FAM6_SAPPHIRERAPIDS_X:
+       case INTEL_FAM6_XEON_PHI_KNL:
+       case INTEL_FAM6_XEON_PHI_KNM:
diff --git a/queue-5.10/x86-mce-add-xeon-sapphire-rapids-to-list-of-cpus-that-support-ppin.patch b/queue-5.10/x86-mce-add-xeon-sapphire-rapids-to-list-of-cpus-that-support-ppin.patch
new file mode 100644 (file)
index 0000000..4fc6b05
--- /dev/null
@@ -0,0 +1,29 @@
+From a331f5fdd36dba1ffb0239a4dfaaf1df91ff1aab Mon Sep 17 00:00:00 2001
+From: Tony Luck <tony.luck@intel.com>
+Date: Fri, 19 Mar 2021 10:39:19 -0700
+Subject: x86/mce: Add Xeon Sapphire Rapids to list of CPUs that support PPIN
+
+From: Tony Luck <tony.luck@intel.com>
+
+commit a331f5fdd36dba1ffb0239a4dfaaf1df91ff1aab upstream.
+
+New CPU model, same MSRs to control and read the inventory number.
+
+Signed-off-by: Tony Luck <tony.luck@intel.com>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Link: https://lore.kernel.org/r/20210319173919.291428-1-tony.luck@intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/mce/intel.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/x86/kernel/cpu/mce/intel.c
++++ b/arch/x86/kernel/cpu/mce/intel.c
+@@ -486,6 +486,7 @@ static void intel_ppin_init(struct cpuin
+       case INTEL_FAM6_BROADWELL_X:
+       case INTEL_FAM6_SKYLAKE_X:
+       case INTEL_FAM6_ICELAKE_X:
++      case INTEL_FAM6_SAPPHIRERAPIDS_X:
+       case INTEL_FAM6_XEON_PHI_KNL:
+       case INTEL_FAM6_XEON_PHI_KNM:
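
Both PPIN patches are one-line additions to the model switch in intel_ppin_init(). As a trivial sketch of that allowlist pattern (placeholder model names, not the real INTEL_FAM6_* constants):

#include <stdbool.h>

enum fam6_model {
        MODEL_ICELAKE_X,
        MODEL_ICELAKE_D,
        MODEL_SAPPHIRERAPIDS_X,
        MODEL_OTHER,
};

static bool model_supports_ppin(enum fam6_model model)
{
        switch (model) {
        case MODEL_ICELAKE_X:
        case MODEL_ICELAKE_D:           /* added by the Icelake-D patch */
        case MODEL_SAPPHIRERAPIDS_X:    /* added by the Sapphire Rapids patch */
                return true;
        default:
                return false;
        }
}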