From 218f9d8e70afe70522543a8315b0cd2ee3cbb4d3 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 3 Feb 2022 19:08:07 +0100 Subject: [PATCH] 5.10-stable patches added patches: drm-vc4-hdmi-make-sure-the-device-is-powered-with-cec.patch kvm-x86-forcibly-leave-nested-virt-when-smm-state-is-toggled.patch net-ipa-fix-atomic-update-in-ipa_endpoint_replenish.patch net-ipa-prevent-concurrent-replenish.patch net-ipa-use-a-bitmap-for-endpoint-replenish_enabled.patch perf-core-fix-cgroup-event-list-management.patch perf-rework-perf_event_exit_event.patch psi-fix-uaf-issue-when-psi-trigger-is-destroyed-while-being-polled.patch revert-drivers-bus-simple-pm-bus-add-support-for-probing-simple-bus-only-devices.patch x86-cpu-add-xeon-icelake-d-to-list-of-cpus-that-support-ppin.patch x86-mce-add-xeon-sapphire-rapids-to-list-of-cpus-that-support-ppin.patch --- ...-sure-the-device-is-powered-with-cec.patch | 96 +++++++ ...ested-virt-when-smm-state-is-toggled.patch | 174 ++++++++++++ ...mic-update-in-ipa_endpoint_replenish.patch | 45 +++ ...net-ipa-prevent-concurrent-replenish.patch | 79 ++++++ ...itmap-for-endpoint-replenish_enabled.patch | 89 ++++++ ...ore-fix-cgroup-event-list-management.patch | 72 +++++ .../perf-rework-perf_event_exit_event.patch | 265 ++++++++++++++++++ ...gger-is-destroyed-while-being-polled.patch | 243 ++++++++++++++++ ...-for-probing-simple-bus-only-devices.patch | 88 ++++++ queue-5.10/series | 11 + ...-d-to-list-of-cpus-that-support-ppin.patch | 33 +++ ...ds-to-list-of-cpus-that-support-ppin.patch | 29 ++ 12 files changed, 1224 insertions(+) create mode 100644 queue-5.10/drm-vc4-hdmi-make-sure-the-device-is-powered-with-cec.patch create mode 100644 queue-5.10/kvm-x86-forcibly-leave-nested-virt-when-smm-state-is-toggled.patch create mode 100644 queue-5.10/net-ipa-fix-atomic-update-in-ipa_endpoint_replenish.patch create mode 100644 queue-5.10/net-ipa-prevent-concurrent-replenish.patch create mode 100644 
queue-5.10/net-ipa-use-a-bitmap-for-endpoint-replenish_enabled.patch create mode 100644 queue-5.10/perf-core-fix-cgroup-event-list-management.patch create mode 100644 queue-5.10/perf-rework-perf_event_exit_event.patch create mode 100644 queue-5.10/psi-fix-uaf-issue-when-psi-trigger-is-destroyed-while-being-polled.patch create mode 100644 queue-5.10/revert-drivers-bus-simple-pm-bus-add-support-for-probing-simple-bus-only-devices.patch create mode 100644 queue-5.10/x86-cpu-add-xeon-icelake-d-to-list-of-cpus-that-support-ppin.patch create mode 100644 queue-5.10/x86-mce-add-xeon-sapphire-rapids-to-list-of-cpus-that-support-ppin.patch diff --git a/queue-5.10/drm-vc4-hdmi-make-sure-the-device-is-powered-with-cec.patch b/queue-5.10/drm-vc4-hdmi-make-sure-the-device-is-powered-with-cec.patch new file mode 100644 index 00000000000..1c693270d87 --- /dev/null +++ b/queue-5.10/drm-vc4-hdmi-make-sure-the-device-is-powered-with-cec.patch @@ -0,0 +1,96 @@ +From 20b0dfa86bef0e80b41b0e5ac38b92f23b6f27f9 Mon Sep 17 00:00:00 2001 +From: Maxime Ripard +Date: Thu, 19 Aug 2021 15:59:30 +0200 +Subject: drm/vc4: hdmi: Make sure the device is powered with CEC + +From: Maxime Ripard + +Commit 20b0dfa86bef0e80b41b0e5ac38b92f23b6f27f9 upstream. + +The original commit depended on a rework commit (724fc856c09e ("drm/vc4: +hdmi: Split the CEC disable / enable functions in two")) that +(rightfully) didn't reach stable. + +However, probably because the context changed, when the patch was +applied to stable the pm_runtime_put called got moved to the end of the +vc4_hdmi_cec_adap_enable function (that would have become +vc4_hdmi_cec_disable with the rework) to vc4_hdmi_cec_init. + +This means that at probe time, we now drop our reference to the clocks +and power domains and thus end up with a CPU hang when the CPU tries to +access registers. + +The call to pm_runtime_resume_and_get() is also problematic since the +.adap_enable CEC hook is called both to enable and to disable the +controller. 
That means that we'll now call pm_runtime_resume_and_get() +at disable time as well, messing with the reference counting. + +The behaviour we should have though would be to have +pm_runtime_resume_and_get() called when the CEC controller is enabled, +and pm_runtime_put when it's disabled. + +We need to move things around a bit to behave that way, but it aligns +stable with upstream. + +Cc: # 5.10.x +Cc: # 5.15.x +Cc: # 5.16.x +Reported-by: Michael Stapelberg +Signed-off-by: Maxime Ripard +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/vc4/vc4_hdmi.c | 27 ++++++++++++++------------- + 1 file changed, 14 insertions(+), 13 deletions(-) + +--- a/drivers/gpu/drm/vc4/vc4_hdmi.c ++++ b/drivers/gpu/drm/vc4/vc4_hdmi.c +@@ -1402,18 +1402,18 @@ static int vc4_hdmi_cec_adap_enable(stru + u32 val; + int ret; + +- ret = pm_runtime_resume_and_get(&vc4_hdmi->pdev->dev); +- if (ret) +- return ret; +- +- val = HDMI_READ(HDMI_CEC_CNTRL_5); +- val &= ~(VC4_HDMI_CEC_TX_SW_RESET | VC4_HDMI_CEC_RX_SW_RESET | +- VC4_HDMI_CEC_CNT_TO_4700_US_MASK | +- VC4_HDMI_CEC_CNT_TO_4500_US_MASK); +- val |= ((4700 / usecs) << VC4_HDMI_CEC_CNT_TO_4700_US_SHIFT) | +- ((4500 / usecs) << VC4_HDMI_CEC_CNT_TO_4500_US_SHIFT); +- + if (enable) { ++ ret = pm_runtime_resume_and_get(&vc4_hdmi->pdev->dev); ++ if (ret) ++ return ret; ++ ++ val = HDMI_READ(HDMI_CEC_CNTRL_5); ++ val &= ~(VC4_HDMI_CEC_TX_SW_RESET | VC4_HDMI_CEC_RX_SW_RESET | ++ VC4_HDMI_CEC_CNT_TO_4700_US_MASK | ++ VC4_HDMI_CEC_CNT_TO_4500_US_MASK); ++ val |= ((4700 / usecs) << VC4_HDMI_CEC_CNT_TO_4700_US_SHIFT) | ++ ((4500 / usecs) << VC4_HDMI_CEC_CNT_TO_4500_US_SHIFT); ++ + HDMI_WRITE(HDMI_CEC_CNTRL_5, val | + VC4_HDMI_CEC_TX_SW_RESET | VC4_HDMI_CEC_RX_SW_RESET); + HDMI_WRITE(HDMI_CEC_CNTRL_5, val); +@@ -1439,7 +1439,10 @@ static int vc4_hdmi_cec_adap_enable(stru + HDMI_WRITE(HDMI_CEC_CPU_MASK_SET, VC4_HDMI_CPU_CEC); + HDMI_WRITE(HDMI_CEC_CNTRL_5, val | + VC4_HDMI_CEC_TX_SW_RESET | VC4_HDMI_CEC_RX_SW_RESET); ++ ++ 
pm_runtime_put(&vc4_hdmi->pdev->dev); + } ++ + return 0; + } + +@@ -1531,8 +1534,6 @@ static int vc4_hdmi_cec_init(struct vc4_ + if (ret < 0) + goto err_delete_cec_adap; + +- pm_runtime_put(&vc4_hdmi->pdev->dev); +- + return 0; + + err_delete_cec_adap: diff --git a/queue-5.10/kvm-x86-forcibly-leave-nested-virt-when-smm-state-is-toggled.patch b/queue-5.10/kvm-x86-forcibly-leave-nested-virt-when-smm-state-is-toggled.patch new file mode 100644 index 00000000000..83a8be05c90 --- /dev/null +++ b/queue-5.10/kvm-x86-forcibly-leave-nested-virt-when-smm-state-is-toggled.patch @@ -0,0 +1,174 @@ +From f7e570780efc5cec9b2ed1e0472a7da14e864fdb Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Tue, 25 Jan 2022 22:03:58 +0000 +Subject: KVM: x86: Forcibly leave nested virt when SMM state is toggled + +From: Sean Christopherson + +commit f7e570780efc5cec9b2ed1e0472a7da14e864fdb upstream. + +Forcibly leave nested virtualization operation if userspace toggles SMM +state via KVM_SET_VCPU_EVENTS or KVM_SYNC_X86_EVENTS. If userspace +forces the vCPU out of SMM while it's post-VMXON and then injects an SMI, +vmx_enter_smm() will overwrite vmx->nested.smm.vmxon and end up with both +vmxon=false and smm.vmxon=false, but all other nVMX state allocated. + +Don't attempt to gracefully handle the transition as (a) most transitions +are nonsensical, e.g. forcing SMM while L2 is running, (b) there isn't +sufficient information to handle all transitions, e.g. SVM wants access +to the SMRAM save state, and (c) KVM_SET_VCPU_EVENTS must precede +KVM_SET_NESTED_STATE during state restore as the latter disallows putting +the vCPU into L2 if SMM is active, and disallows tagging the vCPU as +being post-VMXON in SMM if SMM is not active. + +Abuse of KVM_SET_VCPU_EVENTS manifests as a WARN and memory leak in nVMX +due to failure to free vmcs01's shadow VMCS, but the bug goes far beyond +just a memory leak, e.g. 
toggling SMM on while L2 is active puts the vCPU +in an architecturally impossible state. + + WARNING: CPU: 0 PID: 3606 at free_loaded_vmcs arch/x86/kvm/vmx/vmx.c:2665 [inline] + WARNING: CPU: 0 PID: 3606 at free_loaded_vmcs+0x158/0x1a0 arch/x86/kvm/vmx/vmx.c:2656 + Modules linked in: + CPU: 1 PID: 3606 Comm: syz-executor725 Not tainted 5.17.0-rc1-syzkaller #0 + Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 + RIP: 0010:free_loaded_vmcs arch/x86/kvm/vmx/vmx.c:2665 [inline] + RIP: 0010:free_loaded_vmcs+0x158/0x1a0 arch/x86/kvm/vmx/vmx.c:2656 + Code: <0f> 0b eb b3 e8 8f 4d 9f 00 e9 f7 fe ff ff 48 89 df e8 92 4d 9f 00 + Call Trace: + + kvm_arch_vcpu_destroy+0x72/0x2f0 arch/x86/kvm/x86.c:11123 + kvm_vcpu_destroy arch/x86/kvm/../../../virt/kvm/kvm_main.c:441 [inline] + kvm_destroy_vcpus+0x11f/0x290 arch/x86/kvm/../../../virt/kvm/kvm_main.c:460 + kvm_free_vcpus arch/x86/kvm/x86.c:11564 [inline] + kvm_arch_destroy_vm+0x2e8/0x470 arch/x86/kvm/x86.c:11676 + kvm_destroy_vm arch/x86/kvm/../../../virt/kvm/kvm_main.c:1217 [inline] + kvm_put_kvm+0x4fa/0xb00 arch/x86/kvm/../../../virt/kvm/kvm_main.c:1250 + kvm_vm_release+0x3f/0x50 arch/x86/kvm/../../../virt/kvm/kvm_main.c:1273 + __fput+0x286/0x9f0 fs/file_table.c:311 + task_work_run+0xdd/0x1a0 kernel/task_work.c:164 + exit_task_work include/linux/task_work.h:32 [inline] + do_exit+0xb29/0x2a30 kernel/exit.c:806 + do_group_exit+0xd2/0x2f0 kernel/exit.c:935 + get_signal+0x4b0/0x28c0 kernel/signal.c:2862 + arch_do_signal_or_restart+0x2a9/0x1c40 arch/x86/kernel/signal.c:868 + handle_signal_work kernel/entry/common.c:148 [inline] + exit_to_user_mode_loop kernel/entry/common.c:172 [inline] + exit_to_user_mode_prepare+0x17d/0x290 kernel/entry/common.c:207 + __syscall_exit_to_user_mode_work kernel/entry/common.c:289 [inline] + syscall_exit_to_user_mode+0x19/0x60 kernel/entry/common.c:300 + do_syscall_64+0x42/0xb0 arch/x86/entry/common.c:86 + entry_SYSCALL_64_after_hwframe+0x44/0xae + + +Cc: 
stable@vger.kernel.org +Reported-by: syzbot+8112db3ab20e70d50c31@syzkaller.appspotmail.com +Signed-off-by: Sean Christopherson +Message-Id: <20220125220358.2091737-1-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Backported-by: Tadeusz Struk +Signed-off-by: Sean Christopherson +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/kvm_host.h | 1 + + arch/x86/kvm/svm/nested.c | 10 ++++++++-- + arch/x86/kvm/svm/svm.c | 2 +- + arch/x86/kvm/svm/svm.h | 2 +- + arch/x86/kvm/vmx/nested.c | 1 + + arch/x86/kvm/x86.c | 2 ++ + 6 files changed, 14 insertions(+), 4 deletions(-) + +--- a/arch/x86/include/asm/kvm_host.h ++++ b/arch/x86/include/asm/kvm_host.h +@@ -1285,6 +1285,7 @@ struct kvm_x86_ops { + }; + + struct kvm_x86_nested_ops { ++ void (*leave_nested)(struct kvm_vcpu *vcpu); + int (*check_events)(struct kvm_vcpu *vcpu); + bool (*hv_timer_pending)(struct kvm_vcpu *vcpu); + int (*get_state)(struct kvm_vcpu *vcpu, +--- a/arch/x86/kvm/svm/nested.c ++++ b/arch/x86/kvm/svm/nested.c +@@ -783,8 +783,10 @@ void svm_free_nested(struct vcpu_svm *sv + /* + * Forcibly leave nested mode in order to be able to reset the VCPU later on. 
+ */ +-void svm_leave_nested(struct vcpu_svm *svm) ++void svm_leave_nested(struct kvm_vcpu *vcpu) + { ++ struct vcpu_svm *svm = to_svm(vcpu); ++ + if (is_guest_mode(&svm->vcpu)) { + struct vmcb *hsave = svm->nested.hsave; + struct vmcb *vmcb = svm->vmcb; +@@ -1185,7 +1187,7 @@ static int svm_set_nested_state(struct k + return -EINVAL; + + if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) { +- svm_leave_nested(svm); ++ svm_leave_nested(vcpu); + svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET)); + return 0; + } +@@ -1238,6 +1240,9 @@ static int svm_set_nested_state(struct k + copy_vmcb_control_area(&hsave->control, &svm->vmcb->control); + hsave->save = *save; + ++ if (is_guest_mode(vcpu)) ++ svm_leave_nested(vcpu); ++ + svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa; + load_nested_vmcb_control(svm, ctl); + nested_prepare_vmcb_control(svm); +@@ -1252,6 +1257,7 @@ out_free: + } + + struct kvm_x86_nested_ops svm_nested_ops = { ++ .leave_nested = svm_leave_nested, + .check_events = svm_check_nested_events, + .get_nested_state_pages = svm_get_nested_state_pages, + .get_state = svm_get_nested_state, +--- a/arch/x86/kvm/svm/svm.c ++++ b/arch/x86/kvm/svm/svm.c +@@ -279,7 +279,7 @@ int svm_set_efer(struct kvm_vcpu *vcpu, + + if ((old_efer & EFER_SVME) != (efer & EFER_SVME)) { + if (!(efer & EFER_SVME)) { +- svm_leave_nested(svm); ++ svm_leave_nested(vcpu); + svm_set_gif(svm, true); + + /* +--- a/arch/x86/kvm/svm/svm.h ++++ b/arch/x86/kvm/svm/svm.h +@@ -393,7 +393,7 @@ static inline bool nested_exit_on_nmi(st + + int enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa, + struct vmcb *nested_vmcb); +-void svm_leave_nested(struct vcpu_svm *svm); ++void svm_leave_nested(struct kvm_vcpu *vcpu); + void svm_free_nested(struct vcpu_svm *svm); + int svm_allocate_nested(struct vcpu_svm *svm); + int nested_svm_vmrun(struct vcpu_svm *svm); +--- a/arch/x86/kvm/vmx/nested.c ++++ b/arch/x86/kvm/vmx/nested.c +@@ -6628,6 +6628,7 @@ __init int 
nested_vmx_hardware_setup(int + } + + struct kvm_x86_nested_ops vmx_nested_ops = { ++ .leave_nested = vmx_leave_nested, + .check_events = vmx_check_nested_events, + .hv_timer_pending = nested_vmx_preemption_timer_pending, + .get_state = vmx_get_nested_state, +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -4391,6 +4391,8 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_e + vcpu->arch.hflags |= HF_SMM_MASK; + else + vcpu->arch.hflags &= ~HF_SMM_MASK; ++ ++ kvm_x86_ops.nested_ops->leave_nested(vcpu); + kvm_smm_changed(vcpu); + } + diff --git a/queue-5.10/net-ipa-fix-atomic-update-in-ipa_endpoint_replenish.patch b/queue-5.10/net-ipa-fix-atomic-update-in-ipa_endpoint_replenish.patch new file mode 100644 index 00000000000..80a9f8cb83e --- /dev/null +++ b/queue-5.10/net-ipa-fix-atomic-update-in-ipa_endpoint_replenish.patch @@ -0,0 +1,45 @@ +From 6c0e3b5ce94947b311348c367db9e11dcb2ccc93 Mon Sep 17 00:00:00 2001 +From: Alex Elder +Date: Wed, 12 Jan 2022 07:30:10 -0600 +Subject: net: ipa: fix atomic update in ipa_endpoint_replenish() + +From: Alex Elder + +commit 6c0e3b5ce94947b311348c367db9e11dcb2ccc93 upstream. + +In ipa_endpoint_replenish(), if an error occurs when attempting to +replenish a receive buffer, we just quit and try again later. In +that case we increment the backlog count to reflect that the attempt +was unsuccessful. Then, if the add_one flag was true we increment +the backlog again. + +This second increment is not included in the backlog local variable +though, and its value determines whether delayed work should be +scheduled. This is a bug. + +Fix this by determining whether 1 or 2 should be added to the +backlog before adding it in an atomic_add_return() call. + +Reviewed-by: Matthias Kaehlcke +Fixes: 84f9bd12d46db ("soc: qcom: ipa: IPA endpoints") +Signed-off-by: Alex Elder +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ipa/ipa_endpoint.c | 5 +---- + 1 file changed, 1 insertion(+), 4 deletions(-) + +--- a/drivers/net/ipa/ipa_endpoint.c ++++ b/drivers/net/ipa/ipa_endpoint.c +@@ -918,10 +918,7 @@ static void ipa_endpoint_replenish(struc + + try_again_later: + /* The last one didn't succeed, so fix the backlog */ +- backlog = atomic_inc_return(&endpoint->replenish_backlog); +- +- if (count) +- atomic_add(count, &endpoint->replenish_backlog); ++ backlog = atomic_add_return(count + 1, &endpoint->replenish_backlog); + + /* Whenever a receive buffer transaction completes we'll try to + * replenish again. It's unlikely, but if we fail to supply even diff --git a/queue-5.10/net-ipa-prevent-concurrent-replenish.patch b/queue-5.10/net-ipa-prevent-concurrent-replenish.patch new file mode 100644 index 00000000000..5b94c1d4ae8 --- /dev/null +++ b/queue-5.10/net-ipa-prevent-concurrent-replenish.patch @@ -0,0 +1,79 @@ +From 998c0bd2b3715244da7639cc4e6a2062cb79c3f4 Mon Sep 17 00:00:00 2001 +From: Alex Elder +Date: Wed, 12 Jan 2022 07:30:12 -0600 +Subject: net: ipa: prevent concurrent replenish + +From: Alex Elder + +commit 998c0bd2b3715244da7639cc4e6a2062cb79c3f4 upstream. + +We have seen cases where an endpoint RX completion interrupt arrives +while replenishing for the endpoint is underway. This causes another +instance of replenishing to begin as part of completing the receive +transaction. If this occurs it can lead to transaction corruption. + +Use a new flag to ensure only one replenish instance for an endpoint +executes at a time. + +Fixes: 84f9bd12d46db ("soc: qcom: ipa: IPA endpoints") +Signed-off-by: Alex Elder +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ipa/ipa_endpoint.c | 12 ++++++++++++ + drivers/net/ipa/ipa_endpoint.h | 2 ++ + 2 files changed, 14 insertions(+) + +--- a/drivers/net/ipa/ipa_endpoint.c ++++ b/drivers/net/ipa/ipa_endpoint.c +@@ -907,16 +907,27 @@ static void ipa_endpoint_replenish(struc + return; + } + ++ /* If already active, just update the backlog */ ++ if (test_and_set_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags)) { ++ if (count) ++ atomic_add(count, &endpoint->replenish_backlog); ++ return; ++ } + + while (atomic_dec_not_zero(&endpoint->replenish_backlog)) + if (ipa_endpoint_replenish_one(endpoint)) + goto try_again_later; ++ ++ clear_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags); ++ + if (count) + atomic_add(count, &endpoint->replenish_backlog); + + return; + + try_again_later: ++ clear_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags); ++ + /* The last one didn't succeed, so fix the backlog */ + backlog = atomic_add_return(count + 1, &endpoint->replenish_backlog); + +@@ -1470,6 +1481,7 @@ static void ipa_endpoint_setup_one(struc + * backlog is the same as the maximum outstanding TREs. 
+ */ + clear_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags); ++ clear_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags); + atomic_set(&endpoint->replenish_saved, + gsi_channel_tre_max(gsi, endpoint->channel_id)); + atomic_set(&endpoint->replenish_backlog, 0); +--- a/drivers/net/ipa/ipa_endpoint.h ++++ b/drivers/net/ipa/ipa_endpoint.h +@@ -43,10 +43,12 @@ enum ipa_endpoint_name { + * enum ipa_replenish_flag: RX buffer replenish flags + * + * @IPA_REPLENISH_ENABLED: Whether receive buffer replenishing is enabled ++ * @IPA_REPLENISH_ACTIVE: Whether replenishing is underway + * @IPA_REPLENISH_COUNT: Number of defined replenish flags + */ + enum ipa_replenish_flag { + IPA_REPLENISH_ENABLED, ++ IPA_REPLENISH_ACTIVE, + IPA_REPLENISH_COUNT, /* Number of flags (must be last) */ + }; + diff --git a/queue-5.10/net-ipa-use-a-bitmap-for-endpoint-replenish_enabled.patch b/queue-5.10/net-ipa-use-a-bitmap-for-endpoint-replenish_enabled.patch new file mode 100644 index 00000000000..61331486ca7 --- /dev/null +++ b/queue-5.10/net-ipa-use-a-bitmap-for-endpoint-replenish_enabled.patch @@ -0,0 +1,89 @@ +From c1aaa01dbf4cef95af3e04a5a43986c290e06ea3 Mon Sep 17 00:00:00 2001 +From: Alex Elder +Date: Wed, 12 Jan 2022 07:30:11 -0600 +Subject: net: ipa: use a bitmap for endpoint replenish_enabled + +From: Alex Elder + +commit c1aaa01dbf4cef95af3e04a5a43986c290e06ea3 upstream. + +Define a new replenish_flags bitmap to contain Boolean flags +associated with an endpoint's replenishing state. Replace the +replenish_enabled field with a flag in that bitmap. This is to +prepare for the next patch, which adds another flag. + +Signed-off-by: Alex Elder +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ipa/ipa_endpoint.c | 8 ++++---- + drivers/net/ipa/ipa_endpoint.h | 13 ++++++++++++- + 2 files changed, 16 insertions(+), 5 deletions(-) + +--- a/drivers/net/ipa/ipa_endpoint.c ++++ b/drivers/net/ipa/ipa_endpoint.c +@@ -901,7 +901,7 @@ static void ipa_endpoint_replenish(struc + struct gsi *gsi; + u32 backlog; + +- if (!endpoint->replenish_enabled) { ++ if (!test_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags)) { + if (count) + atomic_add(count, &endpoint->replenish_saved); + return; +@@ -938,7 +938,7 @@ static void ipa_endpoint_replenish_enabl + u32 max_backlog; + u32 saved; + +- endpoint->replenish_enabled = true; ++ set_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags); + while ((saved = atomic_xchg(&endpoint->replenish_saved, 0))) + atomic_add(saved, &endpoint->replenish_backlog); + +@@ -952,7 +952,7 @@ static void ipa_endpoint_replenish_disab + { + u32 backlog; + +- endpoint->replenish_enabled = false; ++ clear_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags); + while ((backlog = atomic_xchg(&endpoint->replenish_backlog, 0))) + atomic_add(backlog, &endpoint->replenish_saved); + } +@@ -1469,7 +1469,7 @@ static void ipa_endpoint_setup_one(struc + /* RX transactions require a single TRE, so the maximum + * backlog is the same as the maximum outstanding TREs. 
+ */ +- endpoint->replenish_enabled = false; ++ clear_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags); + atomic_set(&endpoint->replenish_saved, + gsi_channel_tre_max(gsi, endpoint->channel_id)); + atomic_set(&endpoint->replenish_backlog, 0); +--- a/drivers/net/ipa/ipa_endpoint.h ++++ b/drivers/net/ipa/ipa_endpoint.h +@@ -40,6 +40,17 @@ enum ipa_endpoint_name { + #define IPA_ENDPOINT_MAX 32 /* Max supported by driver */ + + /** ++ * enum ipa_replenish_flag: RX buffer replenish flags ++ * ++ * @IPA_REPLENISH_ENABLED: Whether receive buffer replenishing is enabled ++ * @IPA_REPLENISH_COUNT: Number of defined replenish flags ++ */ ++enum ipa_replenish_flag { ++ IPA_REPLENISH_ENABLED, ++ IPA_REPLENISH_COUNT, /* Number of flags (must be last) */ ++}; ++ ++/** + * struct ipa_endpoint - IPA endpoint information + * @channel_id: EP's GSI channel + * @evt_ring_id: EP's GSI channel event ring +@@ -60,7 +71,7 @@ struct ipa_endpoint { + struct net_device *netdev; + + /* Receive buffer replenishing for RX endpoints */ +- bool replenish_enabled; ++ DECLARE_BITMAP(replenish_flags, IPA_REPLENISH_COUNT); + u32 replenish_ready; + atomic_t replenish_saved; + atomic_t replenish_backlog; diff --git a/queue-5.10/perf-core-fix-cgroup-event-list-management.patch b/queue-5.10/perf-core-fix-cgroup-event-list-management.patch new file mode 100644 index 00000000000..5c4c893a987 --- /dev/null +++ b/queue-5.10/perf-core-fix-cgroup-event-list-management.patch @@ -0,0 +1,72 @@ +From c5de60cd622a2607c043ba65e25a6e9998a369f9 Mon Sep 17 00:00:00 2001 +From: Namhyung Kim +Date: Mon, 24 Jan 2022 11:58:08 -0800 +Subject: perf/core: Fix cgroup event list management + +From: Namhyung Kim + +commit c5de60cd622a2607c043ba65e25a6e9998a369f9 upstream. + +The active cgroup events are managed in the per-cpu cgrp_cpuctx_list. +This list is only accessed from current cpu and not protected by any +locks. 
But from the commit ef54c1a476ae ("perf: Rework +perf_event_exit_event()"), it's possible to access (actually modify) +the list from another cpu. + +In the perf_remove_from_context(), it can remove an event from the +context without an IPI when the context is not active. This is not +safe with cgroup events which can have some active events in the +context even if ctx->is_active is 0 at the moment. The target cpu +might be in the middle of list iteration at the same time. + +If the event is enabled when it's about to be closed, it might call +perf_cgroup_event_disable() and list_del() with the cgrp_cpuctx_list +on a different cpu. + +This resulted in a crash due to an invalid list pointer access during +the cgroup list traversal on the cpu which the event belongs to. + +Let's fallback to IPI to access the cgrp_cpuctx_list from that cpu. +Similarly, perf_install_in_context() should use IPI for the cgroup +events too. + +Fixes: ef54c1a476ae ("perf: Rework perf_event_exit_event()") +Signed-off-by: Namhyung Kim +Signed-off-by: Peter Zijlstra (Intel) +Link: https://lkml.kernel.org/r/20220124195808.2252071-1-namhyung@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + kernel/events/core.c | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +--- a/kernel/events/core.c ++++ b/kernel/events/core.c +@@ -2466,7 +2466,11 @@ static void perf_remove_from_context(str + * event_function_call() user. + */ + raw_spin_lock_irq(&ctx->lock); +- if (!ctx->is_active) { ++ /* ++ * Cgroup events are per-cpu events, and must IPI because of ++ * cgrp_cpuctx_list. ++ */ ++ if (!ctx->is_active && !is_cgroup_event(event)) { + __perf_remove_from_context(event, __get_cpu_context(ctx), + ctx, (void *)flags); + raw_spin_unlock_irq(&ctx->lock); +@@ -2899,11 +2903,14 @@ perf_install_in_context(struct perf_even + * perf_event_attr::disabled events will not run and can be initialized + * without IPI. 
Except when this is the first event for the context, in + * that case we need the magic of the IPI to set ctx->is_active. ++ * Similarly, cgroup events for the context also needs the IPI to ++ * manipulate the cgrp_cpuctx_list. + * + * The IOC_ENABLE that is sure to follow the creation of a disabled + * event will issue the IPI and reprogram the hardware. + */ +- if (__perf_effective_state(event) == PERF_EVENT_STATE_OFF && ctx->nr_events) { ++ if (__perf_effective_state(event) == PERF_EVENT_STATE_OFF && ++ ctx->nr_events && !is_cgroup_event(event)) { + raw_spin_lock_irq(&ctx->lock); + if (ctx->task == TASK_TOMBSTONE) { + raw_spin_unlock_irq(&ctx->lock); diff --git a/queue-5.10/perf-rework-perf_event_exit_event.patch b/queue-5.10/perf-rework-perf_event_exit_event.patch new file mode 100644 index 00000000000..b5c467ccaf1 --- /dev/null +++ b/queue-5.10/perf-rework-perf_event_exit_event.patch @@ -0,0 +1,265 @@ +From ef54c1a476aef7eef26fe13ea10dc090952c00f8 Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra +Date: Thu, 8 Apr 2021 12:35:56 +0200 +Subject: perf: Rework perf_event_exit_event() + +From: Peter Zijlstra + +commit ef54c1a476aef7eef26fe13ea10dc090952c00f8 upstream. + +Make perf_event_exit_event() more robust, such that we can use it from +other contexts. Specifically the up and coming remove_on_exec. + +For this to work we need to address a few issues. Remove_on_exec will +not destroy the entire context, so we cannot rely on TASK_TOMBSTONE to +disable event_function_call() and we thus have to use +perf_remove_from_context(). + +When using perf_remove_from_context(), there's two races to consider. +The first is against close(), where we can have concurrent tear-down +of the event. The second is against child_list iteration, which should +not find a half baked event. + +To address this, teach perf_remove_from_context() to special case +!ctx->is_active and about DETACH_CHILD. + +[ elver@google.com: fix racing parent/child exit in sync_child_event(). 
] +Signed-off-by: Marco Elver +Signed-off-by: Peter Zijlstra (Intel) +Link: https://lkml.kernel.org/r/20210408103605.1676875-2-elver@google.com +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/perf_event.h | 1 + kernel/events/core.c | 144 +++++++++++++++++++++++++-------------------- + 2 files changed, 81 insertions(+), 64 deletions(-) + +--- a/include/linux/perf_event.h ++++ b/include/linux/perf_event.h +@@ -607,6 +607,7 @@ struct swevent_hlist { + #define PERF_ATTACH_TASK_DATA 0x08 + #define PERF_ATTACH_ITRACE 0x10 + #define PERF_ATTACH_SCHED_CB 0x20 ++#define PERF_ATTACH_CHILD 0x40 + + struct perf_cgroup; + struct perf_buffer; +--- a/kernel/events/core.c ++++ b/kernel/events/core.c +@@ -2276,6 +2276,26 @@ out: + perf_event__header_size(leader); + } + ++static void sync_child_event(struct perf_event *child_event); ++ ++static void perf_child_detach(struct perf_event *event) ++{ ++ struct perf_event *parent_event = event->parent; ++ ++ if (!(event->attach_state & PERF_ATTACH_CHILD)) ++ return; ++ ++ event->attach_state &= ~PERF_ATTACH_CHILD; ++ ++ if (WARN_ON_ONCE(!parent_event)) ++ return; ++ ++ lockdep_assert_held(&parent_event->child_mutex); ++ ++ sync_child_event(event); ++ list_del_init(&event->child_list); ++} ++ + static bool is_orphaned_event(struct perf_event *event) + { + return event->state == PERF_EVENT_STATE_DEAD; +@@ -2383,6 +2403,7 @@ group_sched_out(struct perf_event *group + } + + #define DETACH_GROUP 0x01UL ++#define DETACH_CHILD 0x02UL + + /* + * Cross CPU call to remove a performance event +@@ -2406,6 +2427,8 @@ __perf_remove_from_context(struct perf_e + event_sched_out(event, cpuctx, ctx); + if (flags & DETACH_GROUP) + perf_group_detach(event); ++ if (flags & DETACH_CHILD) ++ perf_child_detach(event); + list_del_event(event, ctx); + + if (!ctx->nr_events && ctx->is_active) { +@@ -2437,25 +2460,21 @@ static void perf_remove_from_context(str + + lockdep_assert_held(&ctx->mutex); + +- event_function_call(event, __perf_remove_from_context, 
(void *)flags); +- + /* +- * The above event_function_call() can NO-OP when it hits +- * TASK_TOMBSTONE. In that case we must already have been detached +- * from the context (by perf_event_exit_event()) but the grouping +- * might still be in-tact. +- */ +- WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT); +- if ((flags & DETACH_GROUP) && +- (event->attach_state & PERF_ATTACH_GROUP)) { +- /* +- * Since in that case we cannot possibly be scheduled, simply +- * detach now. +- */ +- raw_spin_lock_irq(&ctx->lock); +- perf_group_detach(event); ++ * Because of perf_event_exit_task(), perf_remove_from_context() ought ++ * to work in the face of TASK_TOMBSTONE, unlike every other ++ * event_function_call() user. ++ */ ++ raw_spin_lock_irq(&ctx->lock); ++ if (!ctx->is_active) { ++ __perf_remove_from_context(event, __get_cpu_context(ctx), ++ ctx, (void *)flags); + raw_spin_unlock_irq(&ctx->lock); ++ return; + } ++ raw_spin_unlock_irq(&ctx->lock); ++ ++ event_function_call(event, __perf_remove_from_context, (void *)flags); + } + + /* +@@ -12330,14 +12349,17 @@ void perf_pmu_migrate_context(struct pmu + } + EXPORT_SYMBOL_GPL(perf_pmu_migrate_context); + +-static void sync_child_event(struct perf_event *child_event, +- struct task_struct *child) ++static void sync_child_event(struct perf_event *child_event) + { + struct perf_event *parent_event = child_event->parent; + u64 child_val; + +- if (child_event->attr.inherit_stat) +- perf_event_read_event(child_event, child); ++ if (child_event->attr.inherit_stat) { ++ struct task_struct *task = child_event->ctx->task; ++ ++ if (task && task != TASK_TOMBSTONE) ++ perf_event_read_event(child_event, task); ++ } + + child_val = perf_event_count(child_event); + +@@ -12352,60 +12374,53 @@ static void sync_child_event(struct perf + } + + static void +-perf_event_exit_event(struct perf_event *child_event, +- struct perf_event_context *child_ctx, +- struct task_struct *child) ++perf_event_exit_event(struct perf_event *event, struct 
perf_event_context *ctx) + { +- struct perf_event *parent_event = child_event->parent; ++ struct perf_event *parent_event = event->parent; ++ unsigned long detach_flags = 0; + +- /* +- * Do not destroy the 'original' grouping; because of the context +- * switch optimization the original events could've ended up in a +- * random child task. +- * +- * If we were to destroy the original group, all group related +- * operations would cease to function properly after this random +- * child dies. +- * +- * Do destroy all inherited groups, we don't care about those +- * and being thorough is better. +- */ +- raw_spin_lock_irq(&child_ctx->lock); +- WARN_ON_ONCE(child_ctx->is_active); ++ if (parent_event) { ++ /* ++ * Do not destroy the 'original' grouping; because of the ++ * context switch optimization the original events could've ++ * ended up in a random child task. ++ * ++ * If we were to destroy the original group, all group related ++ * operations would cease to function properly after this ++ * random child dies. ++ * ++ * Do destroy all inherited groups, we don't care about those ++ * and being thorough is better. ++ */ ++ detach_flags = DETACH_GROUP | DETACH_CHILD; ++ mutex_lock(&parent_event->child_mutex); ++ } + +- if (parent_event) +- perf_group_detach(child_event); +- list_del_event(child_event, child_ctx); +- perf_event_set_state(child_event, PERF_EVENT_STATE_EXIT); /* is_event_hup() */ +- raw_spin_unlock_irq(&child_ctx->lock); ++ perf_remove_from_context(event, detach_flags); ++ ++ raw_spin_lock_irq(&ctx->lock); ++ if (event->state > PERF_EVENT_STATE_EXIT) ++ perf_event_set_state(event, PERF_EVENT_STATE_EXIT); ++ raw_spin_unlock_irq(&ctx->lock); + + /* +- * Parent events are governed by their filedesc, retain them. ++ * Child events can be freed. 
+ */ +- if (!parent_event) { +- perf_event_wakeup(child_event); ++ if (parent_event) { ++ mutex_unlock(&parent_event->child_mutex); ++ /* ++ * Kick perf_poll() for is_event_hup(); ++ */ ++ perf_event_wakeup(parent_event); ++ free_event(event); ++ put_event(parent_event); + return; + } +- /* +- * Child events can be cleaned up. +- */ +- +- sync_child_event(child_event, child); + + /* +- * Remove this event from the parent's list +- */ +- WARN_ON_ONCE(parent_event->ctx->parent_ctx); +- mutex_lock(&parent_event->child_mutex); +- list_del_init(&child_event->child_list); +- mutex_unlock(&parent_event->child_mutex); +- +- /* +- * Kick perf_poll() for is_event_hup(). ++ * Parent events are governed by their filedesc, retain them. + */ +- perf_event_wakeup(parent_event); +- free_event(child_event); +- put_event(parent_event); ++ perf_event_wakeup(event); + } + + static void perf_event_exit_task_context(struct task_struct *child, int ctxn) +@@ -12462,7 +12477,7 @@ static void perf_event_exit_task_context + perf_event_task(child, child_ctx, 0); + + list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry) +- perf_event_exit_event(child_event, child_ctx, child); ++ perf_event_exit_event(child_event, child_ctx); + + mutex_unlock(&child_ctx->mutex); + +@@ -12722,6 +12737,7 @@ inherit_event(struct perf_event *parent_ + */ + raw_spin_lock_irqsave(&child_ctx->lock, flags); + add_event_to_ctx(child_event, child_ctx); ++ child_event->attach_state |= PERF_ATTACH_CHILD; + raw_spin_unlock_irqrestore(&child_ctx->lock, flags); + + /* diff --git a/queue-5.10/psi-fix-uaf-issue-when-psi-trigger-is-destroyed-while-being-polled.patch b/queue-5.10/psi-fix-uaf-issue-when-psi-trigger-is-destroyed-while-being-polled.patch new file mode 100644 index 00000000000..f853f246623 --- /dev/null +++ b/queue-5.10/psi-fix-uaf-issue-when-psi-trigger-is-destroyed-while-being-polled.patch @@ -0,0 +1,243 @@ +From a06247c6804f1a7c86a2e5398a4c1f1db1471848 Mon Sep 17 00:00:00 2001 +From: 
Suren Baghdasaryan +Date: Tue, 11 Jan 2022 15:23:09 -0800 +Subject: psi: Fix uaf issue when psi trigger is destroyed while being polled + +From: Suren Baghdasaryan + +commit a06247c6804f1a7c86a2e5398a4c1f1db1471848 upstream. + +With write operation on psi files replacing old trigger with a new one, +the lifetime of its waitqueue is totally arbitrary. Overwriting an +existing trigger causes its waitqueue to be freed and pending poll() +will stumble on trigger->event_wait which was destroyed. +Fix this by disallowing to redefine an existing psi trigger. If a write +operation is used on a file descriptor with an already existing psi +trigger, the operation will fail with EBUSY error. +Also bypass a check for psi_disabled in the psi_trigger_destroy as the +flag can be flipped after the trigger is created, leading to a memory +leak. + +Fixes: 0e94682b73bf ("psi: introduce psi monitor") +Reported-by: syzbot+cdb5dd11c97cc532efad@syzkaller.appspotmail.com +Suggested-by: Linus Torvalds +Analyzed-by: Eric Biggers +Signed-off-by: Suren Baghdasaryan +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Eric Biggers +Acked-by: Johannes Weiner +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20220111232309.1786347-1-surenb@google.com +[surenb: backported to 5.10 kernel] +CC: stable@vger.kernel.org # 5.10 +Signed-off-by: Suren Baghdasaryan +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/accounting/psi.rst | 3 + + include/linux/psi.h | 2 - + include/linux/psi_types.h | 3 - + kernel/cgroup/cgroup.c | 11 ++++-- + kernel/sched/psi.c | 66 +++++++++++++++++---------------------- + 5 files changed, 40 insertions(+), 45 deletions(-) + +--- a/Documentation/accounting/psi.rst ++++ b/Documentation/accounting/psi.rst +@@ -92,7 +92,8 @@ Triggers can be set on more than one psi + for the same psi metric can be specified. 
However for each trigger a separate + file descriptor is required to be able to poll it separately from others, + therefore for each trigger a separate open() syscall should be made even +-when opening the same psi interface file. ++when opening the same psi interface file. Write operations to a file descriptor ++with an already existing psi trigger will fail with EBUSY. + + Monitors activate only when system enters stall state for the monitored + psi metric and deactivates upon exit from the stall state. While system is +--- a/include/linux/psi.h ++++ b/include/linux/psi.h +@@ -33,7 +33,7 @@ void cgroup_move_task(struct task_struct + + struct psi_trigger *psi_trigger_create(struct psi_group *group, + char *buf, size_t nbytes, enum psi_res res); +-void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *t); ++void psi_trigger_destroy(struct psi_trigger *t); + + __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file, + poll_table *wait); +--- a/include/linux/psi_types.h ++++ b/include/linux/psi_types.h +@@ -128,9 +128,6 @@ struct psi_trigger { + * events to one per window + */ + u64 last_event_time; +- +- /* Refcounting to prevent premature destruction */ +- struct kref refcount; + }; + + struct psi_group { +--- a/kernel/cgroup/cgroup.c ++++ b/kernel/cgroup/cgroup.c +@@ -3601,6 +3601,12 @@ static ssize_t cgroup_pressure_write(str + cgroup_get(cgrp); + cgroup_kn_unlock(of->kn); + ++ /* Allow only one trigger per file descriptor */ ++ if (of->priv) { ++ cgroup_put(cgrp); ++ return -EBUSY; ++ } ++ + psi = cgroup_ino(cgrp) == 1 ? 
&psi_system : &cgrp->psi; + new = psi_trigger_create(psi, buf, nbytes, res); + if (IS_ERR(new)) { +@@ -3608,8 +3614,7 @@ static ssize_t cgroup_pressure_write(str + return PTR_ERR(new); + } + +- psi_trigger_replace(&of->priv, new); +- ++ smp_store_release(&of->priv, new); + cgroup_put(cgrp); + + return nbytes; +@@ -3644,7 +3649,7 @@ static __poll_t cgroup_pressure_poll(str + + static void cgroup_pressure_release(struct kernfs_open_file *of) + { +- psi_trigger_replace(&of->priv, NULL); ++ psi_trigger_destroy(of->priv); + } + #endif /* CONFIG_PSI */ + +--- a/kernel/sched/psi.c ++++ b/kernel/sched/psi.c +@@ -1116,7 +1116,6 @@ struct psi_trigger *psi_trigger_create(s + t->event = 0; + t->last_event_time = 0; + init_waitqueue_head(&t->event_wait); +- kref_init(&t->refcount); + + mutex_lock(&group->trigger_lock); + +@@ -1145,15 +1144,19 @@ struct psi_trigger *psi_trigger_create(s + return t; + } + +-static void psi_trigger_destroy(struct kref *ref) ++void psi_trigger_destroy(struct psi_trigger *t) + { +- struct psi_trigger *t = container_of(ref, struct psi_trigger, refcount); +- struct psi_group *group = t->group; ++ struct psi_group *group; + struct task_struct *task_to_destroy = NULL; + +- if (static_branch_likely(&psi_disabled)) ++ /* ++ * We do not check psi_disabled since it might have been disabled after ++ * the trigger got created. ++ */ ++ if (!t) + return; + ++ group = t->group; + /* + * Wakeup waiters to stop polling. Can happen if cgroup is deleted + * from under a polling process. +@@ -1189,9 +1192,9 @@ static void psi_trigger_destroy(struct k + mutex_unlock(&group->trigger_lock); + + /* +- * Wait for both *trigger_ptr from psi_trigger_replace and +- * poll_task RCUs to complete their read-side critical sections +- * before destroying the trigger and optionally the poll_task ++ * Wait for psi_schedule_poll_work RCU to complete its read-side ++ * critical section before destroying the trigger and optionally the ++ * poll_task. 
+ */ + synchronize_rcu(); + /* +@@ -1208,18 +1211,6 @@ static void psi_trigger_destroy(struct k + kfree(t); + } + +-void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *new) +-{ +- struct psi_trigger *old = *trigger_ptr; +- +- if (static_branch_likely(&psi_disabled)) +- return; +- +- rcu_assign_pointer(*trigger_ptr, new); +- if (old) +- kref_put(&old->refcount, psi_trigger_destroy); +-} +- + __poll_t psi_trigger_poll(void **trigger_ptr, + struct file *file, poll_table *wait) + { +@@ -1229,24 +1220,15 @@ __poll_t psi_trigger_poll(void **trigger + if (static_branch_likely(&psi_disabled)) + return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI; + +- rcu_read_lock(); +- +- t = rcu_dereference(*(void __rcu __force **)trigger_ptr); +- if (!t) { +- rcu_read_unlock(); ++ t = smp_load_acquire(trigger_ptr); ++ if (!t) + return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI; +- } +- kref_get(&t->refcount); +- +- rcu_read_unlock(); + + poll_wait(file, &t->event_wait, wait); + + if (cmpxchg(&t->event, 1, 0) == 1) + ret |= EPOLLPRI; + +- kref_put(&t->refcount, psi_trigger_destroy); +- + return ret; + } + +@@ -1270,14 +1252,24 @@ static ssize_t psi_write(struct file *fi + + buf[buf_size - 1] = '\0'; + +- new = psi_trigger_create(&psi_system, buf, nbytes, res); +- if (IS_ERR(new)) +- return PTR_ERR(new); +- + seq = file->private_data; ++ + /* Take seq->lock to protect seq->private from concurrent writes */ + mutex_lock(&seq->lock); +- psi_trigger_replace(&seq->private, new); ++ ++ /* Allow only one trigger per file descriptor */ ++ if (seq->private) { ++ mutex_unlock(&seq->lock); ++ return -EBUSY; ++ } ++ ++ new = psi_trigger_create(&psi_system, buf, nbytes, res); ++ if (IS_ERR(new)) { ++ mutex_unlock(&seq->lock); ++ return PTR_ERR(new); ++ } ++ ++ smp_store_release(&seq->private, new); + mutex_unlock(&seq->lock); + + return nbytes; +@@ -1312,7 +1304,7 @@ static int psi_fop_release(struct inode + { + struct seq_file *seq = file->private_data; + +- psi_trigger_replace(&seq->private, 
NULL); ++ psi_trigger_destroy(seq->private); + return single_release(inode, file); + } + diff --git a/queue-5.10/revert-drivers-bus-simple-pm-bus-add-support-for-probing-simple-bus-only-devices.patch b/queue-5.10/revert-drivers-bus-simple-pm-bus-add-support-for-probing-simple-bus-only-devices.patch new file mode 100644 index 00000000000..813e0a7b47f --- /dev/null +++ b/queue-5.10/revert-drivers-bus-simple-pm-bus-add-support-for-probing-simple-bus-only-devices.patch @@ -0,0 +1,88 @@ +From khilman@baylibre.com Thu Feb 3 18:02:09 2022 +From: Kevin Hilman +Date: Wed, 2 Feb 2022 11:57:05 -0800 +Subject: Revert "drivers: bus: simple-pm-bus: Add support for probing simple bus only devices" +To: stable@vger.kernel.org +Cc: Saravana Kannan +Message-ID: <20220202195705.3598798-1-khilman@baylibre.com> + +From: Kevin Hilman + +This reverts commit d5f13bbb51046537b2c2b9868177fb8fe8a6a6e9 which is +commit 98e96cf80045a383fcc47c58dd4e87b3ae587b3e upstream. + +This change related to fw_devlink was backported to v5.10 but has +several other dependencies that were not backported. As discussed +with the original author, the best approach for v5.10 is to revert. + +Link: https://lore.kernel.org/linux-omap/7hk0efmfzo.fsf@baylibre.com +Acked-by: Saravana Kannan +Signed-off-by: Kevin Hilman +Signed-off-by: Greg Kroah-Hartman +--- + drivers/bus/simple-pm-bus.c | 39 +-------------------------------------- + 1 file changed, 1 insertion(+), 38 deletions(-) + +--- a/drivers/bus/simple-pm-bus.c ++++ b/drivers/bus/simple-pm-bus.c +@@ -16,33 +16,7 @@ + + static int simple_pm_bus_probe(struct platform_device *pdev) + { +- const struct device *dev = &pdev->dev; +- struct device_node *np = dev->of_node; +- const struct of_device_id *match; +- +- /* +- * Allow user to use driver_override to bind this driver to a +- * transparent bus device which has a different compatible string +- * that's not listed in simple_pm_bus_of_match.
We don't want to do any +- * of the simple-pm-bus tasks for these devices, so return early. +- */ +- if (pdev->driver_override) +- return 0; +- +- match = of_match_device(dev->driver->of_match_table, dev); +- /* +- * These are transparent bus devices (not simple-pm-bus matches) that +- * have their child nodes populated automatically. So, don't need to +- * do anything more. We only match with the device if this driver is +- * the most specific match because we don't want to incorrectly bind to +- * a device that has a more specific driver. +- */ +- if (match && match->data) { +- if (of_property_match_string(np, "compatible", match->compatible) == 0) +- return 0; +- else +- return -ENODEV; +- } ++ struct device_node *np = pdev->dev.of_node; + + dev_dbg(&pdev->dev, "%s\n", __func__); + +@@ -56,25 +30,14 @@ static int simple_pm_bus_probe(struct pl + + static int simple_pm_bus_remove(struct platform_device *pdev) + { +- const void *data = of_device_get_match_data(&pdev->dev); +- +- if (pdev->driver_override || data) +- return 0; +- + dev_dbg(&pdev->dev, "%s\n", __func__); + + pm_runtime_disable(&pdev->dev); + return 0; + } + +-#define ONLY_BUS ((void *) 1) /* Match if the device is only a bus. 
*/ +- + static const struct of_device_id simple_pm_bus_of_match[] = { + { .compatible = "simple-pm-bus", }, +- { .compatible = "simple-bus", .data = ONLY_BUS }, +- { .compatible = "simple-mfd", .data = ONLY_BUS }, +- { .compatible = "isa", .data = ONLY_BUS }, +- { .compatible = "arm,amba-bus", .data = ONLY_BUS }, + { /* sentinel */ } + }; + MODULE_DEVICE_TABLE(of, simple_pm_bus_of_match); diff --git a/queue-5.10/series b/queue-5.10/series index 1985dac73b7..80303d82bec 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -1 +1,12 @@ pci-pciehp-fix-infinite-loop-in-irq-handler-upon-power-fault.patch +net-ipa-fix-atomic-update-in-ipa_endpoint_replenish.patch +net-ipa-use-a-bitmap-for-endpoint-replenish_enabled.patch +net-ipa-prevent-concurrent-replenish.patch +revert-drivers-bus-simple-pm-bus-add-support-for-probing-simple-bus-only-devices.patch +kvm-x86-forcibly-leave-nested-virt-when-smm-state-is-toggled.patch +psi-fix-uaf-issue-when-psi-trigger-is-destroyed-while-being-polled.patch +perf-rework-perf_event_exit_event.patch +perf-core-fix-cgroup-event-list-management.patch +x86-mce-add-xeon-sapphire-rapids-to-list-of-cpus-that-support-ppin.patch +x86-cpu-add-xeon-icelake-d-to-list-of-cpus-that-support-ppin.patch +drm-vc4-hdmi-make-sure-the-device-is-powered-with-cec.patch diff --git a/queue-5.10/x86-cpu-add-xeon-icelake-d-to-list-of-cpus-that-support-ppin.patch b/queue-5.10/x86-cpu-add-xeon-icelake-d-to-list-of-cpus-that-support-ppin.patch new file mode 100644 index 00000000000..96b6959beac --- /dev/null +++ b/queue-5.10/x86-cpu-add-xeon-icelake-d-to-list-of-cpus-that-support-ppin.patch @@ -0,0 +1,33 @@ +From e464121f2d40eabc7d11823fb26db807ce945df4 Mon Sep 17 00:00:00 2001 +From: Tony Luck +Date: Fri, 21 Jan 2022 09:47:38 -0800 +Subject: x86/cpu: Add Xeon Icelake-D to list of CPUs that support PPIN + +From: Tony Luck + +commit e464121f2d40eabc7d11823fb26db807ce945df4 upstream. + +Missed adding the Icelake-D CPU to the list. 
It uses the same MSRs +to control and read the inventory number as all the other models. + +Fixes: dc6b025de95b ("x86/mce: Add Xeon Icelake to list of CPUs that support PPIN") +Reported-by: Ailin Xu +Signed-off-by: Tony Luck +Signed-off-by: Borislav Petkov +Cc: +Link: https://lore.kernel.org/r/20220121174743.1875294-2-tony.luck@intel.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/cpu/mce/intel.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/x86/kernel/cpu/mce/intel.c ++++ b/arch/x86/kernel/cpu/mce/intel.c +@@ -486,6 +486,7 @@ static void intel_ppin_init(struct cpuin + case INTEL_FAM6_BROADWELL_X: + case INTEL_FAM6_SKYLAKE_X: + case INTEL_FAM6_ICELAKE_X: ++ case INTEL_FAM6_ICELAKE_D: + case INTEL_FAM6_SAPPHIRERAPIDS_X: + case INTEL_FAM6_XEON_PHI_KNL: + case INTEL_FAM6_XEON_PHI_KNM: diff --git a/queue-5.10/x86-mce-add-xeon-sapphire-rapids-to-list-of-cpus-that-support-ppin.patch b/queue-5.10/x86-mce-add-xeon-sapphire-rapids-to-list-of-cpus-that-support-ppin.patch new file mode 100644 index 00000000000..4fc6b05759a --- /dev/null +++ b/queue-5.10/x86-mce-add-xeon-sapphire-rapids-to-list-of-cpus-that-support-ppin.patch @@ -0,0 +1,29 @@ +From a331f5fdd36dba1ffb0239a4dfaaf1df91ff1aab Mon Sep 17 00:00:00 2001 +From: Tony Luck +Date: Fri, 19 Mar 2021 10:39:19 -0700 +Subject: x86/mce: Add Xeon Sapphire Rapids to list of CPUs that support PPIN + +From: Tony Luck + +commit a331f5fdd36dba1ffb0239a4dfaaf1df91ff1aab upstream. + +New CPU model, same MSRs to control and read the inventory number. 
+ +Signed-off-by: Tony Luck +Signed-off-by: Ingo Molnar +Link: https://lore.kernel.org/r/20210319173919.291428-1-tony.luck@intel.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/cpu/mce/intel.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/x86/kernel/cpu/mce/intel.c ++++ b/arch/x86/kernel/cpu/mce/intel.c +@@ -486,6 +486,7 @@ static void intel_ppin_init(struct cpuin + case INTEL_FAM6_BROADWELL_X: + case INTEL_FAM6_SKYLAKE_X: + case INTEL_FAM6_ICELAKE_X: ++ case INTEL_FAM6_SAPPHIRERAPIDS_X: + case INTEL_FAM6_XEON_PHI_KNL: + case INTEL_FAM6_XEON_PHI_KNM: + -- 2.47.3