From 0af59cb889e575f0768dc81c7023372245bea243 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Mon, 13 Mar 2023 08:06:04 -0400 Subject: [PATCH] Fixes for 5.15 Signed-off-by: Sasha Levin --- ...-fix-struct-pid-leaks-in-oob-support.patch | 88 +++++ ...nnecessary-brackets-around-config_af.patch | 54 ++++ ...-efi-make-efi_rt_lock-a-raw_spinlock.patch | 104 ++++++ ...nitial-chip-reset-to-support-bcm5358.patch | 92 ++++++ ...-error-handling-support-for-add_disk.patch | 60 ++++ ...der-5-memory-allocation-for-tpa-data.patch | 114 +++++++ ...an-infinite-loop-error-when-len-is-0.patch | 127 ++++++++ .../brd-mark-as-nowait-compatible.patch | 62 ++++ ...g-btf_kind_var-after-array-struct-un.patch | 98 ++++++ ...ix-context-faults-during-ring-switch.patch | 49 +++ ...m-a5xx-fix-highest-bank-bit-for-a530.patch | 38 +++ ...-setting-of-the-cp_preempt_enable_lo.patch | 41 +++ ...-the-emptyness-check-in-the-preempt-.patch | 42 +++ ...msm-dpu-fix-len-of-sc7180-ctl-blocks.patch | 52 +++ ...m-msm-fix-potential-invalid-ptr-free.patch | 49 +++ ...ms-nv50-fix-nv50_wndw_new_-prototype.patch | 64 ++++ ...eau-kms-nv50-remove-unused-functions.patch | 100 ++++++ ...rlycon-replace-open-coded-strnchrnul.patch | 55 ++++ ...e-corruption-when-moving-a-directory.patch | 59 ++++ ...write-on-nat_tree_lock-during-checkp.patch | 37 +++ ...her-checkpoint-by-f2fs_get_node_info.patch | 300 ++++++++++++++++++ ...date-the-inode-page-given-data-corru.patch | 57 ++++ ...lm-add-midcomms-init-start-functions.patch | 202 ++++++++++++ ...-dlm-fix-log-of-lowcomms-vs-midcomms.patch | 38 +++ .../fs-dlm-start-midcomms-before-scand.patch | 62 ++++ ...ock-omitted-in-ice_get_module_eeprom.patch | 115 +++++++ ...ate-empty-messages-in-ila_xlat_nl_cm.patch | 113 +++++++ ...ix-pasid-directory-pointer-coherency.patch | 82 +++++ ...-add-a-timer-between-request-retries.patch | 135 ++++++++ ...ssif-increase-the-message-retry-time.patch | 36 +++ ...ain-refactor-__irq_domain_alloc_irqs.patch | 155 +++++++++ 
...ze-kvm_make_vcpus_request_mask-a-bit.patch | 151 +++++++++ ...-cpumasks-for-kvm_make_all_cpus_requ.patch | 112 +++++++ ...-kvm-as-the-_very_-last-thing-during.patch | 88 +++++ ...write-guest-icr-on-avic-ipi-virtuali.patch | 75 +++++ ...icr-on-avic-ipi-delivery-failure-due.patch | 70 ++++ ...e-after-free-in-cfusbl_device_notify.patch | 86 +++++ ...ermit-port-5-to-work-without-port-6-.patch | 151 +++++++++ ..._eth_soc-fix-rx-data-corruption-issu.patch | 71 +++++ ...accessing-the-lan7800-s-internal-phy.patch | 126 ++++++++ .../net-phy-smsc-cache-interrupt-mask.patch | 99 ++++++ ...-link-up-detection-in-forced-irq-mod.patch | 79 +++++ ...hylib-get-rid-of-unnecessary-locking.patch | 174 ++++++++++ ...back-failed-while-sendmsg-with-fasto.patch | 74 +++++ ...o-set-device-wake-up-flag-when-stmma.patch | 50 +++ ...nntrack-adopt-safer-max-chain-length.patch | 53 ++++ ...ink-revert-to-dumping-mark-regardles.patch | 80 +++++ ...-fix-deadlock-due-to-missing-bh-disa.patch | 83 +++++ ...-check-of-devm_kmalloc_array-in-fdp_.patch | 49 +++ ...ock-contexts-in-the-queue-context-ca.patch | 229 +++++++++++++ ...unting-when-initial-delay-configured.patch | 178 +++++++++++ ..._platform-select-regmap-instead-of-d.patch | 50 +++ ...0rdb-fix-compatible-string-for-rev-a.patch | 36 +++ ...ix-off-on-delay-us-for-always-on-boo.patch | 57 ++++ ...se-ktime_get_boottime-to-determine-h.patch | 65 ++++ ...ncontrollable-regulators-as-always_o.patch | 58 ++++ ...-avoid-dereferening-null-regs-in-die.patch | 57 ++++ ...check-text_mutex-during-stop_machine.patch | 163 ++++++++++ ...-add-header-include-guards-to-insn.h.patch | 48 +++ ...scv-avoid-enabling-interrupts-in-die.patch | 60 ++++ ...nce_nocheck-in-imprecise-unwinding-s.patch | 99 ++++++ queue-5.15/s390-ftrace-remove-dead-code.patch | 170 ++++++++++ ...-the-proc-scsi-proc_name-directory-e.patch | 79 +++++ ...s-update-max-supported-ld-ids-to-240.patch | 60 ++++ ...t-ensuring-the-listening-side-is-up-.patch | 58 ++++ queue-5.15/series | 67 ++++ 
.../sunrpc-fix-a-server-shutdown-leak.patch | 48 +++ ...ne-error-when-discarding-preallocati.patch | 38 +++ 68 files changed, 5971 insertions(+) create mode 100644 queue-5.15/af_unix-fix-struct-pid-leaks-in-oob-support.patch create mode 100644 queue-5.15/af_unix-remove-unnecessary-brackets-around-config_af.patch create mode 100644 queue-5.15/arm64-efi-make-efi_rt_lock-a-raw_spinlock.patch create mode 100644 queue-5.15/bgmac-fix-initial-chip-reset-to-support-bcm5358.patch create mode 100644 queue-5.15/block-brd-add-error-handling-support-for-add_disk.patch create mode 100644 queue-5.15/bnxt_en-avoid-order-5-memory-allocation-for-tpa-data.patch create mode 100644 queue-5.15/bpf-sockmap-fix-an-infinite-loop-error-when-len-is-0.patch create mode 100644 queue-5.15/brd-mark-as-nowait-compatible.patch create mode 100644 queue-5.15/btf-fix-resolving-btf_kind_var-after-array-struct-un.patch create mode 100644 queue-5.15/drm-msm-a5xx-fix-context-faults-during-ring-switch.patch create mode 100644 queue-5.15/drm-msm-a5xx-fix-highest-bank-bit-for-a530.patch create mode 100644 queue-5.15/drm-msm-a5xx-fix-setting-of-the-cp_preempt_enable_lo.patch create mode 100644 queue-5.15/drm-msm-a5xx-fix-the-emptyness-check-in-the-preempt-.patch create mode 100644 queue-5.15/drm-msm-dpu-fix-len-of-sc7180-ctl-blocks.patch create mode 100644 queue-5.15/drm-msm-fix-potential-invalid-ptr-free.patch create mode 100644 queue-5.15/drm-nouveau-kms-nv50-fix-nv50_wndw_new_-prototype.patch create mode 100644 queue-5.15/drm-nouveau-kms-nv50-remove-unused-functions.patch create mode 100644 queue-5.15/efi-earlycon-replace-open-coded-strnchrnul.patch create mode 100644 queue-5.15/ext4-fix-possible-corruption-when-moving-a-directory.patch create mode 100644 queue-5.15/f2fs-avoid-down_write-on-nat_tree_lock-during-checkp.patch create mode 100644 queue-5.15/f2fs-do-not-bother-checkpoint-by-f2fs_get_node_info.patch create mode 100644 queue-5.15/f2fs-retry-to-update-the-inode-page-given-data-corru.patch create 
mode 100644 queue-5.15/fs-dlm-add-midcomms-init-start-functions.patch create mode 100644 queue-5.15/fs-dlm-fix-log-of-lowcomms-vs-midcomms.patch create mode 100644 queue-5.15/fs-dlm-start-midcomms-before-scand.patch create mode 100644 queue-5.15/ice-copy-last-block-omitted-in-ice_get_module_eeprom.patch create mode 100644 queue-5.15/ila-do-not-generate-empty-messages-in-ila_xlat_nl_cm.patch create mode 100644 queue-5.15/iommu-vt-d-fix-pasid-directory-pointer-coherency.patch create mode 100644 queue-5.15/ipmi-ssif-add-a-timer-between-request-retries.patch create mode 100644 queue-5.15/ipmi-ssif-increase-the-message-retry-time.patch create mode 100644 queue-5.15/irqdomain-refactor-__irq_domain_alloc_irqs.patch create mode 100644 queue-5.15/kvm-optimize-kvm_make_vcpus_request_mask-a-bit.patch create mode 100644 queue-5.15/kvm-pre-allocate-cpumasks-for-kvm_make_all_cpus_requ.patch create mode 100644 queue-5.15/kvm-register-dev-kvm-as-the-_very_-last-thing-during.patch create mode 100644 queue-5.15/kvm-svm-don-t-rewrite-guest-icr-on-avic-ipi-virtuali.patch create mode 100644 queue-5.15/kvm-svm-process-icr-on-avic-ipi-delivery-failure-due.patch create mode 100644 queue-5.15/net-caif-fix-use-after-free-in-cfusbl_device_notify.patch create mode 100644 queue-5.15/net-dsa-mt7530-permit-port-5-to-work-without-port-6-.patch create mode 100644 queue-5.15/net-ethernet-mtk_eth_soc-fix-rx-data-corruption-issu.patch create mode 100644 queue-5.15/net-lan78xx-fix-accessing-the-lan7800-s-internal-phy.patch create mode 100644 queue-5.15/net-phy-smsc-cache-interrupt-mask.patch create mode 100644 queue-5.15/net-phy-smsc-fix-link-up-detection-in-forced-irq-mod.patch create mode 100644 queue-5.15/net-phylib-get-rid-of-unnecessary-locking.patch create mode 100644 queue-5.15/net-smc-fix-fallback-failed-while-sendmsg-with-fasto.patch create mode 100644 queue-5.15/net-stmmac-add-to-set-device-wake-up-flag-when-stmma.patch create mode 100644 
queue-5.15/netfilter-conntrack-adopt-safer-max-chain-length.patch create mode 100644 queue-5.15/netfilter-ctnetlink-revert-to-dumping-mark-regardles.patch create mode 100644 queue-5.15/netfilter-tproxy-fix-deadlock-due-to-missing-bh-disa.patch create mode 100644 queue-5.15/nfc-fdp-add-null-check-of-devm_kmalloc_array-in-fdp_.patch create mode 100644 queue-5.15/octeontx2-af-unlock-contexts-in-the-queue-context-ca.patch create mode 100644 queue-5.15/perf-stat-fix-counting-when-initial-delay-configured.patch create mode 100644 queue-5.15/platform-x86-mlx_platform-select-regmap-instead-of-d.patch create mode 100644 queue-5.15/powerpc-dts-t1040rdb-fix-compatible-string-for-rev-a.patch create mode 100644 queue-5.15/regulator-core-fix-off-on-delay-us-for-always-on-boo.patch create mode 100644 queue-5.15/regulator-core-use-ktime_get_boottime-to-determine-h.patch create mode 100644 queue-5.15/regulator-flag-uncontrollable-regulators-as-always_o.patch create mode 100644 queue-5.15/risc-v-avoid-dereferening-null-regs-in-die.patch create mode 100644 queue-5.15/risc-v-don-t-check-text_mutex-during-stop_machine.patch create mode 100644 queue-5.15/riscv-add-header-include-guards-to-insn.h.patch create mode 100644 queue-5.15/riscv-avoid-enabling-interrupts-in-die.patch create mode 100644 queue-5.15/riscv-use-read_once_nocheck-in-imprecise-unwinding-s.patch create mode 100644 queue-5.15/s390-ftrace-remove-dead-code.patch create mode 100644 queue-5.15/scsi-core-remove-the-proc-scsi-proc_name-directory-e.patch create mode 100644 queue-5.15/scsi-megaraid_sas-update-max-supported-ld-ids-to-240.patch create mode 100644 queue-5.15/selftests-nft_nat-ensuring-the-listening-side-is-up-.patch create mode 100644 queue-5.15/sunrpc-fix-a-server-shutdown-leak.patch create mode 100644 queue-5.15/udf-fix-off-by-one-error-when-discarding-preallocati.patch diff --git a/queue-5.15/af_unix-fix-struct-pid-leaks-in-oob-support.patch b/queue-5.15/af_unix-fix-struct-pid-leaks-in-oob-support.patch new file 
mode 100644 index 00000000000..926c01d323d --- /dev/null +++ b/queue-5.15/af_unix-fix-struct-pid-leaks-in-oob-support.patch @@ -0,0 +1,88 @@ +From 7d3549e6637b492a96883eeafba75d3cabcfa571 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 7 Mar 2023 16:45:30 +0000 +Subject: af_unix: fix struct pid leaks in OOB support + +From: Eric Dumazet + +[ Upstream commit 2aab4b96900272885bc157f8b236abf1cdc02e08 ] + +syzbot reported struct pid leak [1]. + +Issue is that queue_oob() calls maybe_add_creds() which potentially +holds a reference on a pid. + +But skb->destructor is not set (either directly or by calling +unix_scm_to_skb()) + +This means that subsequent kfree_skb() or consume_skb() would leak +this reference. + +In this fix, I chose to fully support scm even for the OOB message. + +[1] +BUG: memory leak +unreferenced object 0xffff8881053e7f80 (size 128): +comm "syz-executor242", pid 5066, jiffies 4294946079 (age 13.220s) +hex dump (first 32 bytes): +01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 
+backtrace: +[] alloc_pid+0x6a/0x560 kernel/pid.c:180 +[] copy_process+0x169f/0x26c0 kernel/fork.c:2285 +[] kernel_clone+0xf7/0x610 kernel/fork.c:2684 +[] __do_sys_clone+0x7c/0xb0 kernel/fork.c:2825 +[] do_syscall_x64 arch/x86/entry/common.c:50 [inline] +[] do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 +[] entry_SYSCALL_64_after_hwframe+0x63/0xcd + +Fixes: 314001f0bf92 ("af_unix: Add OOB support") +Reported-by: syzbot+7699d9e5635c10253a27@syzkaller.appspotmail.com +Signed-off-by: Eric Dumazet +Cc: Rao Shoaib +Reviewed-by: Kuniyuki Iwashima +Link: https://lore.kernel.org/r/20230307164530.771896-1-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/unix/af_unix.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c +index 32ddf8fe32c69..a96026dbdf94e 100644 +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -1970,7 +1970,8 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, + #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768)) + + #if IS_ENABLED(CONFIG_AF_UNIX_OOB) +-static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other) ++static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other, ++ struct scm_cookie *scm, bool fds_sent) + { + struct unix_sock *ousk = unix_sk(other); + struct sk_buff *skb; +@@ -1981,6 +1982,11 @@ static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other + if (!skb) + return err; + ++ err = unix_scm_to_skb(scm, skb, !fds_sent); ++ if (err < 0) { ++ kfree_skb(skb); ++ return err; ++ } + skb_put(skb, 1); + err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1); + +@@ -2108,7 +2114,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, + + #if IS_ENABLED(CONFIG_AF_UNIX_OOB) + if (msg->msg_flags & MSG_OOB) { +- err = queue_oob(sock, msg, other); ++ err = queue_oob(sock, msg, other, &scm, fds_sent); + if (err) + 
goto out_err; + sent++; +-- +2.39.2 + diff --git a/queue-5.15/af_unix-remove-unnecessary-brackets-around-config_af.patch b/queue-5.15/af_unix-remove-unnecessary-brackets-around-config_af.patch new file mode 100644 index 00000000000..37d39609496 --- /dev/null +++ b/queue-5.15/af_unix-remove-unnecessary-brackets-around-config_af.patch @@ -0,0 +1,54 @@ +From 41109f5d43d6a5ada210ef81027c62e51849738c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 17 Mar 2022 12:23:08 +0900 +Subject: af_unix: Remove unnecessary brackets around CONFIG_AF_UNIX_OOB. + +From: Kuniyuki Iwashima + +[ Upstream commit 4edf21aa94ee33c75f819f2b6eb6dd52ef8a1628 ] + +Let's remove unnecessary brackets around CONFIG_AF_UNIX_OOB. + +Signed-off-by: Kuniyuki Iwashima +Link: https://lore.kernel.org/r/20220317032308.65372-1-kuniyu@amazon.co.jp +Signed-off-by: Jakub Kicinski +Stable-dep-of: 2aab4b969002 ("af_unix: fix struct pid leaks in OOB support") +Signed-off-by: Sasha Levin +--- + net/unix/af_unix.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c +index 0a59a00cb5815..32ddf8fe32c69 100644 +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -1969,7 +1969,7 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, + */ + #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768)) + +-#if (IS_ENABLED(CONFIG_AF_UNIX_OOB)) ++#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other) + { + struct unix_sock *ousk = unix_sk(other); +@@ -2035,7 +2035,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, + + err = -EOPNOTSUPP; + if (msg->msg_flags & MSG_OOB) { +-#if (IS_ENABLED(CONFIG_AF_UNIX_OOB)) ++#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + if (len) + len--; + else +@@ -2106,7 +2106,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, + sent += size; + } + +-#if (IS_ENABLED(CONFIG_AF_UNIX_OOB)) ++#if 
IS_ENABLED(CONFIG_AF_UNIX_OOB) + if (msg->msg_flags & MSG_OOB) { + err = queue_oob(sock, msg, other); + if (err) +-- +2.39.2 + diff --git a/queue-5.15/arm64-efi-make-efi_rt_lock-a-raw_spinlock.patch b/queue-5.15/arm64-efi-make-efi_rt_lock-a-raw_spinlock.patch new file mode 100644 index 00000000000..f6f820e4f19 --- /dev/null +++ b/queue-5.15/arm64-efi-make-efi_rt_lock-a-raw_spinlock.patch @@ -0,0 +1,104 @@ +From b65512940fe56d2b6682e3a230cc66c4cda166e7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 15 Feb 2023 17:10:47 +0100 +Subject: arm64: efi: Make efi_rt_lock a raw_spinlock + +From: Pierre Gondois + +[ Upstream commit 0e68b5517d3767562889f1d83fdb828c26adb24f ] + +Running a rt-kernel base on 6.2.0-rc3-rt1 on an Ampere Altra outputs +the following: + BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:46 + in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 9, name: kworker/u320:0 + preempt_count: 2, expected: 0 + RCU nest depth: 0, expected: 0 + 3 locks held by kworker/u320:0/9: + #0: ffff3fff8c27d128 ((wq_completion)efi_rts_wq){+.+.}-{0:0}, at: process_one_work (./include/linux/atomic/atomic-long.h:41) + #1: ffff80000861bdd0 ((work_completion)(&efi_rts_work.work)){+.+.}-{0:0}, at: process_one_work (./include/linux/atomic/atomic-long.h:41) + #2: ffffdf7e1ed3e460 (efi_rt_lock){+.+.}-{3:3}, at: efi_call_rts (drivers/firmware/efi/runtime-wrappers.c:101) + Preemption disabled at: + efi_virtmap_load (./arch/arm64/include/asm/mmu_context.h:248) + CPU: 0 PID: 9 Comm: kworker/u320:0 Tainted: G W 6.2.0-rc3-rt1 + Hardware name: WIWYNN Mt.Jade Server System B81.03001.0005/Mt.Jade Motherboard, BIOS 1.08.20220218 (SCP: 1.08.20220218) 2022/02/18 + Workqueue: efi_rts_wq efi_call_rts + Call trace: + dump_backtrace (arch/arm64/kernel/stacktrace.c:158) + show_stack (arch/arm64/kernel/stacktrace.c:165) + dump_stack_lvl (lib/dump_stack.c:107 (discriminator 4)) + dump_stack (lib/dump_stack.c:114) + __might_resched (kernel/sched/core.c:10134) 
+ rt_spin_lock (kernel/locking/rtmutex.c:1769 (discriminator 4)) + efi_call_rts (drivers/firmware/efi/runtime-wrappers.c:101) + [...] + +This seems to come from commit ff7a167961d1 ("arm64: efi: Execute +runtime services from a dedicated stack") which adds a spinlock. This +spinlock is taken through: +efi_call_rts() +\-efi_call_virt() + \-efi_call_virt_pointer() + \-arch_efi_call_virt_setup() + +Make 'efi_rt_lock' a raw_spinlock to avoid being preempted. + +[ardb: The EFI runtime services are called with a different set of + translation tables, and are permitted to use the SIMD registers. + The context switch code preserves/restores neither, and so EFI + calls must be made with preemption disabled, rather than only + disabling migration.] + +Fixes: ff7a167961d1 ("arm64: efi: Execute runtime services from a dedicated stack") +Signed-off-by: Pierre Gondois +Cc: # v6.1+ +Signed-off-by: Ard Biesheuvel +Signed-off-by: Sasha Levin +--- + arch/arm64/include/asm/efi.h | 6 +++--- + arch/arm64/kernel/efi.c | 2 +- + 2 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h +index c5d4551a1be71..53cbbb96f7ebf 100644 +--- a/arch/arm64/include/asm/efi.h ++++ b/arch/arm64/include/asm/efi.h +@@ -25,7 +25,7 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); + ({ \ + efi_virtmap_load(); \ + __efi_fpsimd_begin(); \ +- spin_lock(&efi_rt_lock); \ ++ raw_spin_lock(&efi_rt_lock); \ + }) + + #define arch_efi_call_virt(p, f, args...) 
\ +@@ -37,12 +37,12 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); + + #define arch_efi_call_virt_teardown() \ + ({ \ +- spin_unlock(&efi_rt_lock); \ ++ raw_spin_unlock(&efi_rt_lock); \ + __efi_fpsimd_end(); \ + efi_virtmap_unload(); \ + }) + +-extern spinlock_t efi_rt_lock; ++extern raw_spinlock_t efi_rt_lock; + efi_status_t __efi_rt_asm_wrapper(void *, const char *, ...); + + #define ARCH_EFI_IRQ_FLAGS_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT) +diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c +index 386bd81ca12bb..9669f3fa2aefe 100644 +--- a/arch/arm64/kernel/efi.c ++++ b/arch/arm64/kernel/efi.c +@@ -145,7 +145,7 @@ asmlinkage efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f) + return s; + } + +-DEFINE_SPINLOCK(efi_rt_lock); ++DEFINE_RAW_SPINLOCK(efi_rt_lock); + + asmlinkage u64 *efi_rt_stack_top __ro_after_init; + +-- +2.39.2 + diff --git a/queue-5.15/bgmac-fix-initial-chip-reset-to-support-bcm5358.patch b/queue-5.15/bgmac-fix-initial-chip-reset-to-support-bcm5358.patch new file mode 100644 index 00000000000..f6acbc26f4c --- /dev/null +++ b/queue-5.15/bgmac-fix-initial-chip-reset-to-support-bcm5358.patch @@ -0,0 +1,92 @@ +From 1f0ee39715191cdf3287cdeb87494bf4acb31909 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 27 Feb 2023 10:11:56 +0100 +Subject: bgmac: fix *initial* chip reset to support BCM5358 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Rafał Miłecki + +[ Upstream commit f99e6d7c4ed3be2531bd576425a5bd07fb133bd7 ] + +While bringing hardware up we should perform a full reset including the +switch bit (BGMAC_BCMA_IOCTL_SW_RESET aka SICF_SWRST). It's what +specification says and what reference driver does. + +This seems to be critical for the BCM5358. Without this hardware doesn't +get initialized properly and doesn't seem to transmit or receive any +packets. 
+ +Originally bgmac was calling bgmac_chip_reset() before setting +"has_robosw" property which resulted in expected behaviour. That has +changed as a side effect of adding platform device support which +regressed BCM5358 support. + +Fixes: f6a95a24957a ("net: ethernet: bgmac: Add platform device support") +Cc: Jon Mason +Signed-off-by: Rafał Miłecki +Reviewed-by: Leon Romanovsky +Reviewed-by: Florian Fainelli +Link: https://lore.kernel.org/r/20230227091156.19509-1-zajec5@gmail.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/broadcom/bgmac.c | 8 ++++++-- + drivers/net/ethernet/broadcom/bgmac.h | 2 ++ + 2 files changed, 8 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c +index fa2a43d465db7..f8fd65ab663ee 100644 +--- a/drivers/net/ethernet/broadcom/bgmac.c ++++ b/drivers/net/ethernet/broadcom/bgmac.c +@@ -890,13 +890,13 @@ static void bgmac_chip_reset_idm_config(struct bgmac *bgmac) + + if (iost & BGMAC_BCMA_IOST_ATTACHED) { + flags = BGMAC_BCMA_IOCTL_SW_CLKEN; +- if (!bgmac->has_robosw) ++ if (bgmac->in_init || !bgmac->has_robosw) + flags |= BGMAC_BCMA_IOCTL_SW_RESET; + } + bgmac_clk_enable(bgmac, flags); + } + +- if (iost & BGMAC_BCMA_IOST_ATTACHED && !bgmac->has_robosw) ++ if (iost & BGMAC_BCMA_IOST_ATTACHED && (bgmac->in_init || !bgmac->has_robosw)) + bgmac_idm_write(bgmac, BCMA_IOCTL, + bgmac_idm_read(bgmac, BCMA_IOCTL) & + ~BGMAC_BCMA_IOCTL_SW_RESET); +@@ -1490,6 +1490,8 @@ int bgmac_enet_probe(struct bgmac *bgmac) + struct net_device *net_dev = bgmac->net_dev; + int err; + ++ bgmac->in_init = true; ++ + bgmac_chip_intrs_off(bgmac); + + net_dev->irq = bgmac->irq; +@@ -1542,6 +1544,8 @@ int bgmac_enet_probe(struct bgmac *bgmac) + /* Omit FCS from max MTU size */ + net_dev->max_mtu = BGMAC_RX_MAX_FRAME_SIZE - ETH_FCS_LEN; + ++ bgmac->in_init = false; ++ + err = register_netdev(bgmac->net_dev); + if (err) { + dev_err(bgmac->dev, "Cannot register net 
device\n"); +diff --git a/drivers/net/ethernet/broadcom/bgmac.h b/drivers/net/ethernet/broadcom/bgmac.h +index 110088e662eab..99a344175a751 100644 +--- a/drivers/net/ethernet/broadcom/bgmac.h ++++ b/drivers/net/ethernet/broadcom/bgmac.h +@@ -474,6 +474,8 @@ struct bgmac { + int irq; + u32 int_mask; + ++ bool in_init; ++ + /* Current MAC state */ + int mac_speed; + int mac_duplex; +-- +2.39.2 + diff --git a/queue-5.15/block-brd-add-error-handling-support-for-add_disk.patch b/queue-5.15/block-brd-add-error-handling-support-for-add_disk.patch new file mode 100644 index 00000000000..4bcea706fda --- /dev/null +++ b/queue-5.15/block-brd-add-error-handling-support-for-add_disk.patch @@ -0,0 +1,60 @@ +From 4ae62873c2c500df620def815e4923049836e82d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 15 Oct 2021 16:52:07 -0700 +Subject: block/brd: add error handling support for add_disk() + +From: Luis Chamberlain + +[ Upstream commit e1528830bd4ebf435d91c154e309e6e028336210 ] + +We never checked for errors on add_disk() as this function +returned void. Now that this is fixed, use the shiny new +error handling. 
+ +Signed-off-by: Luis Chamberlain +Link: https://lore.kernel.org/r/20211015235219.2191207-2-mcgrof@kernel.org +Signed-off-by: Jens Axboe +Stable-dep-of: 67205f80be99 ("brd: mark as nowait compatible") +Signed-off-by: Sasha Levin +--- + drivers/block/brd.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +diff --git a/drivers/block/brd.c b/drivers/block/brd.c +index 2427b2261e516..63ac5cd523408 100644 +--- a/drivers/block/brd.c ++++ b/drivers/block/brd.c +@@ -370,6 +370,7 @@ static int brd_alloc(int i) + struct brd_device *brd; + struct gendisk *disk; + char buf[DISK_NAME_LEN]; ++ int err = -ENOMEM; + + mutex_lock(&brd_devices_mutex); + list_for_each_entry(brd, &brd_devices, brd_list) { +@@ -420,16 +421,20 @@ static int brd_alloc(int i) + /* Tell the block layer that this is not a rotational device */ + blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue); +- add_disk(disk); ++ err = add_disk(disk); ++ if (err) ++ goto out_cleanup_disk; + + return 0; + ++out_cleanup_disk: ++ blk_cleanup_disk(disk); + out_free_dev: + mutex_lock(&brd_devices_mutex); + list_del(&brd->brd_list); + mutex_unlock(&brd_devices_mutex); + kfree(brd); +- return -ENOMEM; ++ return err; + } + + static void brd_probe(dev_t dev) +-- +2.39.2 + diff --git a/queue-5.15/bnxt_en-avoid-order-5-memory-allocation-for-tpa-data.patch b/queue-5.15/bnxt_en-avoid-order-5-memory-allocation-for-tpa-data.patch new file mode 100644 index 00000000000..731be978059 --- /dev/null +++ b/queue-5.15/bnxt_en-avoid-order-5-memory-allocation-for-tpa-data.patch @@ -0,0 +1,114 @@ +From f2ec910b1b12258e18eac322e959ada25e7359e0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 3 Mar 2023 18:43:57 -0800 +Subject: bnxt_en: Avoid order-5 memory allocation for TPA data + +From: Michael Chan + +[ Upstream commit accd7e23693aaaa9aa0d3e9eca0ae77d1be80ab3 ] + +The driver needs to keep track of all the possible concurrent TPA (GRO/LRO) +completions on the 
aggregation ring. On P5 chips, the maximum number +of concurrent TPA is 256 and the amount of memory we allocate is order-5 +on systems using 4K pages. Memory allocation failure has been reported: + +NetworkManager: page allocation failure: order:5, mode:0x40dc0(GFP_KERNEL|__GFP_COMP|__GFP_ZERO), nodemask=(null),cpuset=/,mems_allowed=0-1 +CPU: 15 PID: 2995 Comm: NetworkManager Kdump: loaded Not tainted 5.10.156 #1 +Hardware name: Dell Inc. PowerEdge R660/0M1CC5, BIOS 0.2.25 08/12/2022 +Call Trace: + dump_stack+0x57/0x6e + warn_alloc.cold.120+0x7b/0xdd + ? _cond_resched+0x15/0x30 + ? __alloc_pages_direct_compact+0x15f/0x170 + __alloc_pages_slowpath.constprop.108+0xc58/0xc70 + __alloc_pages_nodemask+0x2d0/0x300 + kmalloc_order+0x24/0xe0 + kmalloc_order_trace+0x19/0x80 + bnxt_alloc_mem+0x1150/0x15c0 [bnxt_en] + ? bnxt_get_func_stat_ctxs+0x13/0x60 [bnxt_en] + __bnxt_open_nic+0x12e/0x780 [bnxt_en] + bnxt_open+0x10b/0x240 [bnxt_en] + __dev_open+0xe9/0x180 + __dev_change_flags+0x1af/0x220 + dev_change_flags+0x21/0x60 + do_setlink+0x35c/0x1100 + +Instead of allocating this big chunk of memory and dividing it up for the +concurrent TPA instances, allocate each small chunk separately for each +TPA instance. This will reduce it to order-0 allocations. + +Fixes: 79632e9ba386 ("bnxt_en: Expand bnxt_tpa_info struct to support 57500 chips.") +Reviewed-by: Somnath Kotur +Reviewed-by: Damodharam Ammepalli +Reviewed-by: Pavan Chebbi +Signed-off-by: Michael Chan +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/broadcom/bnxt/bnxt.c | 23 ++++++++++++----------- + 1 file changed, 12 insertions(+), 11 deletions(-) + +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +index f64df4d532896..4e98e34fc46b5 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +@@ -2999,7 +2999,7 @@ static int bnxt_alloc_ring(struct bnxt *bp, struct bnxt_ring_mem_info *rmem) + + static void bnxt_free_tpa_info(struct bnxt *bp) + { +- int i; ++ int i, j; + + for (i = 0; i < bp->rx_nr_rings; i++) { + struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i]; +@@ -3007,8 +3007,10 @@ static void bnxt_free_tpa_info(struct bnxt *bp) + kfree(rxr->rx_tpa_idx_map); + rxr->rx_tpa_idx_map = NULL; + if (rxr->rx_tpa) { +- kfree(rxr->rx_tpa[0].agg_arr); +- rxr->rx_tpa[0].agg_arr = NULL; ++ for (j = 0; j < bp->max_tpa; j++) { ++ kfree(rxr->rx_tpa[j].agg_arr); ++ rxr->rx_tpa[j].agg_arr = NULL; ++ } + } + kfree(rxr->rx_tpa); + rxr->rx_tpa = NULL; +@@ -3017,14 +3019,13 @@ static void bnxt_free_tpa_info(struct bnxt *bp) + + static int bnxt_alloc_tpa_info(struct bnxt *bp) + { +- int i, j, total_aggs = 0; ++ int i, j; + + bp->max_tpa = MAX_TPA; + if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (!bp->max_tpa_v2) + return 0; + bp->max_tpa = max_t(u16, bp->max_tpa_v2, MAX_TPA_P5); +- total_aggs = bp->max_tpa * MAX_SKB_FRAGS; + } + + for (i = 0; i < bp->rx_nr_rings; i++) { +@@ -3038,12 +3039,12 @@ static int bnxt_alloc_tpa_info(struct bnxt *bp) + + if (!(bp->flags & BNXT_FLAG_CHIP_P5)) + continue; +- agg = kcalloc(total_aggs, sizeof(*agg), GFP_KERNEL); +- rxr->rx_tpa[0].agg_arr = agg; +- if (!agg) +- return -ENOMEM; +- for (j = 1; j < bp->max_tpa; j++) +- rxr->rx_tpa[j].agg_arr = agg + j * MAX_SKB_FRAGS; ++ for (j = 0; j < bp->max_tpa; j++) { ++ agg = kcalloc(MAX_SKB_FRAGS, sizeof(*agg), GFP_KERNEL); ++ if (!agg) ++ return -ENOMEM; ++ rxr->rx_tpa[j].agg_arr = agg; ++ } + 
rxr->rx_tpa_idx_map = kzalloc(sizeof(*rxr->rx_tpa_idx_map), + GFP_KERNEL); + if (!rxr->rx_tpa_idx_map) +-- +2.39.2 + diff --git a/queue-5.15/bpf-sockmap-fix-an-infinite-loop-error-when-len-is-0.patch b/queue-5.15/bpf-sockmap-fix-an-infinite-loop-error-when-len-is-0.patch new file mode 100644 index 00000000000..bccb47504ab --- /dev/null +++ b/queue-5.15/bpf-sockmap-fix-an-infinite-loop-error-when-len-is-0.patch @@ -0,0 +1,127 @@ +From 81697bb44d5a589434111ea81f215574ea8b0461 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 3 Mar 2023 16:09:46 +0800 +Subject: bpf, sockmap: Fix an infinite loop error when len is 0 in + tcp_bpf_recvmsg_parser() + +From: Liu Jian + +[ Upstream commit d900f3d20cc3169ce42ec72acc850e662a4d4db2 ] + +When the buffer length of the recvmsg system call is 0, we got the +following soft lockup problem: + +watchdog: BUG: soft lockup - CPU#3 stuck for 27s! [a.out:6149] +CPU: 3 PID: 6149 Comm: a.out Kdump: loaded Not tainted 6.2.0+ #30 +Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1 04/01/2014 +RIP: 0010:remove_wait_queue+0xb/0xc0 +Code: 5e 41 5f c3 cc cc cc cc 0f 1f 80 00 00 00 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 0f 1f 44 00 00 41 57 <41> 56 41 55 41 54 55 48 89 fd 53 48 89 f3 4c 8d 6b 18 4c 8d 73 20 +RSP: 0018:ffff88811b5978b8 EFLAGS: 00000246 +RAX: 0000000000000000 RBX: ffff88811a7d3780 RCX: ffffffffb7a4d768 +RDX: dffffc0000000000 RSI: ffff88811b597908 RDI: ffff888115408040 +RBP: 1ffff110236b2f1b R08: 0000000000000000 R09: ffff88811a7d37e7 +R10: ffffed10234fa6fc R11: 0000000000000001 R12: ffff88811179b800 +R13: 0000000000000001 R14: ffff88811a7d38a8 R15: ffff88811a7d37e0 +FS: 00007f6fb5398740(0000) GS:ffff888237180000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 0000000020000000 CR3: 000000010b6ba002 CR4: 0000000000370ee0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 
+Call Trace: + + tcp_msg_wait_data+0x279/0x2f0 + tcp_bpf_recvmsg_parser+0x3c6/0x490 + inet_recvmsg+0x280/0x290 + sock_recvmsg+0xfc/0x120 + ____sys_recvmsg+0x160/0x3d0 + ___sys_recvmsg+0xf0/0x180 + __sys_recvmsg+0xea/0x1a0 + do_syscall_64+0x3f/0x90 + entry_SYSCALL_64_after_hwframe+0x72/0xdc + +The logic in tcp_bpf_recvmsg_parser is as follows: + +msg_bytes_ready: + copied = sk_msg_recvmsg(sk, psock, msg, len, flags); + if (!copied) { + wait data; + goto msg_bytes_ready; + } + +In this case, "copied" always is 0, the infinite loop occurs. + +According to the Linux system call man page, 0 should be returned in this +case. Therefore, in tcp_bpf_recvmsg_parser(), if the length is 0, directly +return. Also modify several other functions with the same problem. + +Fixes: 1f5be6b3b063 ("udp: Implement udp_bpf_recvmsg() for sockmap") +Fixes: 9825d866ce0d ("af_unix: Implement unix_dgram_bpf_recvmsg()") +Fixes: c5d2177a72a1 ("bpf, sockmap: Fix race in ingress receive verdict with redirect to self") +Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface") +Signed-off-by: Liu Jian +Signed-off-by: Daniel Borkmann +Acked-by: John Fastabend +Cc: Jakub Sitnicki +Link: https://lore.kernel.org/bpf/20230303080946.1146638-1-liujian56@huawei.com +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_bpf.c | 6 ++++++ + net/ipv4/udp_bpf.c | 3 +++ + net/unix/unix_bpf.c | 3 +++ + 3 files changed, 12 insertions(+) + +diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c +index 7f34c455651db..20ad554af3693 100644 +--- a/net/ipv4/tcp_bpf.c ++++ b/net/ipv4/tcp_bpf.c +@@ -187,6 +187,9 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk, + if (unlikely(flags & MSG_ERRQUEUE)) + return inet_recv_error(sk, msg, len, addr_len); + ++ if (!len) ++ return 0; ++ + psock = sk_psock_get(sk); + if (unlikely(!psock)) + return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); +@@ -245,6 +248,9 @@ static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + if (unlikely(flags & 
MSG_ERRQUEUE)) + return inet_recv_error(sk, msg, len, addr_len); + ++ if (!len) ++ return 0; ++ + psock = sk_psock_get(sk); + if (unlikely(!psock)) + return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); +diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c +index bbe6569c9ad34..56e1047632f6b 100644 +--- a/net/ipv4/udp_bpf.c ++++ b/net/ipv4/udp_bpf.c +@@ -69,6 +69,9 @@ static int udp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + if (unlikely(flags & MSG_ERRQUEUE)) + return inet_recv_error(sk, msg, len, addr_len); + ++ if (!len) ++ return 0; ++ + psock = sk_psock_get(sk); + if (unlikely(!psock)) + return sk_udp_recvmsg(sk, msg, len, nonblock, flags, addr_len); +diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c +index 452376c6f4194..5919d61d9874a 100644 +--- a/net/unix/unix_bpf.c ++++ b/net/unix/unix_bpf.c +@@ -55,6 +55,9 @@ static int unix_bpf_recvmsg(struct sock *sk, struct msghdr *msg, + struct sk_psock *psock; + int copied; + ++ if (!len) ++ return 0; ++ + psock = sk_psock_get(sk); + if (unlikely(!psock)) + return __unix_recvmsg(sk, msg, len, flags); +-- +2.39.2 + diff --git a/queue-5.15/brd-mark-as-nowait-compatible.patch b/queue-5.15/brd-mark-as-nowait-compatible.patch new file mode 100644 index 00000000000..9871b734153 --- /dev/null +++ b/queue-5.15/brd-mark-as-nowait-compatible.patch @@ -0,0 +1,62 @@ +From 9211eff97ca45f2f76de6d2be74ca3a8ccc87c2c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 15 Feb 2023 16:43:47 -0700 +Subject: brd: mark as nowait compatible + +From: Jens Axboe + +[ Upstream commit 67205f80be9910207481406c47f7d85e703fb2e9 ] + +By default, non-mq drivers do not support nowait. This causes io_uring +to use a slower path as the driver cannot be trusted not to block. brd +can safely set the nowait flag, as worst case all it does is a NOIO +allocation. + +For io_uring, this makes a substantial difference. 
Before: + +submitter=0, tid=453, file=/dev/ram0, node=-1 +polled=0, fixedbufs=1/0, register_files=1, buffered=0, QD=128 +Engine=io_uring, sq_ring=128, cq_ring=128 +IOPS=440.03K, BW=1718MiB/s, IOS/call=32/31 +IOPS=428.96K, BW=1675MiB/s, IOS/call=32/32 +IOPS=442.59K, BW=1728MiB/s, IOS/call=32/31 +IOPS=419.65K, BW=1639MiB/s, IOS/call=32/32 +IOPS=426.82K, BW=1667MiB/s, IOS/call=32/31 + +and after: + +submitter=0, tid=354, file=/dev/ram0, node=-1 +polled=0, fixedbufs=1/0, register_files=1, buffered=0, QD=128 +Engine=io_uring, sq_ring=128, cq_ring=128 +IOPS=3.37M, BW=13.15GiB/s, IOS/call=32/31 +IOPS=3.45M, BW=13.46GiB/s, IOS/call=32/31 +IOPS=3.43M, BW=13.42GiB/s, IOS/call=32/32 +IOPS=3.43M, BW=13.39GiB/s, IOS/call=32/31 +IOPS=3.43M, BW=13.38GiB/s, IOS/call=32/31 + +or about an 8x in difference. Now that brd is prepared to deal with +REQ_NOWAIT reads/writes, mark it as supporting that. + +Cc: stable@vger.kernel.org # 5.10+ +Link: https://lore.kernel.org/linux-block/20230203103005.31290-1-p.raghav@samsung.com/ +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + drivers/block/brd.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/block/brd.c b/drivers/block/brd.c +index 63ac5cd523408..76ce6f766d55e 100644 +--- a/drivers/block/brd.c ++++ b/drivers/block/brd.c +@@ -421,6 +421,7 @@ static int brd_alloc(int i) + /* Tell the block layer that this is not a rotational device */ + blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue); ++ blk_queue_flag_set(QUEUE_FLAG_NOWAIT, disk->queue); + err = add_disk(disk); + if (err) + goto out_cleanup_disk; +-- +2.39.2 + diff --git a/queue-5.15/btf-fix-resolving-btf_kind_var-after-array-struct-un.patch b/queue-5.15/btf-fix-resolving-btf_kind_var-after-array-struct-un.patch new file mode 100644 index 00000000000..c40b7e028fb --- /dev/null +++ b/queue-5.15/btf-fix-resolving-btf_kind_var-after-array-struct-un.patch @@ -0,0 +1,98 @@ +From 
e3e3160880fbe20317e809b99b4d94aa1fdf743b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 6 Mar 2023 11:21:37 +0000 +Subject: btf: fix resolving BTF_KIND_VAR after ARRAY, STRUCT, UNION, PTR + +From: Lorenz Bauer + +[ Upstream commit 9b459804ff9973e173fabafba2a1319f771e85fa ] + +btf_datasec_resolve contains a bug that causes the following BTF +to fail loading: + + [1] DATASEC a size=2 vlen=2 + type_id=4 offset=0 size=1 + type_id=7 offset=1 size=1 + [2] INT (anon) size=1 bits_offset=0 nr_bits=8 encoding=(none) + [3] PTR (anon) type_id=2 + [4] VAR a type_id=3 linkage=0 + [5] INT (anon) size=1 bits_offset=0 nr_bits=8 encoding=(none) + [6] TYPEDEF td type_id=5 + [7] VAR b type_id=6 linkage=0 + +This error message is printed during btf_check_all_types: + + [1] DATASEC a size=2 vlen=2 + type_id=7 offset=1 size=1 Invalid type + +By tracing btf_*_resolve we can pinpoint the problem: + + btf_datasec_resolve(depth: 1, type_id: 1, mode: RESOLVE_TBD) = 0 + btf_var_resolve(depth: 2, type_id: 4, mode: RESOLVE_TBD) = 0 + btf_ptr_resolve(depth: 3, type_id: 3, mode: RESOLVE_PTR) = 0 + btf_var_resolve(depth: 2, type_id: 4, mode: RESOLVE_PTR) = 0 + btf_datasec_resolve(depth: 1, type_id: 1, mode: RESOLVE_PTR) = -22 + +The last invocation of btf_datasec_resolve should invoke btf_var_resolve +by means of env_stack_push, instead it returns EINVAL. The reason is that +env_stack_push is never executed for the second VAR. + + if (!env_type_is_resolve_sink(env, var_type) && + !env_type_is_resolved(env, var_type_id)) { + env_stack_set_next_member(env, i + 1); + return env_stack_push(env, var_type, var_type_id); + } + +env_type_is_resolve_sink() changes its behaviour based on resolve_mode. +For RESOLVE_PTR, we can simplify the if condition to the following: + + (btf_type_is_modifier() || btf_type_is_ptr) && !env_type_is_resolved() + +Since we're dealing with a VAR the clause evaluates to false. This is +not sufficient to trigger the bug however. 
The log output and EINVAL +are only generated if btf_type_id_size() fails. + + if (!btf_type_id_size(btf, &type_id, &type_size)) { + btf_verifier_log_vsi(env, v->t, vsi, "Invalid type"); + return -EINVAL; + } + +Most types are sized, so for example a VAR referring to an INT is not a +problem. The bug is only triggered if a VAR points at a modifier. Since +we skipped btf_var_resolve that modifier was also never resolved, which +means that btf_resolved_type_id returns 0 aka VOID for the modifier. +This in turn causes btf_type_id_size to return NULL, triggering EINVAL. + +To summarise, the following conditions are necessary: + +- VAR pointing at PTR, STRUCT, UNION or ARRAY +- Followed by a VAR pointing at TYPEDEF, VOLATILE, CONST, RESTRICT or + TYPE_TAG + +The fix is to reset resolve_mode to RESOLVE_TBD before attempting to +resolve a VAR from a DATASEC. + +Fixes: 1dc92851849c ("bpf: kernel side support for BTF Var and DataSec") +Signed-off-by: Lorenz Bauer +Link: https://lore.kernel.org/r/20230306112138.155352-2-lmb@isovalent.com +Signed-off-by: Martin KaFai Lau +Signed-off-by: Sasha Levin +--- + kernel/bpf/btf.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c +index 1f9369b677fe2..6c7126de5c17f 100644 +--- a/kernel/bpf/btf.c ++++ b/kernel/bpf/btf.c +@@ -3655,6 +3655,7 @@ static int btf_datasec_resolve(struct btf_verifier_env *env, + struct btf *btf = env->btf; + u16 i; + ++ env->resolve_mode = RESOLVE_TBD; + for_each_vsi_from(i, v->next_member, v->t, vsi) { + u32 var_type_id = vsi->type, type_id, type_size = 0; + const struct btf_type *var_type = btf_type_by_id(env->btf, +-- +2.39.2 + diff --git a/queue-5.15/drm-msm-a5xx-fix-context-faults-during-ring-switch.patch b/queue-5.15/drm-msm-a5xx-fix-context-faults-during-ring-switch.patch new file mode 100644 index 00000000000..1a233d5c806 --- /dev/null +++ b/queue-5.15/drm-msm-a5xx-fix-context-faults-during-ring-switch.patch @@ -0,0 +1,49 @@ +From 
6823d5624aee11e3de3775e864c2de265d708a47 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 14 Feb 2023 05:09:56 +0300 +Subject: drm/msm/a5xx: fix context faults during ring switch + +From: Dmitry Baryshkov + +[ Upstream commit 32e7083429d46f29080626fe387ff90c086b1fbe ] + +The rptr_addr is set in the preempt_init_ring(), which is called from +a5xx_gpu_init(). It uses shadowptr() to set the address, however the +shadow_iova is not yet initialized at that time. Move the rptr_addr +setting to the a5xx_preempt_hw_init() which is called after setting the +shadow_iova, getting the correct value for the address. + +Fixes: 8907afb476ac ("drm/msm: Allow a5xx to mark the RPTR shadow as privileged") +Suggested-by: Rob Clark +Signed-off-by: Dmitry Baryshkov +Patchwork: https://patchwork.freedesktop.org/patch/522640/ +Link: https://lore.kernel.org/r/20230214020956.164473-5-dmitry.baryshkov@linaro.org +Signed-off-by: Rob Clark +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/msm/adreno/a5xx_preempt.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c +index 6e326d851ba53..e0eef47dae632 100644 +--- a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c ++++ b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c +@@ -208,6 +208,7 @@ void a5xx_preempt_hw_init(struct msm_gpu *gpu) + a5xx_gpu->preempt[i]->wptr = 0; + a5xx_gpu->preempt[i]->rptr = 0; + a5xx_gpu->preempt[i]->rbase = gpu->rb[i]->iova; ++ a5xx_gpu->preempt[i]->rptr_addr = shadowptr(a5xx_gpu, gpu->rb[i]); + } + + /* Write a 0 to signal that we aren't switching pagetables */ +@@ -259,7 +260,6 @@ static int preempt_init_ring(struct a5xx_gpu *a5xx_gpu, + ptr->data = 0; + ptr->cntl = MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE; + +- ptr->rptr_addr = shadowptr(a5xx_gpu, ring); + ptr->counter = counters_iova; + + return 0; +-- +2.39.2 + diff --git a/queue-5.15/drm-msm-a5xx-fix-highest-bank-bit-for-a530.patch 
b/queue-5.15/drm-msm-a5xx-fix-highest-bank-bit-for-a530.patch new file mode 100644 index 00000000000..205e0d9ecdd --- /dev/null +++ b/queue-5.15/drm-msm-a5xx-fix-highest-bank-bit-for-a530.patch @@ -0,0 +1,38 @@ +From 648758a13d4c43541faca78a2826b612a2b8389e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 14 Feb 2023 05:09:54 +0300 +Subject: drm/msm/a5xx: fix highest bank bit for a530 + +From: Dmitry Baryshkov + +[ Upstream commit 141f66ebbfa17cc7e2075f06c50107da978c965b ] + +A530 has highest bank bit equal to 15 (like A540). Fix values written to +REG_A5XX_RB_MODE_CNTL and REG_A5XX_TPL1_MODE_CNTL registers. + +Fixes: 1d832ab30ce6 ("drm/msm/a5xx: Add support for Adreno 508, 509, 512 GPUs") +Signed-off-by: Dmitry Baryshkov +Patchwork: https://patchwork.freedesktop.org/patch/522639/ +Link: https://lore.kernel.org/r/20230214020956.164473-3-dmitry.baryshkov@linaro.org +Signed-off-by: Rob Clark +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +index c1bc5e2aaefe5..b8c49ba65254c 100644 +--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c ++++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +@@ -801,7 +801,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu) + gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F); + + /* Set the highest bank bit */ +- if (adreno_is_a540(adreno_gpu)) ++ if (adreno_is_a540(adreno_gpu) || adreno_is_a530(adreno_gpu)) + regbit = 2; + else + regbit = 1; +-- +2.39.2 + diff --git a/queue-5.15/drm-msm-a5xx-fix-setting-of-the-cp_preempt_enable_lo.patch b/queue-5.15/drm-msm-a5xx-fix-setting-of-the-cp_preempt_enable_lo.patch new file mode 100644 index 00000000000..dd4154329a3 --- /dev/null +++ b/queue-5.15/drm-msm-a5xx-fix-setting-of-the-cp_preempt_enable_lo.patch @@ -0,0 +1,41 @@ +From 00e0636d17e0d4c21311f9a0d85803a162431ec4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 14 Feb 2023 
05:09:53 +0300 +Subject: drm/msm/a5xx: fix setting of the CP_PREEMPT_ENABLE_LOCAL register + +From: Dmitry Baryshkov + +[ Upstream commit a7a4c19c36de1e4b99b06e4060ccc8ab837725bc ] + +Rather than writing CP_PREEMPT_ENABLE_GLOBAL twice, follow the vendor +kernel and set CP_PREEMPT_ENABLE_LOCAL register instead. a5xx_submit() +will override it during submission, but let's get the sequence correct. + +Fixes: b1fc2839d2f9 ("drm/msm: Implement preemption for A5XX targets") +Signed-off-by: Dmitry Baryshkov +Patchwork: https://patchwork.freedesktop.org/patch/522638/ +Link: https://lore.kernel.org/r/20230214020956.164473-2-dmitry.baryshkov@linaro.org +Signed-off-by: Rob Clark +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +index 5e2750eb3810c..c1bc5e2aaefe5 100644 +--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c ++++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +@@ -153,8 +153,8 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) + OUT_RING(ring, 1); + + /* Enable local preemption for finegrain preemption */ +- OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1); +- OUT_RING(ring, 0x02); ++ OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1); ++ OUT_RING(ring, 0x1); + + /* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */ + OUT_PKT7(ring, CP_YIELD_ENABLE, 1); +-- +2.39.2 + diff --git a/queue-5.15/drm-msm-a5xx-fix-the-emptyness-check-in-the-preempt-.patch b/queue-5.15/drm-msm-a5xx-fix-the-emptyness-check-in-the-preempt-.patch new file mode 100644 index 00000000000..bc3067bfb10 --- /dev/null +++ b/queue-5.15/drm-msm-a5xx-fix-the-emptyness-check-in-the-preempt-.patch @@ -0,0 +1,42 @@ +From f9f4ec3b6b55bad80f950d8965fcfa800c5807f0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 14 Feb 2023 05:09:55 +0300 +Subject: drm/msm/a5xx: fix the emptyness check in the preempt code + +From: Dmitry 
Baryshkov + +[ Upstream commit b4fb748f0b734ce1d2e7834998cc599fcbd25d67 ] + +Quoting Yassine: ring->memptrs->rptr is never updated and stays 0, so +the comparison always evaluates to false and get_next_ring always +returns ring 0 thinking it isn't empty. + +Fix this by calling get_rptr() instead of reading rptr directly. + +Reported-by: Yassine Oudjana +Fixes: b1fc2839d2f9 ("drm/msm: Implement preemption for A5XX targets") +Signed-off-by: Dmitry Baryshkov +Patchwork: https://patchwork.freedesktop.org/patch/522642/ +Link: https://lore.kernel.org/r/20230214020956.164473-4-dmitry.baryshkov@linaro.org +Signed-off-by: Rob Clark +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/msm/adreno/a5xx_preempt.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c +index 8abc9a2b114a2..6e326d851ba53 100644 +--- a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c ++++ b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c +@@ -63,7 +63,7 @@ static struct msm_ringbuffer *get_next_ring(struct msm_gpu *gpu) + struct msm_ringbuffer *ring = gpu->rb[i]; + + spin_lock_irqsave(&ring->preempt_lock, flags); +- empty = (get_wptr(ring) == ring->memptrs->rptr); ++ empty = (get_wptr(ring) == gpu->funcs->get_rptr(gpu, ring)); + spin_unlock_irqrestore(&ring->preempt_lock, flags); + + if (!empty) +-- +2.39.2 + diff --git a/queue-5.15/drm-msm-dpu-fix-len-of-sc7180-ctl-blocks.patch b/queue-5.15/drm-msm-dpu-fix-len-of-sc7180-ctl-blocks.patch new file mode 100644 index 00000000000..deac0da563a --- /dev/null +++ b/queue-5.15/drm-msm-dpu-fix-len-of-sc7180-ctl-blocks.patch @@ -0,0 +1,52 @@ +From 40b7b62b8d6437767a6886bebb7ea2b49df14615 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 12 Feb 2023 01:12:13 +0200 +Subject: drm/msm/dpu: fix len of sc7180 ctl blocks + +From: Dmitry Baryshkov + +[ Upstream commit ce6bd00abc220e9edf10986234fadba6462b4abf ] + +Change sc7180's ctl block len to 0x1dc. 
+ +Fixes: 7bdc0c4b8126 ("msm:disp:dpu1: add support for display for SC7180 target") +Signed-off-by: Dmitry Baryshkov +Reviewed-by: Abhinav Kumar +Patchwork: https://patchwork.freedesktop.org/patch/522210/ +Link: https://lore.kernel.org/r/20230211231259.1308718-5-dmitry.baryshkov@linaro.org +Signed-off-by: Abhinav Kumar +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c +index 700d65e39feb0..4c65259eecb9d 100644 +--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c ++++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c +@@ -375,19 +375,19 @@ static const struct dpu_ctl_cfg sdm845_ctl[] = { + static const struct dpu_ctl_cfg sc7180_ctl[] = { + { + .name = "ctl_0", .id = CTL_0, +- .base = 0x1000, .len = 0xE4, ++ .base = 0x1000, .len = 0x1dc, + .features = BIT(DPU_CTL_ACTIVE_CFG), + .intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 9), + }, + { + .name = "ctl_1", .id = CTL_1, +- .base = 0x1200, .len = 0xE4, ++ .base = 0x1200, .len = 0x1dc, + .features = BIT(DPU_CTL_ACTIVE_CFG), + .intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 10), + }, + { + .name = "ctl_2", .id = CTL_2, +- .base = 0x1400, .len = 0xE4, ++ .base = 0x1400, .len = 0x1dc, + .features = BIT(DPU_CTL_ACTIVE_CFG), + .intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 11), + }, +-- +2.39.2 + diff --git a/queue-5.15/drm-msm-fix-potential-invalid-ptr-free.patch b/queue-5.15/drm-msm-fix-potential-invalid-ptr-free.patch new file mode 100644 index 00000000000..16fd68c4b9b --- /dev/null +++ b/queue-5.15/drm-msm-fix-potential-invalid-ptr-free.patch @@ -0,0 +1,49 @@ +From c53150c9c713268ba1674366f768a8b5251fcf11 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 15 Feb 2023 15:50:48 -0800 +Subject: drm/msm: Fix potential invalid ptr free + +From: Rob Clark + +[ Upstream commit 8a86f213f4426f19511a16d886871805b35c3acf ] + 
+The error path cleanup expects that chain and syncobj are either NULL or +valid pointers. But post_deps was not allocated with __GFP_ZERO. + +Fixes: ab723b7a992a ("drm/msm: Add syncobj support.") +Signed-off-by: Rob Clark +Reviewed-by: Dmitry Baryshkov +Reviewed-by: Dmitry Osipenko +Patchwork: https://patchwork.freedesktop.org/patch/523051/ +Link: https://lore.kernel.org/r/20230215235048.1166484-1-robdclark@gmail.com +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/msm/msm_gem_submit.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c +index 83e6ccad77286..fc2fb1019ea1c 100644 +--- a/drivers/gpu/drm/msm/msm_gem_submit.c ++++ b/drivers/gpu/drm/msm/msm_gem_submit.c +@@ -640,8 +640,8 @@ static struct msm_submit_post_dep *msm_parse_post_deps(struct drm_device *dev, + int ret = 0; + uint32_t i, j; + +- post_deps = kmalloc_array(nr_syncobjs, sizeof(*post_deps), +- GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); ++ post_deps = kcalloc(nr_syncobjs, sizeof(*post_deps), ++ GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); + if (!post_deps) + return ERR_PTR(-ENOMEM); + +@@ -656,7 +656,6 @@ static struct msm_submit_post_dep *msm_parse_post_deps(struct drm_device *dev, + } + + post_deps[i].point = syncobj_desc.point; +- post_deps[i].chain = NULL; + + if (syncobj_desc.flags) { + ret = -EINVAL; +-- +2.39.2 + diff --git a/queue-5.15/drm-nouveau-kms-nv50-fix-nv50_wndw_new_-prototype.patch b/queue-5.15/drm-nouveau-kms-nv50-fix-nv50_wndw_new_-prototype.patch new file mode 100644 index 00000000000..7648a285bab --- /dev/null +++ b/queue-5.15/drm-nouveau-kms-nv50-fix-nv50_wndw_new_-prototype.patch @@ -0,0 +1,64 @@ +From 5f847653f0c58b9a0a081e3d8dc1d154538037a8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 31 Oct 2022 12:42:29 +0100 +Subject: drm/nouveau/kms/nv50: fix nv50_wndw_new_ prototype + +From: Jiri Slaby (SUSE) + +[ Upstream commit 3638a820c5c3b52f327cebb174fd4274bee08aa7 ] + 
+gcc-13 warns about mismatching types for enums. That revealed switched +arguments of nv50_wndw_new_(): + drivers/gpu/drm/nouveau/dispnv50/wndw.c:696:1: error: conflicting types for 'nv50_wndw_new_' due to enum/integer mismatch; have 'int(const struct nv50_wndw_func *, struct drm_device *, enum drm_plane_type, const char *, int, const u32 *, u32, enum nv50_disp_interlock_type, u32, struct nv50_wndw **)' + drivers/gpu/drm/nouveau/dispnv50/wndw.h:36:5: note: previous declaration of 'nv50_wndw_new_' with type 'int(const struct nv50_wndw_func *, struct drm_device *, enum drm_plane_type, const char *, int, const u32 *, enum nv50_disp_interlock_type, u32, u32, struct nv50_wndw **)' + +It can be barely visible, but the declaration says about the parameters +in the middle: + enum nv50_disp_interlock_type, + u32 interlock_data, + u32 heads, + +While the definition states differently: + u32 heads, + enum nv50_disp_interlock_type interlock_type, + u32 interlock_data, + +Unify/fix the declaration to match the definition. 
+ +Fixes: 53e0a3e70de6 ("drm/nouveau/kms/nv50-: simplify tracking of channel interlocks") +Cc: Martin Liska +Cc: Ben Skeggs +Cc: Karol Herbst +Cc: Lyude Paul +Cc: David Airlie +Cc: Daniel Vetter +Cc: dri-devel@lists.freedesktop.org +Cc: nouveau@lists.freedesktop.org +Cc: linux-kernel@vger.kernel.org +Signed-off-by: Jiri Slaby (SUSE) +Signed-off-by: Karol Herbst +Link: https://patchwork.freedesktop.org/patch/msgid/20221031114229.10289-1-jirislaby@kernel.org +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/nouveau/dispnv50/wndw.h | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.h b/drivers/gpu/drm/nouveau/dispnv50/wndw.h +index 980f8ea96d54a..6c64864da4550 100644 +--- a/drivers/gpu/drm/nouveau/dispnv50/wndw.h ++++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.h +@@ -38,8 +38,9 @@ struct nv50_wndw { + + int nv50_wndw_new_(const struct nv50_wndw_func *, struct drm_device *, + enum drm_plane_type, const char *name, int index, +- const u32 *format, enum nv50_disp_interlock_type, +- u32 interlock_data, u32 heads, struct nv50_wndw **); ++ const u32 *format, u32 heads, ++ enum nv50_disp_interlock_type, u32 interlock_data, ++ struct nv50_wndw **); + void nv50_wndw_flush_set(struct nv50_wndw *, u32 *interlock, + struct nv50_wndw_atom *); + void nv50_wndw_flush_clr(struct nv50_wndw *, u32 *interlock, bool flush, +-- +2.39.2 + diff --git a/queue-5.15/drm-nouveau-kms-nv50-remove-unused-functions.patch b/queue-5.15/drm-nouveau-kms-nv50-remove-unused-functions.patch new file mode 100644 index 00000000000..85a1d082e18 --- /dev/null +++ b/queue-5.15/drm-nouveau-kms-nv50-remove-unused-functions.patch @@ -0,0 +1,100 @@ +From c6f96772e0bf0057baf242bab62b60391ff25e02 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 1 Jun 2022 20:46:06 +1000 +Subject: drm/nouveau/kms/nv50-: remove unused functions + +From: Ben Skeggs + +[ Upstream commit 89ed996b888faaf11c69bb4cbc19f21475c9050e ] + +Signed-off-by: Ben Skeggs 
+Reviewed-by: Dave Airlie +Signed-off-by: Dave Airlie +Stable-dep-of: 3638a820c5c3 ("drm/nouveau/kms/nv50: fix nv50_wndw_new_ prototype") +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/nouveau/dispnv50/disp.c | 16 ---------------- + drivers/gpu/drm/nouveau/dispnv50/wndw.c | 12 ------------ + drivers/gpu/drm/nouveau/dispnv50/wndw.h | 2 -- + 3 files changed, 30 deletions(-) + +diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c +index d7b9f7f8c9e31..0722b907bfcf4 100644 +--- a/drivers/gpu/drm/nouveau/dispnv50/disp.c ++++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c +@@ -2622,14 +2622,6 @@ nv50_display_fini(struct drm_device *dev, bool runtime, bool suspend) + { + struct nouveau_drm *drm = nouveau_drm(dev); + struct drm_encoder *encoder; +- struct drm_plane *plane; +- +- drm_for_each_plane(plane, dev) { +- struct nv50_wndw *wndw = nv50_wndw(plane); +- if (plane->funcs != &nv50_wndw) +- continue; +- nv50_wndw_fini(wndw); +- } + + list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { + if (encoder->encoder_type != DRM_MODE_ENCODER_DPMST) +@@ -2645,7 +2637,6 @@ nv50_display_init(struct drm_device *dev, bool resume, bool runtime) + { + struct nv50_core *core = nv50_disp(dev)->core; + struct drm_encoder *encoder; +- struct drm_plane *plane; + + if (resume || runtime) + core->func->init(core); +@@ -2658,13 +2649,6 @@ nv50_display_init(struct drm_device *dev, bool resume, bool runtime) + } + } + +- drm_for_each_plane(plane, dev) { +- struct nv50_wndw *wndw = nv50_wndw(plane); +- if (plane->funcs != &nv50_wndw) +- continue; +- nv50_wndw_init(wndw); +- } +- + return 0; + } + +diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c +index 8d048bacd6f02..e1e62674e82d3 100644 +--- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c ++++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c +@@ -694,18 +694,6 @@ nv50_wndw_notify(struct nvif_notify *notify) + return NVIF_NOTIFY_KEEP; + } + +-void 
+-nv50_wndw_fini(struct nv50_wndw *wndw) +-{ +- nvif_notify_put(&wndw->notify); +-} +- +-void +-nv50_wndw_init(struct nv50_wndw *wndw) +-{ +- nvif_notify_get(&wndw->notify); +-} +- + static const u64 nv50_cursor_format_modifiers[] = { + DRM_FORMAT_MOD_LINEAR, + DRM_FORMAT_MOD_INVALID, +diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.h b/drivers/gpu/drm/nouveau/dispnv50/wndw.h +index f4e0c50800344..980f8ea96d54a 100644 +--- a/drivers/gpu/drm/nouveau/dispnv50/wndw.h ++++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.h +@@ -40,8 +40,6 @@ int nv50_wndw_new_(const struct nv50_wndw_func *, struct drm_device *, + enum drm_plane_type, const char *name, int index, + const u32 *format, enum nv50_disp_interlock_type, + u32 interlock_data, u32 heads, struct nv50_wndw **); +-void nv50_wndw_init(struct nv50_wndw *); +-void nv50_wndw_fini(struct nv50_wndw *); + void nv50_wndw_flush_set(struct nv50_wndw *, u32 *interlock, + struct nv50_wndw_atom *); + void nv50_wndw_flush_clr(struct nv50_wndw *, u32 *interlock, bool flush, +-- +2.39.2 + diff --git a/queue-5.15/efi-earlycon-replace-open-coded-strnchrnul.patch b/queue-5.15/efi-earlycon-replace-open-coded-strnchrnul.patch new file mode 100644 index 00000000000..3c53131b63c --- /dev/null +++ b/queue-5.15/efi-earlycon-replace-open-coded-strnchrnul.patch @@ -0,0 +1,55 @@ +From 7f2efce312d57b155b17963471668d67a9ad4415 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 9 Dec 2022 00:12:16 +0200 +Subject: efi/earlycon: Replace open coded strnchrnul() + +From: Andy Shevchenko + +[ Upstream commit b7a1cd243839cc1459fbc83a7a62e3b57f29f497 ] + +strnchrnul() can be called in the early stages. Replace +open coded variant in the EFI early console driver. 
+ +Signed-off-by: Andy Shevchenko +Signed-off-by: Ard Biesheuvel +Stable-dep-of: 0e68b5517d37 ("arm64: efi: Make efi_rt_lock a raw_spinlock") +Signed-off-by: Sasha Levin +--- + drivers/firmware/efi/earlycon.c | 13 ++++--------- + 1 file changed, 4 insertions(+), 9 deletions(-) + +diff --git a/drivers/firmware/efi/earlycon.c b/drivers/firmware/efi/earlycon.c +index a52236e11e5f7..fc233b6f27cb2 100644 +--- a/drivers/firmware/efi/earlycon.c ++++ b/drivers/firmware/efi/earlycon.c +@@ -10,6 +10,7 @@ + #include + #include + #include ++#include + + #include + +@@ -143,16 +144,10 @@ efi_earlycon_write(struct console *con, const char *str, unsigned int num) + len = si->lfb_linelength; + + while (num) { +- unsigned int linemax; +- unsigned int h, count = 0; ++ unsigned int linemax = (si->lfb_width - efi_x) / font->width; ++ unsigned int h, count; + +- for (s = str; *s && *s != '\n'; s++) { +- if (count == num) +- break; +- count++; +- } +- +- linemax = (si->lfb_width - efi_x) / font->width; ++ count = strnchrnul(str, num, '\n') - str; + if (count > linemax) + count = linemax; + +-- +2.39.2 + diff --git a/queue-5.15/ext4-fix-possible-corruption-when-moving-a-directory.patch b/queue-5.15/ext4-fix-possible-corruption-when-moving-a-directory.patch new file mode 100644 index 00000000000..d56acdfb3b3 --- /dev/null +++ b/queue-5.15/ext4-fix-possible-corruption-when-moving-a-directory.patch @@ -0,0 +1,59 @@ +From bd21a5512e13f529b9b519d98eaf6535b6a33910 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 26 Jan 2023 12:22:21 +0100 +Subject: ext4: Fix possible corruption when moving a directory + +From: Jan Kara + +[ Upstream commit 0813299c586b175d7edb25f56412c54b812d0379 ] + +When we are renaming a directory to a different directory, we need to +update '..' entry in the moved directory. However nothing prevents moved +directory from being modified and even converted from the inline format +to the normal format. When such race happens the rename code gets +confused and we crash. 
Fix the problem by locking the moved directory. + +CC: stable@vger.kernel.org +Fixes: 32f7f22c0b52 ("ext4: let ext4_rename handle inline dir") +Signed-off-by: Jan Kara +Link: https://lore.kernel.org/r/20230126112221.11866-1-jack@suse.cz +Signed-off-by: Theodore Ts'o +Signed-off-by: Sasha Levin +--- + fs/ext4/namei.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c +index c7791e1957f50..aa689adeeafdf 100644 +--- a/fs/ext4/namei.c ++++ b/fs/ext4/namei.c +@@ -3887,9 +3887,16 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir, + if (new.dir != old.dir && EXT4_DIR_LINK_MAX(new.dir)) + goto end_rename; + } ++ /* ++ * We need to protect against old.inode directory getting ++ * converted from inline directory format into a normal one. ++ */ ++ inode_lock_nested(old.inode, I_MUTEX_NONDIR2); + retval = ext4_rename_dir_prepare(handle, &old); +- if (retval) ++ if (retval) { ++ inode_unlock(old.inode); + goto end_rename; ++ } + } + /* + * If we're renaming a file within an inline_data dir and adding or +@@ -4014,6 +4021,8 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir, + } else { + ext4_journal_stop(handle); + } ++ if (old.dir_bh) ++ inode_unlock(old.inode); + release_bh: + brelse(old.dir_bh); + brelse(old.bh); +-- +2.39.2 + diff --git a/queue-5.15/f2fs-avoid-down_write-on-nat_tree_lock-during-checkp.patch b/queue-5.15/f2fs-avoid-down_write-on-nat_tree_lock-during-checkp.patch new file mode 100644 index 00000000000..241e5392ad6 --- /dev/null +++ b/queue-5.15/f2fs-avoid-down_write-on-nat_tree_lock-during-checkp.patch @@ -0,0 +1,37 @@ +From 47bba10c64d2f1798553fa213aa214e06276e147 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 13 Dec 2021 13:28:40 -0800 +Subject: f2fs: avoid down_write on nat_tree_lock during checkpoint + +From: Jaegeuk Kim + +[ Upstream commit 0df035c7208c5e3e2ae7685548353ae536a19015 ] + +Let's cache nat entry if 
there's no lock contention only. + +Reviewed-by: Chao Yu +Signed-off-by: Jaegeuk Kim +Stable-dep-of: 3aa51c61cb4a ("f2fs: retry to update the inode page given data corruption") +Signed-off-by: Sasha Levin +--- + fs/f2fs/node.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c +index f810c6bbeff02..7f00f3004a665 100644 +--- a/fs/f2fs/node.c ++++ b/fs/f2fs/node.c +@@ -430,6 +430,10 @@ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid, + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct nat_entry *new, *e; + ++ /* Let's mitigate lock contention of nat_tree_lock during checkpoint */ ++ if (rwsem_is_locked(&sbi->cp_global_sem)) ++ return; ++ + new = __alloc_nat_entry(sbi, nid, false); + if (!new) + return; +-- +2.39.2 + diff --git a/queue-5.15/f2fs-do-not-bother-checkpoint-by-f2fs_get_node_info.patch b/queue-5.15/f2fs-do-not-bother-checkpoint-by-f2fs_get_node_info.patch new file mode 100644 index 00000000000..d2e7f0e3c23 --- /dev/null +++ b/queue-5.15/f2fs-do-not-bother-checkpoint-by-f2fs_get_node_info.patch @@ -0,0 +1,300 @@ +From 99878c7ca8d0f1b47355ad93c39f78d97ba5fb7f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 13 Dec 2021 14:16:32 -0800 +Subject: f2fs: do not bother checkpoint by f2fs_get_node_info + +From: Jaegeuk Kim + +[ Upstream commit a9419b63bf414775e8aeee95d8c4a5e0df690748 ] + +This patch tries to mitigate lock contention between f2fs_write_checkpoint and +f2fs_get_node_info along with nat_tree_lock. + +The idea is, if checkpoint is currently running, other threads that try to grab +nat_tree_lock would be better to wait for checkpoint. 
+ +Reviewed-by: Chao Yu +Signed-off-by: Jaegeuk Kim +Stable-dep-of: 3aa51c61cb4a ("f2fs: retry to update the inode page given data corruption") +Signed-off-by: Sasha Levin +--- + fs/f2fs/checkpoint.c | 2 +- + fs/f2fs/compress.c | 2 +- + fs/f2fs/data.c | 8 ++++---- + fs/f2fs/f2fs.h | 2 +- + fs/f2fs/file.c | 2 +- + fs/f2fs/gc.c | 6 +++--- + fs/f2fs/inline.c | 4 ++-- + fs/f2fs/inode.c | 2 +- + fs/f2fs/node.c | 19 ++++++++++--------- + fs/f2fs/recovery.c | 2 +- + fs/f2fs/segment.c | 2 +- + 11 files changed, 26 insertions(+), 25 deletions(-) + +diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c +index 02840dadde5d4..c68f1f8000f17 100644 +--- a/fs/f2fs/checkpoint.c ++++ b/fs/f2fs/checkpoint.c +@@ -672,7 +672,7 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) + /* truncate all the data during iput */ + iput(inode); + +- err = f2fs_get_node_info(sbi, ino, &ni); ++ err = f2fs_get_node_info(sbi, ino, &ni, false); + if (err) + goto err_out; + +diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c +index 6adf047259546..4fa62f98cb515 100644 +--- a/fs/f2fs/compress.c ++++ b/fs/f2fs/compress.c +@@ -1275,7 +1275,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, + + psize = (loff_t)(cc->rpages[last_index]->index + 1) << PAGE_SHIFT; + +- err = f2fs_get_node_info(fio.sbi, dn.nid, &ni); ++ err = f2fs_get_node_info(fio.sbi, dn.nid, &ni, false); + if (err) + goto out_put_dnode; + +diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c +index ee2909267a33b..524d4b49a5209 100644 +--- a/fs/f2fs/data.c ++++ b/fs/f2fs/data.c +@@ -1354,7 +1354,7 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) + if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC))) + return -EPERM; + +- err = f2fs_get_node_info(sbi, dn->nid, &ni); ++ err = f2fs_get_node_info(sbi, dn->nid, &ni, false); + if (err) + return err; + +@@ -1796,7 +1796,7 @@ static int f2fs_xattr_fiemap(struct inode *inode, + if (!page) + return -ENOMEM; + +- err = 
f2fs_get_node_info(sbi, inode->i_ino, &ni); ++ err = f2fs_get_node_info(sbi, inode->i_ino, &ni, false); + if (err) { + f2fs_put_page(page, 1); + return err; +@@ -1828,7 +1828,7 @@ static int f2fs_xattr_fiemap(struct inode *inode, + if (!page) + return -ENOMEM; + +- err = f2fs_get_node_info(sbi, xnid, &ni); ++ err = f2fs_get_node_info(sbi, xnid, &ni, false); + if (err) { + f2fs_put_page(page, 1); + return err; +@@ -2688,7 +2688,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) + fio->need_lock = LOCK_REQ; + } + +- err = f2fs_get_node_info(fio->sbi, dn.nid, &ni); ++ err = f2fs_get_node_info(fio->sbi, dn.nid, &ni, false); + if (err) + goto out_writepage; + +diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h +index a144471c53166..80e4f9afe86f7 100644 +--- a/fs/f2fs/f2fs.h ++++ b/fs/f2fs/f2fs.h +@@ -3416,7 +3416,7 @@ int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid); + bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid); + bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino); + int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, +- struct node_info *ni); ++ struct node_info *ni, bool checkpoint_context); + pgoff_t f2fs_get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs); + int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode); + int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from); +diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c +index 326c1a4c2a6ac..3be34ea4e2998 100644 +--- a/fs/f2fs/file.c ++++ b/fs/f2fs/file.c +@@ -1232,7 +1232,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, + if (ret) + return ret; + +- ret = f2fs_get_node_info(sbi, dn.nid, &ni); ++ ret = f2fs_get_node_info(sbi, dn.nid, &ni, false); + if (ret) { + f2fs_put_dnode(&dn); + return ret; +diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c +index fa1f5fb750b39..615b109570b05 100644 +--- a/fs/f2fs/gc.c ++++ b/fs/f2fs/gc.c +@@ -944,7 +944,7 @@ static int gc_node_segment(struct 
f2fs_sb_info *sbi, + continue; + } + +- if (f2fs_get_node_info(sbi, nid, &ni)) { ++ if (f2fs_get_node_info(sbi, nid, &ni, false)) { + f2fs_put_page(node_page, 1); + continue; + } +@@ -1012,7 +1012,7 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, + if (IS_ERR(node_page)) + return false; + +- if (f2fs_get_node_info(sbi, nid, dni)) { ++ if (f2fs_get_node_info(sbi, nid, dni, false)) { + f2fs_put_page(node_page, 1); + return false; + } +@@ -1223,7 +1223,7 @@ static int move_data_block(struct inode *inode, block_t bidx, + + f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); + +- err = f2fs_get_node_info(fio.sbi, dn.nid, &ni); ++ err = f2fs_get_node_info(fio.sbi, dn.nid, &ni, false); + if (err) + goto put_out; + +diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c +index bce1c2ae6d153..e4fc169a07f55 100644 +--- a/fs/f2fs/inline.c ++++ b/fs/f2fs/inline.c +@@ -146,7 +146,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) + if (err) + return err; + +- err = f2fs_get_node_info(fio.sbi, dn->nid, &ni); ++ err = f2fs_get_node_info(fio.sbi, dn->nid, &ni, false); + if (err) { + f2fs_truncate_data_blocks_range(dn, 1); + f2fs_put_dnode(dn); +@@ -797,7 +797,7 @@ int f2fs_inline_data_fiemap(struct inode *inode, + ilen = start + len; + ilen -= start; + +- err = f2fs_get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni); ++ err = f2fs_get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni, false); + if (err) + goto out; + +diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c +index bd8960f4966bc..2fa0fcffe0c6d 100644 +--- a/fs/f2fs/inode.c ++++ b/fs/f2fs/inode.c +@@ -888,7 +888,7 @@ void f2fs_handle_failed_inode(struct inode *inode) + * so we can prevent losing this orphan when encoutering checkpoint + * and following suddenly power-off. 
+ */ +- err = f2fs_get_node_info(sbi, inode->i_ino, &ni); ++ err = f2fs_get_node_info(sbi, inode->i_ino, &ni, false); + if (err) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + set_inode_flag(inode, FI_FREE_NID); +diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c +index 7f00f3004a665..89a7f6021c369 100644 +--- a/fs/f2fs/node.c ++++ b/fs/f2fs/node.c +@@ -543,7 +543,7 @@ int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) + } + + int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, +- struct node_info *ni) ++ struct node_info *ni, bool checkpoint_context) + { + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); +@@ -576,9 +576,10 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, + * nat_tree_lock. Therefore, we should retry, if we failed to grab here + * while not bothering checkpoint. + */ +- if (!rwsem_is_locked(&sbi->cp_global_sem)) { ++ if (!rwsem_is_locked(&sbi->cp_global_sem) || checkpoint_context) { + down_read(&curseg->journal_rwsem); +- } else if (!down_read_trylock(&curseg->journal_rwsem)) { ++ } else if (rwsem_is_contended(&nm_i->nat_tree_lock) || ++ !down_read_trylock(&curseg->journal_rwsem)) { + up_read(&nm_i->nat_tree_lock); + goto retry; + } +@@ -891,7 +892,7 @@ static int truncate_node(struct dnode_of_data *dn) + int err; + pgoff_t index; + +- err = f2fs_get_node_info(sbi, dn->nid, &ni); ++ err = f2fs_get_node_info(sbi, dn->nid, &ni, false); + if (err) + return err; + +@@ -1290,7 +1291,7 @@ struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs) + goto fail; + + #ifdef CONFIG_F2FS_CHECK_FS +- err = f2fs_get_node_info(sbi, dn->nid, &new_ni); ++ err = f2fs_get_node_info(sbi, dn->nid, &new_ni, false); + if (err) { + dec_valid_node_count(sbi, dn->inode, !ofs); + goto fail; +@@ -1356,7 +1357,7 @@ static int read_node_page(struct page *page, int op_flags) + return LOCKED_PAGE; + } + +- err = f2fs_get_node_info(sbi, page->index, &ni); ++ err = f2fs_get_node_info(sbi, 
page->index, &ni, false); + if (err) + return err; + +@@ -1607,7 +1608,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, + nid = nid_of_node(page); + f2fs_bug_on(sbi, page->index != nid); + +- if (f2fs_get_node_info(sbi, nid, &ni)) ++ if (f2fs_get_node_info(sbi, nid, &ni, !do_balance)) + goto redirty_out; + + if (wbc->for_reclaim) { +@@ -2712,7 +2713,7 @@ int f2fs_recover_xattr_data(struct inode *inode, struct page *page) + goto recover_xnid; + + /* 1: invalidate the previous xattr nid */ +- err = f2fs_get_node_info(sbi, prev_xnid, &ni); ++ err = f2fs_get_node_info(sbi, prev_xnid, &ni, false); + if (err) + return err; + +@@ -2752,7 +2753,7 @@ int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) + struct page *ipage; + int err; + +- err = f2fs_get_node_info(sbi, ino, &old_ni); ++ err = f2fs_get_node_info(sbi, ino, &old_ni, false); + if (err) + return err; + +diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c +index ed21e34b59c7f..ba7eeb3c27384 100644 +--- a/fs/f2fs/recovery.c ++++ b/fs/f2fs/recovery.c +@@ -604,7 +604,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, + + f2fs_wait_on_page_writeback(dn.node_page, NODE, true, true); + +- err = f2fs_get_node_info(sbi, dn.nid, &ni); ++ err = f2fs_get_node_info(sbi, dn.nid, &ni, false); + if (err) + goto err; + +diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c +index 194c0811fbdfe..58dd4de41986e 100644 +--- a/fs/f2fs/segment.c ++++ b/fs/f2fs/segment.c +@@ -253,7 +253,7 @@ static int __revoke_inmem_pages(struct inode *inode, + goto next; + } + +- err = f2fs_get_node_info(sbi, dn.nid, &ni); ++ err = f2fs_get_node_info(sbi, dn.nid, &ni, false); + if (err) { + f2fs_put_dnode(&dn); + return err; +-- +2.39.2 + diff --git a/queue-5.15/f2fs-retry-to-update-the-inode-page-given-data-corru.patch b/queue-5.15/f2fs-retry-to-update-the-inode-page-given-data-corru.patch new file mode 100644 index 00000000000..c496663c421 --- /dev/null +++ 
b/queue-5.15/f2fs-retry-to-update-the-inode-page-given-data-corru.patch @@ -0,0 +1,57 @@ +From ee2886aebca889db23b7b2daedcc057cc5515cab Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 30 Jan 2023 15:20:09 -0800 +Subject: f2fs: retry to update the inode page given data corruption + +From: Jaegeuk Kim + +[ Upstream commit 3aa51c61cb4a4dcb40df51ac61171e9ac5a35321 ] + +If the storage gives a corrupted node block due to short power failure and +reset, f2fs stops the entire operations by setting the checkpoint failure flag. + +Let's give more chances to live by re-issuing IOs for a while in such critical +path. + +Cc: stable@vger.kernel.org +Suggested-by: Randall Huang +Suggested-by: Chao Yu +Reviewed-by: Chao Yu +Signed-off-by: Jaegeuk Kim +Signed-off-by: Sasha Levin +--- + fs/f2fs/inode.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c +index 2fa0fcffe0c6d..94e21136d5790 100644 +--- a/fs/f2fs/inode.c ++++ b/fs/f2fs/inode.c +@@ -681,17 +681,19 @@ void f2fs_update_inode_page(struct inode *inode) + { + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct page *node_page; ++ int count = 0; + retry: + node_page = f2fs_get_node_page(sbi, inode->i_ino); + if (IS_ERR(node_page)) { + int err = PTR_ERR(node_page); + +- if (err == -ENOMEM) { +- cond_resched(); ++ /* The node block was truncated. 
*/ ++ if (err == -ENOENT) ++ return; ++ ++ if (err == -ENOMEM || ++count <= DEFAULT_RETRY_IO_COUNT) + goto retry; +- } else if (err != -ENOENT) { +- f2fs_stop_checkpoint(sbi, false); +- } ++ f2fs_stop_checkpoint(sbi, false); + return; + } + f2fs_update_inode(inode, node_page); +-- +2.39.2 + diff --git a/queue-5.15/fs-dlm-add-midcomms-init-start-functions.patch b/queue-5.15/fs-dlm-add-midcomms-init-start-functions.patch new file mode 100644 index 00000000000..cf2ab9fddaf --- /dev/null +++ b/queue-5.15/fs-dlm-add-midcomms-init-start-functions.patch @@ -0,0 +1,202 @@ +From 8390d724c95cb883c2cf159bf5d4f5bba18a406f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 17 Nov 2022 17:11:46 -0500 +Subject: fs: dlm: add midcomms init/start functions + +From: Alexander Aring + +[ Upstream commit 8b0188b0d60b6f6183b48380bac49fe080c5ded9 ] + +This patch introduces leftovers of init, start, stop and exit +functionality. The dlm application layer should always call the midcomms +layer, which becomes aware of such events and redirects them to the lowcomms +layer. Some functionality which is currently handled inside the start +functionality of midcomms and lowcomms should be handled in the init +functionality as it only needs to be initialized once when dlm is loaded.
+ +Signed-off-by: Alexander Aring +Signed-off-by: David Teigland +Stable-dep-of: aad633dc0cf9 ("fs: dlm: start midcomms before scand") +Signed-off-by: Sasha Levin +--- + fs/dlm/lockspace.c | 5 ++--- + fs/dlm/lowcomms.c | 16 ++++++++++------ + fs/dlm/lowcomms.h | 1 + + fs/dlm/main.c | 7 +++++-- + fs/dlm/midcomms.c | 17 ++++++++++++++++- + fs/dlm/midcomms.h | 3 +++ + 6 files changed, 37 insertions(+), 12 deletions(-) + +diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c +index a1b34605742fc..1b49f375829d0 100644 +--- a/fs/dlm/lockspace.c ++++ b/fs/dlm/lockspace.c +@@ -17,7 +17,6 @@ + #include "recoverd.h" + #include "dir.h" + #include "midcomms.h" +-#include "lowcomms.h" + #include "config.h" + #include "memory.h" + #include "lock.h" +@@ -705,7 +704,7 @@ int dlm_new_lockspace(const char *name, const char *cluster, + if (!ls_count) { + dlm_scand_stop(); + dlm_midcomms_shutdown(); +- dlm_lowcomms_stop(); ++ dlm_midcomms_stop(); + } + out: + mutex_unlock(&ls_lock); +@@ -889,7 +888,7 @@ int dlm_release_lockspace(void *lockspace, int force) + if (!error) + ls_count--; + if (!ls_count) +- dlm_lowcomms_stop(); ++ dlm_midcomms_stop(); + mutex_unlock(&ls_lock); + + return error; +diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c +index d56a8f88a3852..1eb95ba7e7772 100644 +--- a/fs/dlm/lowcomms.c ++++ b/fs/dlm/lowcomms.c +@@ -1959,10 +1959,6 @@ static const struct dlm_proto_ops dlm_sctp_ops = { + int dlm_lowcomms_start(void) + { + int error = -EINVAL; +- int i; +- +- for (i = 0; i < CONN_HASH_SIZE; i++) +- INIT_HLIST_HEAD(&connection_hash[i]); + + init_local(); + if (!dlm_local_count) { +@@ -1971,8 +1967,6 @@ int dlm_lowcomms_start(void) + goto fail; + } + +- INIT_WORK(&listen_con.rwork, process_listen_recv_socket); +- + error = work_start(); + if (error) + goto fail_local; +@@ -2011,6 +2005,16 @@ int dlm_lowcomms_start(void) + return error; + } + ++void dlm_lowcomms_init(void) ++{ ++ int i; ++ ++ for (i = 0; i < CONN_HASH_SIZE; i++) ++ INIT_HLIST_HEAD(&connection_hash[i]); 
++ ++ INIT_WORK(&listen_con.rwork, process_listen_recv_socket); ++} ++ + void dlm_lowcomms_exit(void) + { + struct dlm_node_addr *na, *safe; +diff --git a/fs/dlm/lowcomms.h b/fs/dlm/lowcomms.h +index 4ccae07cf0058..26433632d1717 100644 +--- a/fs/dlm/lowcomms.h ++++ b/fs/dlm/lowcomms.h +@@ -35,6 +35,7 @@ extern int dlm_allow_conn; + int dlm_lowcomms_start(void); + void dlm_lowcomms_shutdown(void); + void dlm_lowcomms_stop(void); ++void dlm_lowcomms_init(void); + void dlm_lowcomms_exit(void); + int dlm_lowcomms_close(int nodeid); + struct dlm_msg *dlm_lowcomms_new_msg(int nodeid, int len, gfp_t allocation, +diff --git a/fs/dlm/main.c b/fs/dlm/main.c +index afc66a1346d3d..974f7ebb3fe63 100644 +--- a/fs/dlm/main.c ++++ b/fs/dlm/main.c +@@ -17,7 +17,7 @@ + #include "user.h" + #include "memory.h" + #include "config.h" +-#include "lowcomms.h" ++#include "midcomms.h" + + static int __init init_dlm(void) + { +@@ -27,6 +27,8 @@ static int __init init_dlm(void) + if (error) + goto out; + ++ dlm_midcomms_init(); ++ + error = dlm_lockspace_init(); + if (error) + goto out_mem; +@@ -63,6 +65,7 @@ static int __init init_dlm(void) + out_lockspace: + dlm_lockspace_exit(); + out_mem: ++ dlm_midcomms_exit(); + dlm_memory_exit(); + out: + return error; +@@ -76,7 +79,7 @@ static void __exit exit_dlm(void) + dlm_config_exit(); + dlm_memory_exit(); + dlm_lockspace_exit(); +- dlm_lowcomms_exit(); ++ dlm_midcomms_exit(); + dlm_unregister_debugfs(); + } + +diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c +index 702c14de7a4bd..84a7a39fc12e6 100644 +--- a/fs/dlm/midcomms.c ++++ b/fs/dlm/midcomms.c +@@ -1142,13 +1142,28 @@ void dlm_midcomms_commit_mhandle(struct dlm_mhandle *mh) + } + + int dlm_midcomms_start(void) ++{ ++ return dlm_lowcomms_start(); ++} ++ ++void dlm_midcomms_stop(void) ++{ ++ dlm_lowcomms_stop(); ++} ++ ++void dlm_midcomms_init(void) + { + int i; + + for (i = 0; i < CONN_HASH_SIZE; i++) + INIT_HLIST_HEAD(&node_hash[i]); + +- return dlm_lowcomms_start(); ++ 
dlm_lowcomms_init(); ++} ++ ++void dlm_midcomms_exit(void) ++{ ++ dlm_lowcomms_exit(); + } + + static void dlm_act_fin_ack_rcv(struct midcomms_node *node) +diff --git a/fs/dlm/midcomms.h b/fs/dlm/midcomms.h +index 579abc6929be2..1a36b7834dfc5 100644 +--- a/fs/dlm/midcomms.h ++++ b/fs/dlm/midcomms.h +@@ -20,6 +20,9 @@ struct dlm_mhandle *dlm_midcomms_get_mhandle(int nodeid, int len, + void dlm_midcomms_commit_mhandle(struct dlm_mhandle *mh); + int dlm_midcomms_close(int nodeid); + int dlm_midcomms_start(void); ++void dlm_midcomms_stop(void); ++void dlm_midcomms_init(void); ++void dlm_midcomms_exit(void); + void dlm_midcomms_shutdown(void); + void dlm_midcomms_add_member(int nodeid); + void dlm_midcomms_remove_member(int nodeid); +-- +2.39.2 + diff --git a/queue-5.15/fs-dlm-fix-log-of-lowcomms-vs-midcomms.patch b/queue-5.15/fs-dlm-fix-log-of-lowcomms-vs-midcomms.patch new file mode 100644 index 00000000000..8b577b1564f --- /dev/null +++ b/queue-5.15/fs-dlm-fix-log-of-lowcomms-vs-midcomms.patch @@ -0,0 +1,38 @@ +From a2d3ae7d54ea368e9913e6c50e979a8fe3169939 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 27 Oct 2022 16:45:26 -0400 +Subject: fs: dlm: fix log of lowcomms vs midcomms + +From: Alexander Aring + +[ Upstream commit 3e54c9e80e68b765d8877023d93f1eea1b9d1c54 ] + +This patch fixes a small issue in the error message printed when +dlm_midcomms_start() fails to start: it reported that the dlm +subcomponent lowcomms had failed, but lowcomms is behind the midcomms +layer.
+ +Signed-off-by: Alexander Aring +Signed-off-by: David Teigland +Stable-dep-of: aad633dc0cf9 ("fs: dlm: start midcomms before scand") +Signed-off-by: Sasha Levin +--- + fs/dlm/lockspace.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c +index 10eddfa6c3d7b..a1b34605742fc 100644 +--- a/fs/dlm/lockspace.c ++++ b/fs/dlm/lockspace.c +@@ -393,7 +393,7 @@ static int threads_start(void) + /* Thread for sending/receiving messages for all lockspace's */ + error = dlm_midcomms_start(); + if (error) { +- log_print("cannot start dlm lowcomms %d", error); ++ log_print("cannot start dlm midcomms %d", error); + goto scand_fail; + } + +-- +2.39.2 + diff --git a/queue-5.15/fs-dlm-start-midcomms-before-scand.patch b/queue-5.15/fs-dlm-start-midcomms-before-scand.patch new file mode 100644 index 00000000000..4c63289e3fe --- /dev/null +++ b/queue-5.15/fs-dlm-start-midcomms-before-scand.patch @@ -0,0 +1,62 @@ +From 6d3a2406be8898dea62067f65a980fe39dcc3235 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 12 Jan 2023 17:10:31 -0500 +Subject: fs: dlm: start midcomms before scand + +From: Alexander Aring + +[ Upstream commit aad633dc0cf90093998b1ae0ba9f19b5f1dab644 ] + +The scand kthread can send dlm messages out, especially dlm remove +messages to free memory for unused rsb on other nodes. To send out dlm +messages, midcomms must be initialized. This patch moves the midcomms +start before scand is started. 
+ +Cc: stable@vger.kernel.org +Fixes: e7fd41792fc0 ("[DLM] The core of the DLM for GFS2/CLVM") +Signed-off-by: Alexander Aring +Signed-off-by: David Teigland +Signed-off-by: Sasha Levin +--- + fs/dlm/lockspace.c | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c +index 1b49f375829d0..fa086a81a8476 100644 +--- a/fs/dlm/lockspace.c ++++ b/fs/dlm/lockspace.c +@@ -383,23 +383,23 @@ static int threads_start(void) + { + int error; + +- error = dlm_scand_start(); ++ /* Thread for sending/receiving messages for all lockspace's */ ++ error = dlm_midcomms_start(); + if (error) { +- log_print("cannot start dlm_scand thread %d", error); ++ log_print("cannot start dlm midcomms %d", error); + goto fail; + } + +- /* Thread for sending/receiving messages for all lockspace's */ +- error = dlm_midcomms_start(); ++ error = dlm_scand_start(); + if (error) { +- log_print("cannot start dlm midcomms %d", error); +- goto scand_fail; ++ log_print("cannot start dlm_scand thread %d", error); ++ goto midcomms_fail; + } + + return 0; + +- scand_fail: +- dlm_scand_stop(); ++ midcomms_fail: ++ dlm_midcomms_stop(); + fail: + return error; + } +-- +2.39.2 + diff --git a/queue-5.15/ice-copy-last-block-omitted-in-ice_get_module_eeprom.patch b/queue-5.15/ice-copy-last-block-omitted-in-ice_get_module_eeprom.patch new file mode 100644 index 00000000000..5a314f3b637 --- /dev/null +++ b/queue-5.15/ice-copy-last-block-omitted-in-ice_get_module_eeprom.patch @@ -0,0 +1,115 @@ +From ac37d5645e7b1a20e9c45ca22eb08b3902aea827 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 1 Mar 2023 21:47:07 +0100 +Subject: ice: copy last block omitted in ice_get_module_eeprom() + +From: Petr Oros + +[ Upstream commit 84cba1840e68430325ac133a11be06bfb2f7acd8 ] + +ice_get_module_eeprom() is broken since commit e9c9692c8a81 ("ice: +Reimplement module reads used by ethtool") In this refactor, +ice_get_module_eeprom() reads the eeprom in blocks of 
size 8. +But the condition that should protect the buffer overflow +ignores the last block. The last block always contains zeros. + +Bug uncovered by ethtool upstream commit 9538f384b535 +("netlink: eeprom: Defer page requests to individual parsers") +After this commit, ethtool reads a block with length = 1; +to read the SFF-8024 identifier value. + +unpatched driver: +$ ethtool -m enp65s0f0np0 offset 0x90 length 8 +Offset Values +------ ------ +0x0090: 00 00 00 00 00 00 00 00 +$ ethtool -m enp65s0f0np0 offset 0x90 length 12 +Offset Values +------ ------ +0x0090: 00 00 01 a0 4d 65 6c 6c 00 00 00 00 +$ + +$ ethtool -m enp65s0f0np0 +Offset Values +------ ------ +0x0000: 11 06 06 00 00 00 00 00 00 00 00 00 00 00 00 00 +0x0010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +0x0020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +0x0030: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +0x0040: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +0x0050: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +0x0060: 00 00 00 00 00 00 00 00 00 00 00 00 00 01 08 00 +0x0070: 00 10 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + +patched driver: +$ ethtool -m enp65s0f0np0 offset 0x90 length 8 +Offset Values +------ ------ +0x0090: 00 00 01 a0 4d 65 6c 6c +$ ethtool -m enp65s0f0np0 offset 0x90 length 12 +Offset Values +------ ------ +0x0090: 00 00 01 a0 4d 65 6c 6c 61 6e 6f 78 +$ ethtool -m enp65s0f0np0 + Identifier : 0x11 (QSFP28) + Extended identifier : 0x00 + Extended identifier description : 1.5W max. 
Power consumption + Extended identifier description : No CDR in TX, No CDR in RX + Extended identifier description : High Power Class (> 3.5 W) not enabled + Connector : 0x23 (No separable connector) + Transceiver codes : 0x88 0x00 0x00 0x00 0x00 0x00 0x00 0x00 + Transceiver type : 40G Ethernet: 40G Base-CR4 + Transceiver type : 25G Ethernet: 25G Base-CR CA-N + Encoding : 0x05 (64B/66B) + BR, Nominal : 25500Mbps + Rate identifier : 0x00 + Length (SMF,km) : 0km + Length (OM3 50um) : 0m + Length (OM2 50um) : 0m + Length (OM1 62.5um) : 0m + Length (Copper or Active cable) : 1m + Transmitter technology : 0xa0 (Copper cable unequalized) + Attenuation at 2.5GHz : 4db + Attenuation at 5.0GHz : 5db + Attenuation at 7.0GHz : 7db + Attenuation at 12.9GHz : 10db + ........ + .... + +Fixes: e9c9692c8a81 ("ice: Reimplement module reads used by ethtool") +Signed-off-by: Petr Oros +Reviewed-by: Jesse Brandeburg +Tested-by: Jesse Brandeburg +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/ice/ice_ethtool.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c +index 24001035910e0..60f73e775beeb 100644 +--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c ++++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c +@@ -3998,6 +3998,8 @@ ice_get_module_eeprom(struct net_device *netdev, + * SFP modules only ever use page 0. + */ + if (page == 0 || !(data[0x2] & 0x4)) { ++ u32 copy_len; ++ + /* If i2c bus is busy due to slow page change or + * link management access, call can fail. This is normal. + * So we retry this a few times. 
+@@ -4021,8 +4023,8 @@ ice_get_module_eeprom(struct net_device *netdev, + } + + /* Make sure we have enough room for the new block */ +- if ((i + SFF_READ_BLOCK_SIZE) < ee->len) +- memcpy(data + i, value, SFF_READ_BLOCK_SIZE); ++ copy_len = min_t(u32, SFF_READ_BLOCK_SIZE, ee->len - i); ++ memcpy(data + i, value, copy_len); + } + } + return 0; +-- +2.39.2 + diff --git a/queue-5.15/ila-do-not-generate-empty-messages-in-ila_xlat_nl_cm.patch b/queue-5.15/ila-do-not-generate-empty-messages-in-ila_xlat_nl_cm.patch new file mode 100644 index 00000000000..e40d2b12f7b --- /dev/null +++ b/queue-5.15/ila-do-not-generate-empty-messages-in-ila_xlat_nl_cm.patch @@ -0,0 +1,113 @@ +From 8de8c45975ee9e48ff68ebcede2026a1204926f5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 27 Feb 2023 15:30:24 +0000 +Subject: ila: do not generate empty messages in ila_xlat_nl_cmd_get_mapping() + +From: Eric Dumazet + +[ Upstream commit 693aa2c0d9b6d5b1f2745d31b6e70d09dbbaf06e ] + +ila_xlat_nl_cmd_get_mapping() generates an empty skb, +triggering a recent sanity check [1]. + +Instead, return an error code, so that user space +can get it.
+ +[1] +skb_assert_len +WARNING: CPU: 0 PID: 5923 at include/linux/skbuff.h:2527 skb_assert_len include/linux/skbuff.h:2527 [inline] +WARNING: CPU: 0 PID: 5923 at include/linux/skbuff.h:2527 __dev_queue_xmit+0x1bc0/0x3488 net/core/dev.c:4156 +Modules linked in: +CPU: 0 PID: 5923 Comm: syz-executor269 Not tainted 6.2.0-syzkaller-18300-g2ebd1fbb946d #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/21/2023 +pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) +pc : skb_assert_len include/linux/skbuff.h:2527 [inline] +pc : __dev_queue_xmit+0x1bc0/0x3488 net/core/dev.c:4156 +lr : skb_assert_len include/linux/skbuff.h:2527 [inline] +lr : __dev_queue_xmit+0x1bc0/0x3488 net/core/dev.c:4156 +sp : ffff80001e0d6c40 +x29: ffff80001e0d6e60 x28: dfff800000000000 x27: ffff0000c86328c0 +x26: dfff800000000000 x25: ffff0000c8632990 x24: ffff0000c8632a00 +x23: 0000000000000000 x22: 1fffe000190c6542 x21: ffff0000c8632a10 +x20: ffff0000c8632a00 x19: ffff80001856e000 x18: ffff80001e0d5fc0 +x17: 0000000000000000 x16: ffff80001235d16c x15: 0000000000000000 +x14: 0000000000000000 x13: 0000000000000001 x12: 0000000000000001 +x11: ff80800008353a30 x10: 0000000000000000 x9 : 21567eaf25bfb600 +x8 : 21567eaf25bfb600 x7 : 0000000000000001 x6 : 0000000000000001 +x5 : ffff80001e0d6558 x4 : ffff800015c74760 x3 : ffff800008596744 +x2 : 0000000000000001 x1 : 0000000100000000 x0 : 000000000000000e +Call trace: +skb_assert_len include/linux/skbuff.h:2527 [inline] +__dev_queue_xmit+0x1bc0/0x3488 net/core/dev.c:4156 +dev_queue_xmit include/linux/netdevice.h:3033 [inline] +__netlink_deliver_tap_skb net/netlink/af_netlink.c:307 [inline] +__netlink_deliver_tap+0x45c/0x6f8 net/netlink/af_netlink.c:325 +netlink_deliver_tap+0xf4/0x174 net/netlink/af_netlink.c:338 +__netlink_sendskb net/netlink/af_netlink.c:1283 [inline] +netlink_sendskb+0x6c/0x154 net/netlink/af_netlink.c:1292 +netlink_unicast+0x334/0x8d4 net/netlink/af_netlink.c:1380 +nlmsg_unicast 
include/net/netlink.h:1099 [inline] +genlmsg_unicast include/net/genetlink.h:433 [inline] +genlmsg_reply include/net/genetlink.h:443 [inline] +ila_xlat_nl_cmd_get_mapping+0x620/0x7d0 net/ipv6/ila/ila_xlat.c:493 +genl_family_rcv_msg_doit net/netlink/genetlink.c:968 [inline] +genl_family_rcv_msg net/netlink/genetlink.c:1048 [inline] +genl_rcv_msg+0x938/0xc1c net/netlink/genetlink.c:1065 +netlink_rcv_skb+0x214/0x3c4 net/netlink/af_netlink.c:2574 +genl_rcv+0x38/0x50 net/netlink/genetlink.c:1076 +netlink_unicast_kernel net/netlink/af_netlink.c:1339 [inline] +netlink_unicast+0x660/0x8d4 net/netlink/af_netlink.c:1365 +netlink_sendmsg+0x800/0xae0 net/netlink/af_netlink.c:1942 +sock_sendmsg_nosec net/socket.c:714 [inline] +sock_sendmsg net/socket.c:734 [inline] +____sys_sendmsg+0x558/0x844 net/socket.c:2479 +___sys_sendmsg net/socket.c:2533 [inline] +__sys_sendmsg+0x26c/0x33c net/socket.c:2562 +__do_sys_sendmsg net/socket.c:2571 [inline] +__se_sys_sendmsg net/socket.c:2569 [inline] +__arm64_sys_sendmsg+0x80/0x94 net/socket.c:2569 +__invoke_syscall arch/arm64/kernel/syscall.c:38 [inline] +invoke_syscall+0x98/0x2c0 arch/arm64/kernel/syscall.c:52 +el0_svc_common+0x138/0x258 arch/arm64/kernel/syscall.c:142 +do_el0_svc+0x64/0x198 arch/arm64/kernel/syscall.c:193 +el0_svc+0x58/0x168 arch/arm64/kernel/entry-common.c:637 +el0t_64_sync_handler+0x84/0xf0 arch/arm64/kernel/entry-common.c:655 +el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:591 +irq event stamp: 136484 +hardirqs last enabled at (136483): [] __up_console_sem+0x60/0xb4 kernel/printk/printk.c:345 +hardirqs last disabled at (136484): [] el1_dbg+0x24/0x80 arch/arm64/kernel/entry-common.c:405 +softirqs last enabled at (136418): [] softirq_handle_end kernel/softirq.c:414 [inline] +softirqs last enabled at (136418): [] __do_softirq+0xd4c/0xfa4 kernel/softirq.c:600 +softirqs last disabled at (136371): [] ____do_softirq+0x14/0x20 arch/arm64/kernel/irq.c:80 +---[ end trace 0000000000000000 ]--- +skb len=0 headroom=0 headlen=0 
tailroom=192 +mac=(0,0) net=(0,-1) trans=-1 +shinfo(txflags=0 nr_frags=0 gso(size=0 type=0 segs=0)) +csum(0x0 ip_summed=0 complete_sw=0 valid=0 level=0) +hash(0x0 sw=0 l4=0) proto=0x0010 pkttype=6 iif=0 +dev name=nlmon0 feat=0x0000000000005861 + +Fixes: 7f00feaf1076 ("ila: Add generic ILA translation facility") +Reported-by: syzbot +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/ipv6/ila/ila_xlat.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c +index a1ac0e3d8c60c..163668531a57f 100644 +--- a/net/ipv6/ila/ila_xlat.c ++++ b/net/ipv6/ila/ila_xlat.c +@@ -477,6 +477,7 @@ int ila_xlat_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info) + + rcu_read_lock(); + ++ ret = -ESRCH; + ila = ila_lookup_by_params(&xp, ilan); + if (ila) { + ret = ila_dump_info(ila, +-- +2.39.2 + diff --git a/queue-5.15/iommu-vt-d-fix-pasid-directory-pointer-coherency.patch b/queue-5.15/iommu-vt-d-fix-pasid-directory-pointer-coherency.patch new file mode 100644 index 00000000000..042095da507 --- /dev/null +++ b/queue-5.15/iommu-vt-d-fix-pasid-directory-pointer-coherency.patch @@ -0,0 +1,82 @@ +From 7277af8c9aa72f30aea97c62c92d537386b0286f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 16 Feb 2023 21:08:15 +0800 +Subject: iommu/vt-d: Fix PASID directory pointer coherency + +From: Jacob Pan + +[ Upstream commit 194b3348bdbb7db65375c72f3f774aee4cc6614e ] + +On platforms that do not support IOMMU Extended capability bit 0 +Page-walk Coherency, CPU caches are not snooped when IOMMU is accessing +any translation structures. IOMMU access goes only directly to +memory. Intel IOMMU code was missing a flush for the PASID table +directory that resulted in the unrecoverable fault as shown below. + +This patch adds clflush calls whenever allocating and updating +a PASID table directory to ensure cache coherency. 
+ +On the reverse direction, there's no need to clflush the PASID directory +pointer when we deactivate a context entry in that IOMMU hardware will +not see the old PASID directory pointer after we clear the context entry. +PASID directory entries are also never freed once allocated. + + DMAR: DRHD: handling fault status reg 3 + DMAR: [DMA Read NO_PASID] Request device [00:0d.2] fault addr 0x1026a4000 + [fault reason 0x51] SM: Present bit in Directory Entry is clear + DMAR: Dump dmar1 table entries for IOVA 0x1026a4000 + DMAR: scalable mode root entry: hi 0x0000000102448001, low 0x0000000101b3e001 + DMAR: context entry: hi 0x0000000000000000, low 0x0000000101b4d401 + DMAR: pasid dir entry: 0x0000000101b4e001 + DMAR: pasid table entry[0]: 0x0000000000000109 + DMAR: pasid table entry[1]: 0x0000000000000001 + DMAR: pasid table entry[2]: 0x0000000000000000 + DMAR: pasid table entry[3]: 0x0000000000000000 + DMAR: pasid table entry[4]: 0x0000000000000000 + DMAR: pasid table entry[5]: 0x0000000000000000 + DMAR: pasid table entry[6]: 0x0000000000000000 + DMAR: pasid table entry[7]: 0x0000000000000000 + DMAR: PTE not present at level 4 + +Cc: +Fixes: 0bbeb01a4faf ("iommu/vt-d: Manage scalalble mode PASID tables") +Reviewed-by: Kevin Tian +Reported-by: Sukumar Ghorai +Signed-off-by: Ashok Raj +Signed-off-by: Jacob Pan +Link: https://lore.kernel.org/r/20230209212843.1788125-1-jacob.jun.pan@linux.intel.com +Signed-off-by: Lu Baolu +Signed-off-by: Joerg Roedel +Signed-off-by: Sasha Levin +--- + drivers/iommu/intel/pasid.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c +index 9a3dd55aaa1c2..6dbc43b414ca3 100644 +--- a/drivers/iommu/intel/pasid.c ++++ b/drivers/iommu/intel/pasid.c +@@ -186,6 +186,9 @@ int intel_pasid_alloc_table(struct device *dev) + attach_out: + device_attach_pasid_table(info, pasid_table); + ++ if (!ecap_coherent(info->iommu->ecap)) ++ clflush_cache_range(pasid_table->table, size); ++ + 
return 0; + } + +@@ -276,6 +279,10 @@ static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid) + free_pgtable_page(entries); + goto retry; + } ++ if (!ecap_coherent(info->iommu->ecap)) { ++ clflush_cache_range(entries, VTD_PAGE_SIZE); ++ clflush_cache_range(&dir[dir_index].val, sizeof(*dir)); ++ } + } + + return &entries[index]; +-- +2.39.2 + diff --git a/queue-5.15/ipmi-ssif-add-a-timer-between-request-retries.patch b/queue-5.15/ipmi-ssif-add-a-timer-between-request-retries.patch new file mode 100644 index 00000000000..097055d95d9 --- /dev/null +++ b/queue-5.15/ipmi-ssif-add-a-timer-between-request-retries.patch @@ -0,0 +1,135 @@ +From d257b39e675008a4b71664bbbb507ae88b78b467 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 25 Jan 2023 10:34:47 -0600 +Subject: ipmi:ssif: Add a timer between request retries + +From: Corey Minyard + +[ Upstream commit 00bb7e763ec9f384cb382455cb6ba5588b5375cf ] + +The IPMI spec has a time (T6) specified between request retries. Add +the handling for that. + +Reported by: Tony Camuso +Cc: stable@vger.kernel.org +Signed-off-by: Corey Minyard +Signed-off-by: Sasha Levin +--- + drivers/char/ipmi/ipmi_ssif.c | 34 +++++++++++++++++++++++++++------- + 1 file changed, 27 insertions(+), 7 deletions(-) + +diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c +index cae21632cf079..20dc2452815c7 100644 +--- a/drivers/char/ipmi/ipmi_ssif.c ++++ b/drivers/char/ipmi/ipmi_ssif.c +@@ -74,7 +74,8 @@ + /* + * Timer values + */ +-#define SSIF_MSG_USEC 60000 /* 60ms between message tries. */ ++#define SSIF_MSG_USEC 60000 /* 60ms between message tries (T3). */ ++#define SSIF_REQ_RETRY_USEC 60000 /* 60ms between send retries (T6). */ + #define SSIF_MSG_PART_USEC 5000 /* 5ms for a message part */ + + /* How many times to we retry sending/receiving the message. 
*/ +@@ -82,7 +83,9 @@ + #define SSIF_RECV_RETRIES 250 + + #define SSIF_MSG_MSEC (SSIF_MSG_USEC / 1000) ++#define SSIF_REQ_RETRY_MSEC (SSIF_REQ_RETRY_USEC / 1000) + #define SSIF_MSG_JIFFIES ((SSIF_MSG_USEC * 1000) / TICK_NSEC) ++#define SSIF_REQ_RETRY_JIFFIES ((SSIF_REQ_RETRY_USEC * 1000) / TICK_NSEC) + #define SSIF_MSG_PART_JIFFIES ((SSIF_MSG_PART_USEC * 1000) / TICK_NSEC) + + /* +@@ -229,6 +232,9 @@ struct ssif_info { + bool got_alert; + bool waiting_alert; + ++ /* Used to inform the timeout that it should do a resend. */ ++ bool do_resend; ++ + /* + * If set to true, this will request events the next time the + * state machine is idle. +@@ -538,22 +544,28 @@ static void start_get(struct ssif_info *ssif_info) + ssif_info->recv, I2C_SMBUS_BLOCK_DATA); + } + ++static void start_resend(struct ssif_info *ssif_info); ++ + static void retry_timeout(struct timer_list *t) + { + struct ssif_info *ssif_info = from_timer(ssif_info, t, retry_timer); + unsigned long oflags, *flags; +- bool waiting; ++ bool waiting, resend; + + if (ssif_info->stopping) + return; + + flags = ipmi_ssif_lock_cond(ssif_info, &oflags); ++ resend = ssif_info->do_resend; ++ ssif_info->do_resend = false; + waiting = ssif_info->waiting_alert; + ssif_info->waiting_alert = false; + ipmi_ssif_unlock_cond(ssif_info, flags); + + if (waiting) + start_get(ssif_info); ++ if (resend) ++ start_resend(ssif_info); + } + + static void watch_timeout(struct timer_list *t) +@@ -602,8 +614,6 @@ static void ssif_alert(struct i2c_client *client, enum i2c_alert_protocol type, + start_get(ssif_info); + } + +-static void start_resend(struct ssif_info *ssif_info); +- + static void msg_done_handler(struct ssif_info *ssif_info, int result, + unsigned char *data, unsigned int len) + { +@@ -909,7 +919,13 @@ static void msg_written_handler(struct ssif_info *ssif_info, int result, + if (result < 0) { + ssif_info->retries_left--; + if (ssif_info->retries_left > 0) { +- start_resend(ssif_info); ++ /* ++ * Wait the retry timeout time 
per the spec, ++ * then redo the send. ++ */ ++ ssif_info->do_resend = true; ++ mod_timer(&ssif_info->retry_timer, ++ jiffies + SSIF_REQ_RETRY_JIFFIES); + return; + } + +@@ -1322,8 +1338,10 @@ static int do_cmd(struct i2c_client *client, int len, unsigned char *msg, + ret = i2c_smbus_write_block_data(client, SSIF_IPMI_REQUEST, len, msg); + if (ret) { + retry_cnt--; +- if (retry_cnt > 0) ++ if (retry_cnt > 0) { ++ msleep(SSIF_REQ_RETRY_MSEC); + goto retry1; ++ } + return -ENODEV; + } + +@@ -1464,8 +1482,10 @@ static int start_multipart_test(struct i2c_client *client, + 32, msg); + if (ret) { + retry_cnt--; +- if (retry_cnt > 0) ++ if (retry_cnt > 0) { ++ msleep(SSIF_REQ_RETRY_MSEC); + goto retry_write; ++ } + dev_err(&client->dev, "Could not write multi-part start, though the BMC said it could handle it. Just limit sends to one part.\n"); + return ret; + } +-- +2.39.2 + diff --git a/queue-5.15/ipmi-ssif-increase-the-message-retry-time.patch b/queue-5.15/ipmi-ssif-increase-the-message-retry-time.patch new file mode 100644 index 00000000000..e471ab720c3 --- /dev/null +++ b/queue-5.15/ipmi-ssif-increase-the-message-retry-time.patch @@ -0,0 +1,36 @@ +From 3f1f6bd99bd085ce27c41fa9622c913af1321a48 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 3 Nov 2022 15:03:11 -0500 +Subject: ipmi:ssif: Increase the message retry time + +From: Corey Minyard + +[ Upstream commit 39721d62bbc16ebc9bb2bdc2c163658f33da3b0b ] + +The spec states that the minimum message retry time is 60ms, but it was +set to 20ms. Correct it. 
+ +Reported by: Tony Camuso +Signed-off-by: Corey Minyard +Stable-dep-of: 00bb7e763ec9 ("ipmi:ssif: Add a timer between request retries") +Signed-off-by: Sasha Levin +--- + drivers/char/ipmi/ipmi_ssif.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c +index 427bf618c4470..cae21632cf079 100644 +--- a/drivers/char/ipmi/ipmi_ssif.c ++++ b/drivers/char/ipmi/ipmi_ssif.c +@@ -74,7 +74,7 @@ + /* + * Timer values + */ +-#define SSIF_MSG_USEC 20000 /* 20ms between message tries. */ ++#define SSIF_MSG_USEC 60000 /* 60ms between message tries. */ + #define SSIF_MSG_PART_USEC 5000 /* 5ms for a message part */ + + /* How many times to we retry sending/receiving the message. */ +-- +2.39.2 + diff --git a/queue-5.15/irqdomain-refactor-__irq_domain_alloc_irqs.patch b/queue-5.15/irqdomain-refactor-__irq_domain_alloc_irqs.patch new file mode 100644 index 00000000000..a7e294fcc88 --- /dev/null +++ b/queue-5.15/irqdomain-refactor-__irq_domain_alloc_irqs.patch @@ -0,0 +1,155 @@ +From 658c502b2e5fb1daa2bc9a7e63c217e4efaf2511 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 13 Feb 2023 11:42:47 +0100 +Subject: irqdomain: Refactor __irq_domain_alloc_irqs() + +From: Johan Hovold + +[ Upstream commit d55f7f4c58c07beb5050a834bf57ae2ede599c7e ] + +Refactor __irq_domain_alloc_irqs() so that it can be called internally +while holding the irq_domain_mutex. + +This will be used to fix a shared-interrupt mapping race, hence the +Fixes tag. 
+ +Fixes: b62b2cf5759b ("irqdomain: Fix handling of type settings for existing mappings") +Cc: stable@vger.kernel.org # 4.8 +Tested-by: Hsin-Yi Wang +Tested-by: Mark-PK Tsai +Signed-off-by: Johan Hovold +Signed-off-by: Marc Zyngier +Link: https://lore.kernel.org/r/20230213104302.17307-6-johan+linaro@kernel.org +Signed-off-by: Sasha Levin +--- + kernel/irq/irqdomain.c | 88 +++++++++++++++++++++++------------------- + 1 file changed, 48 insertions(+), 40 deletions(-) + +diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c +index 96ecd179717bf..e0b67784ac1e0 100644 +--- a/kernel/irq/irqdomain.c ++++ b/kernel/irq/irqdomain.c +@@ -1485,40 +1485,12 @@ int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain, + return domain->ops->alloc(domain, irq_base, nr_irqs, arg); + } + +-/** +- * __irq_domain_alloc_irqs - Allocate IRQs from domain +- * @domain: domain to allocate from +- * @irq_base: allocate specified IRQ number if irq_base >= 0 +- * @nr_irqs: number of IRQs to allocate +- * @node: NUMA node id for memory allocation +- * @arg: domain specific argument +- * @realloc: IRQ descriptors have already been allocated if true +- * @affinity: Optional irq affinity mask for multiqueue devices +- * +- * Allocate IRQ numbers and initialized all data structures to support +- * hierarchy IRQ domains. +- * Parameter @realloc is mainly to support legacy IRQs. +- * Returns error code or allocated IRQ number +- * +- * The whole process to setup an IRQ has been split into two steps. +- * The first step, __irq_domain_alloc_irqs(), is to allocate IRQ +- * descriptor and required hardware resources. The second step, +- * irq_domain_activate_irq(), is to program the hardware with preallocated +- * resources. In this way, it's easier to rollback when failing to +- * allocate resources. 
+- */ +-int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, +- unsigned int nr_irqs, int node, void *arg, +- bool realloc, const struct irq_affinity_desc *affinity) ++static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base, ++ unsigned int nr_irqs, int node, void *arg, ++ bool realloc, const struct irq_affinity_desc *affinity) + { + int i, ret, virq; + +- if (domain == NULL) { +- domain = irq_default_domain; +- if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n")) +- return -EINVAL; +- } +- + if (realloc && irq_base >= 0) { + virq = irq_base; + } else { +@@ -1537,24 +1509,18 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, + goto out_free_desc; + } + +- mutex_lock(&irq_domain_mutex); + ret = irq_domain_alloc_irqs_hierarchy(domain, virq, nr_irqs, arg); +- if (ret < 0) { +- mutex_unlock(&irq_domain_mutex); ++ if (ret < 0) + goto out_free_irq_data; +- } + + for (i = 0; i < nr_irqs; i++) { + ret = irq_domain_trim_hierarchy(virq + i); +- if (ret) { +- mutex_unlock(&irq_domain_mutex); ++ if (ret) + goto out_free_irq_data; +- } + } +- ++ + for (i = 0; i < nr_irqs; i++) + irq_domain_insert_irq(virq + i); +- mutex_unlock(&irq_domain_mutex); + + return virq; + +@@ -1565,6 +1531,48 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, + return ret; + } + ++/** ++ * __irq_domain_alloc_irqs - Allocate IRQs from domain ++ * @domain: domain to allocate from ++ * @irq_base: allocate specified IRQ number if irq_base >= 0 ++ * @nr_irqs: number of IRQs to allocate ++ * @node: NUMA node id for memory allocation ++ * @arg: domain specific argument ++ * @realloc: IRQ descriptors have already been allocated if true ++ * @affinity: Optional irq affinity mask for multiqueue devices ++ * ++ * Allocate IRQ numbers and initialized all data structures to support ++ * hierarchy IRQ domains. ++ * Parameter @realloc is mainly to support legacy IRQs. 
++ * Returns error code or allocated IRQ number ++ * ++ * The whole process to setup an IRQ has been split into two steps. ++ * The first step, __irq_domain_alloc_irqs(), is to allocate IRQ ++ * descriptor and required hardware resources. The second step, ++ * irq_domain_activate_irq(), is to program the hardware with preallocated ++ * resources. In this way, it's easier to rollback when failing to ++ * allocate resources. ++ */ ++int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, ++ unsigned int nr_irqs, int node, void *arg, ++ bool realloc, const struct irq_affinity_desc *affinity) ++{ ++ int ret; ++ ++ if (domain == NULL) { ++ domain = irq_default_domain; ++ if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n")) ++ return -EINVAL; ++ } ++ ++ mutex_lock(&irq_domain_mutex); ++ ret = irq_domain_alloc_irqs_locked(domain, irq_base, nr_irqs, node, arg, ++ realloc, affinity); ++ mutex_unlock(&irq_domain_mutex); ++ ++ return ret; ++} ++ + /* The irq_data was moved, fix the revmap to refer to the new location */ + static void irq_domain_fix_revmap(struct irq_data *d) + { +-- +2.39.2 + diff --git a/queue-5.15/kvm-optimize-kvm_make_vcpus_request_mask-a-bit.patch b/queue-5.15/kvm-optimize-kvm_make_vcpus_request_mask-a-bit.patch new file mode 100644 index 00000000000..503bc212d55 --- /dev/null +++ b/queue-5.15/kvm-optimize-kvm_make_vcpus_request_mask-a-bit.patch @@ -0,0 +1,151 @@ +From d01f4e1efeaf75c5f42b52190f7bdfeefbd330f4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 3 Sep 2021 09:51:37 +0200 +Subject: KVM: Optimize kvm_make_vcpus_request_mask() a bit + +From: Vitaly Kuznetsov + +[ Upstream commit ae0946cd3601752dc58f86d84258e5361e9c8cd4 ] + +Iterating over set bits in 'vcpu_bitmap' should be faster than going +through all vCPUs, especially when just a few bits are set. 
+ +Drop kvm_make_vcpus_request_mask() call from kvm_make_all_cpus_request_except() +to avoid handling the special case when 'vcpu_bitmap' is NULL, move the +code to kvm_make_all_cpus_request_except() itself. + +Signed-off-by: Vitaly Kuznetsov +Reviewed-by: Sean Christopherson +Signed-off-by: Paolo Bonzini +Message-Id: <20210903075141.403071-5-vkuznets@redhat.com> +Signed-off-by: Paolo Bonzini +Stable-dep-of: 2b0128127373 ("KVM: Register /dev/kvm as the _very_ last thing during initialization") +Signed-off-by: Sasha Levin +--- + virt/kvm/kvm_main.c | 88 +++++++++++++++++++++++++++------------------ + 1 file changed, 53 insertions(+), 35 deletions(-) + +diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c +index 3ffed093d3ea2..d08dbf0be2fce 100644 +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -257,50 +257,57 @@ static inline bool kvm_kick_many_cpus(cpumask_var_t tmp, bool wait) + return true; + } + ++static void kvm_make_vcpu_request(struct kvm *kvm, struct kvm_vcpu *vcpu, ++ unsigned int req, cpumask_var_t tmp, ++ int current_cpu) ++{ ++ int cpu; ++ ++ kvm_make_request(req, vcpu); ++ ++ if (!(req & KVM_REQUEST_NO_WAKEUP) && kvm_vcpu_wake_up(vcpu)) ++ return; ++ ++ /* ++ * tmp can be "unavailable" if cpumasks are allocated off stack as ++ * allocation of the mask is deliberately not fatal and is handled by ++ * falling back to kicking all online CPUs. ++ */ ++ if (!cpumask_available(tmp)) ++ return; ++ ++ /* ++ * Note, the vCPU could get migrated to a different pCPU at any point ++ * after kvm_request_needs_ipi(), which could result in sending an IPI ++ * to the previous pCPU. But, that's OK because the purpose of the IPI ++ * is to ensure the vCPU returns to OUTSIDE_GUEST_MODE, which is ++ * satisfied if the vCPU migrates. Entering READING_SHADOW_PAGE_TABLES ++ * after this point is also OK, as the requirement is only that KVM wait ++ * for vCPUs that were reading SPTEs _before_ any changes were ++ * finalized. 
See kvm_vcpu_kick() for more details on handling requests. ++ */ ++ if (kvm_request_needs_ipi(vcpu, req)) { ++ cpu = READ_ONCE(vcpu->cpu); ++ if (cpu != -1 && cpu != current_cpu) ++ __cpumask_set_cpu(cpu, tmp); ++ } ++} ++ + bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req, + struct kvm_vcpu *except, + unsigned long *vcpu_bitmap, cpumask_var_t tmp) + { +- int i, cpu, me; + struct kvm_vcpu *vcpu; ++ int i, me; + bool called; + + me = get_cpu(); + +- kvm_for_each_vcpu(i, vcpu, kvm) { +- if ((vcpu_bitmap && !test_bit(i, vcpu_bitmap)) || +- vcpu == except) ++ for_each_set_bit(i, vcpu_bitmap, KVM_MAX_VCPUS) { ++ vcpu = kvm_get_vcpu(kvm, i); ++ if (!vcpu || vcpu == except) + continue; +- +- kvm_make_request(req, vcpu); +- +- if (!(req & KVM_REQUEST_NO_WAKEUP) && kvm_vcpu_wake_up(vcpu)) +- continue; +- +- /* +- * tmp can be "unavailable" if cpumasks are allocated off stack +- * as allocation of the mask is deliberately not fatal and is +- * handled by falling back to kicking all online CPUs. +- */ +- if (!cpumask_available(tmp)) +- continue; +- +- /* +- * Note, the vCPU could get migrated to a different pCPU at any +- * point after kvm_request_needs_ipi(), which could result in +- * sending an IPI to the previous pCPU. But, that's ok because +- * the purpose of the IPI is to ensure the vCPU returns to +- * OUTSIDE_GUEST_MODE, which is satisfied if the vCPU migrates. +- * Entering READING_SHADOW_PAGE_TABLES after this point is also +- * ok, as the requirement is only that KVM wait for vCPUs that +- * were reading SPTEs _before_ any changes were finalized. See +- * kvm_vcpu_kick() for more details on handling requests. 
+- */ +- if (kvm_request_needs_ipi(vcpu, req)) { +- cpu = READ_ONCE(vcpu->cpu); +- if (cpu != -1 && cpu != me) +- __cpumask_set_cpu(cpu, tmp); +- } ++ kvm_make_vcpu_request(kvm, vcpu, req, tmp, me); + } + + called = kvm_kick_many_cpus(tmp, !!(req & KVM_REQUEST_WAIT)); +@@ -312,12 +319,23 @@ bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req, + bool kvm_make_all_cpus_request_except(struct kvm *kvm, unsigned int req, + struct kvm_vcpu *except) + { ++ struct kvm_vcpu *vcpu; + cpumask_var_t cpus; + bool called; ++ int i, me; + + zalloc_cpumask_var(&cpus, GFP_ATOMIC); + +- called = kvm_make_vcpus_request_mask(kvm, req, except, NULL, cpus); ++ me = get_cpu(); ++ ++ kvm_for_each_vcpu(i, vcpu, kvm) { ++ if (vcpu == except) ++ continue; ++ kvm_make_vcpu_request(kvm, vcpu, req, cpus, me); ++ } ++ ++ called = kvm_kick_many_cpus(cpus, !!(req & KVM_REQUEST_WAIT)); ++ put_cpu(); + + free_cpumask_var(cpus); + return called; +-- +2.39.2 + diff --git a/queue-5.15/kvm-pre-allocate-cpumasks-for-kvm_make_all_cpus_requ.patch b/queue-5.15/kvm-pre-allocate-cpumasks-for-kvm_make_all_cpus_requ.patch new file mode 100644 index 00000000000..afa2b9214ce --- /dev/null +++ b/queue-5.15/kvm-pre-allocate-cpumasks-for-kvm_make_all_cpus_requ.patch @@ -0,0 +1,112 @@ +From 6b20a251f601893b0958846cc1b5dc0130c1b54e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 3 Sep 2021 09:51:40 +0200 +Subject: KVM: Pre-allocate cpumasks for kvm_make_all_cpus_request_except() + +From: Vitaly Kuznetsov + +[ Upstream commit baff59ccdc657d290be51b95b38ebe5de40036b4 ] + +Allocating cpumask dynamically in zalloc_cpumask_var() is not ideal. +Allocation is somewhat slow and can (in theory and when CPUMASK_OFFSTACK) +fail. kvm_make_all_cpus_request_except() already disables preemption so +we can use pre-allocated per-cpu cpumasks instead. 
+ +Signed-off-by: Vitaly Kuznetsov +Reviewed-by: Sean Christopherson +Signed-off-by: Paolo Bonzini +Message-Id: <20210903075141.403071-8-vkuznets@redhat.com> +Signed-off-by: Paolo Bonzini +Stable-dep-of: 2b0128127373 ("KVM: Register /dev/kvm as the _very_ last thing during initialization") +Signed-off-by: Sasha Levin +--- + virt/kvm/kvm_main.c | 29 +++++++++++++++++++++++------ + 1 file changed, 23 insertions(+), 6 deletions(-) + +diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c +index d08dbf0be2fce..c60e88b0b24ec 100644 +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -157,6 +157,8 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm); + static unsigned long long kvm_createvm_count; + static unsigned long long kvm_active_vms; + ++static DEFINE_PER_CPU(cpumask_var_t, cpu_kick_mask); ++ + __weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, + unsigned long start, unsigned long end) + { +@@ -320,14 +322,15 @@ bool kvm_make_all_cpus_request_except(struct kvm *kvm, unsigned int req, + struct kvm_vcpu *except) + { + struct kvm_vcpu *vcpu; +- cpumask_var_t cpus; ++ struct cpumask *cpus; + bool called; + int i, me; + +- zalloc_cpumask_var(&cpus, GFP_ATOMIC); +- + me = get_cpu(); + ++ cpus = this_cpu_cpumask_var_ptr(cpu_kick_mask); ++ cpumask_clear(cpus); ++ + kvm_for_each_vcpu(i, vcpu, kvm) { + if (vcpu == except) + continue; +@@ -337,7 +340,6 @@ bool kvm_make_all_cpus_request_except(struct kvm *kvm, unsigned int req, + called = kvm_kick_many_cpus(cpus, !!(req & KVM_REQUEST_WAIT)); + put_cpu(); + +- free_cpumask_var(cpus); + return called; + } + +@@ -5637,9 +5639,17 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, + goto out_free_3; + } + ++ for_each_possible_cpu(cpu) { ++ if (!alloc_cpumask_var_node(&per_cpu(cpu_kick_mask, cpu), ++ GFP_KERNEL, cpu_to_node(cpu))) { ++ r = -ENOMEM; ++ goto out_free_4; ++ } ++ } ++ + r = kvm_async_pf_init(); + if (r) +- goto out_free; ++ goto out_free_5; + + 
kvm_chardev_ops.owner = module; + kvm_vm_fops.owner = module; +@@ -5665,7 +5675,10 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, + + out_unreg: + kvm_async_pf_deinit(); +-out_free: ++out_free_5: ++ for_each_possible_cpu(cpu) ++ free_cpumask_var(per_cpu(cpu_kick_mask, cpu)); ++out_free_4: + kmem_cache_destroy(kvm_vcpu_cache); + out_free_3: + unregister_reboot_notifier(&kvm_reboot_notifier); +@@ -5685,8 +5698,12 @@ EXPORT_SYMBOL_GPL(kvm_init); + + void kvm_exit(void) + { ++ int cpu; ++ + debugfs_remove_recursive(kvm_debugfs_dir); + misc_deregister(&kvm_dev); ++ for_each_possible_cpu(cpu) ++ free_cpumask_var(per_cpu(cpu_kick_mask, cpu)); + kmem_cache_destroy(kvm_vcpu_cache); + kvm_async_pf_deinit(); + unregister_syscore_ops(&kvm_syscore_ops); +-- +2.39.2 + diff --git a/queue-5.15/kvm-register-dev-kvm-as-the-_very_-last-thing-during.patch b/queue-5.15/kvm-register-dev-kvm-as-the-_very_-last-thing-during.patch new file mode 100644 index 00000000000..9b891c6ecad --- /dev/null +++ b/queue-5.15/kvm-register-dev-kvm-as-the-_very_-last-thing-during.patch @@ -0,0 +1,88 @@ +From 441af723b4e2c428a236c58adbf77d83ddf9b1c0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 30 Nov 2022 23:08:45 +0000 +Subject: KVM: Register /dev/kvm as the _very_ last thing during initialization + +From: Sean Christopherson + +[ Upstream commit 2b01281273738bf2d6551da48d65db2df3f28998 ] + +Register /dev/kvm, i.e. expose KVM to userspace, only after all other +setup has completed. Once /dev/kvm is exposed, userspace can start +invoking KVM ioctls, creating VMs, etc... If userspace creates a VM +before KVM is done with its configuration, bad things may happen, e.g. +KVM will fail to properly migrate vCPU state if a VM is created before +KVM has registered preemption notifiers. 
+ +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Message-Id: <20221130230934.1014142-2-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Sasha Levin +--- + virt/kvm/kvm_main.c | 31 ++++++++++++++++++++++--------- + 1 file changed, 22 insertions(+), 9 deletions(-) + +diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c +index c60e88b0b24ec..17e22c654c4e4 100644 +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -5655,12 +5655,6 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, + kvm_vm_fops.owner = module; + kvm_vcpu_fops.owner = module; + +- r = misc_register(&kvm_dev); +- if (r) { +- pr_err("kvm: misc device register failed\n"); +- goto out_unreg; +- } +- + register_syscore_ops(&kvm_syscore_ops); + + kvm_preempt_ops.sched_in = kvm_sched_in; +@@ -5669,11 +5663,24 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, + kvm_init_debug(); + + r = kvm_vfio_ops_init(); +- WARN_ON(r); ++ if (WARN_ON_ONCE(r)) ++ goto err_vfio; ++ ++ /* ++ * Registration _must_ be the very last thing done, as this exposes ++ * /dev/kvm to userspace, i.e. all infrastructure must be setup! ++ */ ++ r = misc_register(&kvm_dev); ++ if (r) { ++ pr_err("kvm: misc device register failed\n"); ++ goto err_register; ++ } + + return 0; + +-out_unreg: ++err_register: ++ kvm_vfio_ops_exit(); ++err_vfio: + kvm_async_pf_deinit(); + out_free_5: + for_each_possible_cpu(cpu) +@@ -5700,8 +5707,14 @@ void kvm_exit(void) + { + int cpu; + +- debugfs_remove_recursive(kvm_debugfs_dir); ++ /* ++ * Note, unregistering /dev/kvm doesn't strictly need to come first, ++ * fops_get(), a.k.a. try_module_get(), prevents acquiring references ++ * to KVM while the module is being stopped. 
++ */ + misc_deregister(&kvm_dev); ++ ++ debugfs_remove_recursive(kvm_debugfs_dir); + for_each_possible_cpu(cpu) + free_cpumask_var(per_cpu(cpu_kick_mask, cpu)); + kmem_cache_destroy(kvm_vcpu_cache); +-- +2.39.2 + diff --git a/queue-5.15/kvm-svm-don-t-rewrite-guest-icr-on-avic-ipi-virtuali.patch b/queue-5.15/kvm-svm-don-t-rewrite-guest-icr-on-avic-ipi-virtuali.patch new file mode 100644 index 00000000000..8d1dbf4d262 --- /dev/null +++ b/queue-5.15/kvm-svm-don-t-rewrite-guest-icr-on-avic-ipi-virtuali.patch @@ -0,0 +1,75 @@ +From 6701e843fe3b3297a3e60fbf6cfa24abc1a5fc45 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 4 Feb 2022 21:41:59 +0000 +Subject: KVM: SVM: Don't rewrite guest ICR on AVIC IPI virtualization failure + +From: Sean Christopherson + +[ Upstream commit b51818afdc1d3c7cc269e295953685558d3af71c ] + +Don't bother rewriting the ICR value into the vAPIC page on an AVIC IPI +virtualization failure, the access is a trap, i.e. the value has already +been written to the vAPIC page. The one caveat is if hardware left the +BUSY flag set (which appears to happen somewhat arbitrarily), in which +case go through the "nodecode" APIC-write path in order to clear the BUSY +flag. 
+ +Signed-off-by: Sean Christopherson +Message-Id: <20220204214205.3306634-6-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Stable-dep-of: 5aede752a839 ("KVM: SVM: Process ICR on AVIC IPI delivery failure due to invalid target") +Signed-off-by: Sasha Levin +--- + arch/x86/kvm/lapic.c | 1 + + arch/x86/kvm/svm/avic.c | 22 +++++++++++----------- + 2 files changed, 12 insertions(+), 11 deletions(-) + +diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c +index 25530a908b4cd..8c9e41ff2a24e 100644 +--- a/arch/x86/kvm/lapic.c ++++ b/arch/x86/kvm/lapic.c +@@ -1295,6 +1295,7 @@ void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high) + + kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL); + } ++EXPORT_SYMBOL_GPL(kvm_apic_send_ipi); + + static u32 apic_get_tmcct(struct kvm_lapic *apic) + { +diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c +index 3d3f8dfb80457..52778be77713f 100644 +--- a/arch/x86/kvm/svm/avic.c ++++ b/arch/x86/kvm/svm/avic.c +@@ -320,18 +320,18 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu) + switch (id) { + case AVIC_IPI_FAILURE_INVALID_INT_TYPE: + /* +- * AVIC hardware handles the generation of +- * IPIs when the specified Message Type is Fixed +- * (also known as fixed delivery mode) and +- * the Trigger Mode is edge-triggered. The hardware +- * also supports self and broadcast delivery modes +- * specified via the Destination Shorthand(DSH) +- * field of the ICRL. Logical and physical APIC ID +- * formats are supported. All other IPI types cause +- * a #VMEXIT, which needs to emulated. ++ * Emulate IPIs that are not handled by AVIC hardware, which ++ * only virtualizes Fixed, Edge-Triggered INTRs. The exit is ++ * a trap, e.g. ICR holds the correct value and RIP has been ++ * advanced, KVM is responsible only for emulating the IPI. ++ * Sadly, hardware may sometimes leave the BUSY flag set, in ++ * which case KVM needs to emulate the ICR write as well in ++ * order to clear the BUSY flag. 
+ */ +- kvm_lapic_reg_write(apic, APIC_ICR2, icrh); +- kvm_lapic_reg_write(apic, APIC_ICR, icrl); ++ if (icrl & APIC_ICR_BUSY) ++ kvm_apic_write_nodecode(vcpu, APIC_ICR); ++ else ++ kvm_apic_send_ipi(apic, icrl, icrh); + break; + case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING: + /* +-- +2.39.2 + diff --git a/queue-5.15/kvm-svm-process-icr-on-avic-ipi-delivery-failure-due.patch b/queue-5.15/kvm-svm-process-icr-on-avic-ipi-delivery-failure-due.patch new file mode 100644 index 00000000000..1a7e500b4b5 --- /dev/null +++ b/queue-5.15/kvm-svm-process-icr-on-avic-ipi-delivery-failure-due.patch @@ -0,0 +1,70 @@ +From 8f09813101a85e82da8d99e36f941f6bf73e3e05 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 6 Jan 2023 01:12:37 +0000 +Subject: KVM: SVM: Process ICR on AVIC IPI delivery failure due to invalid + target + +From: Sean Christopherson + +[ Upstream commit 5aede752a839904059c2b5d68be0dc4501c6c15f ] + +Emulate ICR writes on AVIC IPI failures due to invalid targets using the +same logic as failures due to invalid types. AVIC acceleration fails if +_any_ of the targets are invalid, and crucially VM-Exits before sending +IPIs to targets that _are_ valid. In logical mode, the destination is a +bitmap, i.e. a single IPI can target multiple logical IDs. Doing nothing +causes KVM to drop IPIs if at least one target is valid and at least one +target is invalid. 
+ +Fixes: 18f40c53e10f ("svm: Add VMEXIT handlers for AVIC") +Cc: stable@vger.kernel.org +Reviewed-by: Paolo Bonzini +Reviewed-by: Maxim Levitsky +Signed-off-by: Sean Christopherson +Message-Id: <20230106011306.85230-5-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Sasha Levin +--- + arch/x86/kvm/svm/avic.c | 16 +++++++++------- + 1 file changed, 9 insertions(+), 7 deletions(-) + +diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c +index 52778be77713f..b595a33860d70 100644 +--- a/arch/x86/kvm/svm/avic.c ++++ b/arch/x86/kvm/svm/avic.c +@@ -318,14 +318,18 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu) + trace_kvm_avic_incomplete_ipi(vcpu->vcpu_id, icrh, icrl, id, index); + + switch (id) { ++ case AVIC_IPI_FAILURE_INVALID_TARGET: + case AVIC_IPI_FAILURE_INVALID_INT_TYPE: + /* + * Emulate IPIs that are not handled by AVIC hardware, which +- * only virtualizes Fixed, Edge-Triggered INTRs. The exit is +- * a trap, e.g. ICR holds the correct value and RIP has been +- * advanced, KVM is responsible only for emulating the IPI. +- * Sadly, hardware may sometimes leave the BUSY flag set, in +- * which case KVM needs to emulate the ICR write as well in ++ * only virtualizes Fixed, Edge-Triggered INTRs, and falls over ++ * if _any_ targets are invalid, e.g. if the logical mode mask ++ * is a superset of running vCPUs. ++ * ++ * The exit is a trap, e.g. ICR holds the correct value and RIP ++ * has been advanced, KVM is responsible only for emulating the ++ * IPI. Sadly, hardware may sometimes leave the BUSY flag set, ++ * in which case KVM needs to emulate the ICR write as well in + * order to clear the BUSY flag. 
+ */ + if (icrl & APIC_ICR_BUSY) +@@ -341,8 +345,6 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu) + */ + avic_kick_target_vcpus(vcpu->kvm, apic, icrl, icrh); + break; +- case AVIC_IPI_FAILURE_INVALID_TARGET: +- break; + case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE: + WARN_ONCE(1, "Invalid backing page\n"); + break; +-- +2.39.2 + diff --git a/queue-5.15/net-caif-fix-use-after-free-in-cfusbl_device_notify.patch b/queue-5.15/net-caif-fix-use-after-free-in-cfusbl_device_notify.patch new file mode 100644 index 00000000000..2009b918d0e --- /dev/null +++ b/queue-5.15/net-caif-fix-use-after-free-in-cfusbl_device_notify.patch @@ -0,0 +1,86 @@ +From 5ce4a0ebacdbf85b1f66e3abf7ddc85f1f5382f2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 2 Mar 2023 01:39:13 +0900 +Subject: net: caif: Fix use-after-free in cfusbl_device_notify() + +From: Shigeru Yoshida + +[ Upstream commit 9781e98a97110f5e76999058368b4be76a788484 ] + +syzbot reported use-after-free in cfusbl_device_notify() [1]. 
This +causes a stack trace like below: + +BUG: KASAN: use-after-free in cfusbl_device_notify+0x7c9/0x870 net/caif/caif_usb.c:138 +Read of size 8 at addr ffff88807ac4e6f0 by task kworker/u4:6/1214 + +CPU: 0 PID: 1214 Comm: kworker/u4:6 Not tainted 5.19.0-rc3-syzkaller-00146-g92f20ff72066 #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Workqueue: netns cleanup_net +Call Trace: + + __dump_stack lib/dump_stack.c:88 [inline] + dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 + print_address_description.constprop.0.cold+0xeb/0x467 mm/kasan/report.c:313 + print_report mm/kasan/report.c:429 [inline] + kasan_report.cold+0xf4/0x1c6 mm/kasan/report.c:491 + cfusbl_device_notify+0x7c9/0x870 net/caif/caif_usb.c:138 + notifier_call_chain+0xb5/0x200 kernel/notifier.c:87 + call_netdevice_notifiers_info+0xb5/0x130 net/core/dev.c:1945 + call_netdevice_notifiers_extack net/core/dev.c:1983 [inline] + call_netdevice_notifiers net/core/dev.c:1997 [inline] + netdev_wait_allrefs_any net/core/dev.c:10227 [inline] + netdev_run_todo+0xbc0/0x10f0 net/core/dev.c:10341 + default_device_exit_batch+0x44e/0x590 net/core/dev.c:11334 + ops_exit_list+0x125/0x170 net/core/net_namespace.c:167 + cleanup_net+0x4ea/0xb00 net/core/net_namespace.c:594 + process_one_work+0x996/0x1610 kernel/workqueue.c:2289 + worker_thread+0x665/0x1080 kernel/workqueue.c:2436 + kthread+0x2e9/0x3a0 kernel/kthread.c:376 + ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:302 + + +When unregistering a net device, unregister_netdevice_many_notify() +sets the device's reg_state to NETREG_UNREGISTERING, calls notifiers +with NETDEV_UNREGISTER, and adds the device to the todo list. + +Later on, devices in the todo list are processed by netdev_run_todo(). +netdev_run_todo() waits for devices' reference count to become 1 while +rebroadcasting NETDEV_UNREGISTER notification. + +When cfusbl_device_notify() is called with NETDEV_UNREGISTER multiple +times, the parent device might be freed.
This could cause UAF. +Processing NETDEV_UNREGISTER multiple times also causes imbalance of +reference count for the module. + +This patch fixes the issue by accepting only first NETDEV_UNREGISTER +notification. + +Fixes: 7ad65bf68d70 ("caif: Add support for CAIF over CDC NCM USB interface") +CC: sjur.brandeland@stericsson.com +Reported-by: syzbot+b563d33852b893653a9e@syzkaller.appspotmail.com +Link: https://syzkaller.appspot.com/bug?id=c3bfd8e2450adab3bffe4d80821fbbced600407f [1] +Signed-off-by: Shigeru Yoshida +Link: https://lore.kernel.org/r/20230301163913.391304-1-syoshida@redhat.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/caif/caif_usb.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c +index b02e1292f7f19..24488a4e2d26e 100644 +--- a/net/caif/caif_usb.c ++++ b/net/caif/caif_usb.c +@@ -134,6 +134,9 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what, + struct usb_device *usbdev; + int res; + ++ if (what == NETDEV_UNREGISTER && dev->reg_state >= NETREG_UNREGISTERED) ++ return 0; ++ + /* Check whether we have a NCM device, and find its VID/PID.
*/ + if (!(dev->dev.parent && dev->dev.parent->driver && + strcmp(dev->dev.parent->driver->name, "cdc_ncm") == 0)) +-- +2.39.2 + diff --git a/queue-5.15/net-dsa-mt7530-permit-port-5-to-work-without-port-6-.patch b/queue-5.15/net-dsa-mt7530-permit-port-5-to-work-without-port-6-.patch new file mode 100644 index 00000000000..382ebf4374b --- /dev/null +++ b/queue-5.15/net-dsa-mt7530-permit-port-5-to-work-without-port-6-.patch @@ -0,0 +1,151 @@ +From 45fdc09cbfe86745b506071dafc76415d4624a7f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 7 Mar 2023 17:54:11 +0200 +Subject: net: dsa: mt7530: permit port 5 to work without port 6 on MT7621 SoC +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Vladimir Oltean + +[ Upstream commit c8b8a3c601f2cfad25ab5ce5b04df700048aef6e ] + +The MT7530 switch from the MT7621 SoC has 2 ports which can be set up as +internal: port 5 and 6. Arınç reports that the GMAC1 attached to port 5 +receives corrupted frames, unless port 6 (attached to GMAC0) has been +brought up by the driver. This is true regardless of whether port 5 is +used as a user port or as a CPU port (carrying DSA tags). + +Offline debugging (blind for me) which began in the linked thread showed +experimentally that the configuration done by the driver for port 6 +contains a step which is needed by port 5 as well - the write to +CORE_GSWPLL_GRP2 (note that I've no idea as to what it does, apart from +the comment "Set core clock into 500Mhz"). Prints put by Arınç show that +the reset value of CORE_GSWPLL_GRP2 is RG_GSWPLL_POSDIV_500M(1) | +RG_GSWPLL_FBKDIV_500M(40) (0x128), both on the MCM MT7530 from the +MT7621 SoC, as well as on the standalone MT7530 from MT7623NI Bananapi +BPI-R2. Apparently, port 5 on the standalone MT7530 can work under both +values of the register, while on the MT7621 SoC it cannot. 
+ +The call path that triggers the register write is: + +mt753x_phylink_mac_config() for port 6 +-> mt753x_pad_setup() + -> mt7530_pad_clk_setup() + +so this fully explains the behavior noticed by Arınç, that bringing port +6 up is necessary. + +The simplest fix for the problem is to extract the register writes which +are needed for both port 5 and 6 into a common mt7530_pll_setup() +function, which is called at mt7530_setup() time, immediately after +switch reset. We can argue that this mirrors the code layout introduced +in mt7531_setup() by commit 42bc4fafe359 ("net: mt7531: only do PLL once +after the reset"), in that the PLL setup has the exact same positioning, +and further work to consolidate the separate setup() functions is not +hindered. + +Testing confirms that: + +- the slight reordering of writes to MT7530_P6ECR and to + CORE_GSWPLL_GRP1 / CORE_GSWPLL_GRP2 introduced by this change does not + appear to cause problems for the operation of port 6 on MT7621 and on + MT7623 (where port 5 also always worked) + +- packets sent through port 5 are not corrupted anymore, regardless of + whether port 6 is enabled by phylink or not (or even present in the + device tree) + +My algorithm for determining the Fixes: tag is as follows. Testing shows +that some logic from mt7530_pad_clk_setup() is needed even for port 5. +Prior to commit ca366d6c889b ("net: dsa: mt7530: Convert to PHYLINK +API"), a call did exist for all phy_is_pseudo_fixed_link() ports - so +port 5 included. That commit replaced it with a temporary "Port 5 is not +supported!" comment, and the following commit 38f790a80560 ("net: dsa: +mt7530: Add support for port 5") replaced that comment with a +configuration procedure in mt7530_setup_port5() which was insufficient +for port 5 to work. 
I'm laying the blame on the patch that claimed +support for port 5, although one would have also needed the change from +commit c3b8e07909db ("net: dsa: mt7530: setup core clock even in TRGMII +mode") for the write to be performed completely independently from port +6's configuration. + +Thanks go to Arınç for describing the problem, for debugging and for +testing. + +Reported-by: Arınç ÜNAL +Link: https://lore.kernel.org/netdev/f297c2c4-6e7c-57ac-2394-f6025d309b9d@arinc9.com/ +Fixes: 38f790a80560 ("net: dsa: mt7530: Add support for port 5") +Signed-off-by: Vladimir Oltean +Tested-by: Arınç ÜNAL +Reviewed-by: Simon Horman +Link: https://lore.kernel.org/r/20230307155411.868573-1-vladimir.oltean@nxp.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/dsa/mt7530.c | 35 ++++++++++++++++++++--------------- + 1 file changed, 20 insertions(+), 15 deletions(-) + +diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c +index c1505de23957f..7bcfa3be95e29 100644 +--- a/drivers/net/dsa/mt7530.c ++++ b/drivers/net/dsa/mt7530.c +@@ -388,6 +388,24 @@ mt7530_fdb_write(struct mt7530_priv *priv, u16 vid, + mt7530_write(priv, MT7530_ATA1 + (i * 4), reg[i]); + } + ++/* Set up switch core clock for MT7530 */ ++static void mt7530_pll_setup(struct mt7530_priv *priv) ++{ ++ /* Disable PLL */ ++ core_write(priv, CORE_GSWPLL_GRP1, 0); ++ ++ /* Set core clock into 500Mhz */ ++ core_write(priv, CORE_GSWPLL_GRP2, ++ RG_GSWPLL_POSDIV_500M(1) | ++ RG_GSWPLL_FBKDIV_500M(25)); ++ ++ /* Enable PLL */ ++ core_write(priv, CORE_GSWPLL_GRP1, ++ RG_GSWPLL_EN_PRE | ++ RG_GSWPLL_POSDIV_200M(2) | ++ RG_GSWPLL_FBKDIV_200M(32)); ++} ++ + /* Setup TX circuit including relevant PAD and driving */ + static int + mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface) +@@ -448,21 +466,6 @@ mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface) + core_clear(priv, CORE_TRGMII_GSW_CLK_CG, + REG_GSWCK_EN | REG_TRGMIICK_EN); + +- /* Setup core 
clock for MT7530 */ +- /* Disable PLL */ +- core_write(priv, CORE_GSWPLL_GRP1, 0); +- +- /* Set core clock into 500Mhz */ +- core_write(priv, CORE_GSWPLL_GRP2, +- RG_GSWPLL_POSDIV_500M(1) | +- RG_GSWPLL_FBKDIV_500M(25)); +- +- /* Enable PLL */ +- core_write(priv, CORE_GSWPLL_GRP1, +- RG_GSWPLL_EN_PRE | +- RG_GSWPLL_POSDIV_200M(2) | +- RG_GSWPLL_FBKDIV_200M(32)); +- + /* Setup the MT7530 TRGMII Tx Clock */ + core_write(priv, CORE_PLL_GROUP5, RG_LCDDS_PCW_NCPO1(ncpo1)); + core_write(priv, CORE_PLL_GROUP6, RG_LCDDS_PCW_NCPO0(0)); +@@ -2163,6 +2166,8 @@ mt7530_setup(struct dsa_switch *ds) + SYS_CTRL_PHY_RST | SYS_CTRL_SW_RST | + SYS_CTRL_REG_RST); + ++ mt7530_pll_setup(priv); ++ + /* Enable Port 6 only; P5 as GMAC5 which currently is not supported */ + val = mt7530_read(priv, MT7530_MHWTRAP); + val &= ~MHWTRAP_P6_DIS & ~MHWTRAP_PHY_ACCESS; +-- +2.39.2 + diff --git a/queue-5.15/net-ethernet-mtk_eth_soc-fix-rx-data-corruption-issu.patch b/queue-5.15/net-ethernet-mtk_eth_soc-fix-rx-data-corruption-issu.patch new file mode 100644 index 00000000000..037c8ddae9f --- /dev/null +++ b/queue-5.15/net-ethernet-mtk_eth_soc-fix-rx-data-corruption-issu.patch @@ -0,0 +1,71 @@ +From 1330e57943030d5a54931a6c387c82c200eb3f97 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 4 Mar 2023 13:43:20 +0000 +Subject: net: ethernet: mtk_eth_soc: fix RX data corruption issue +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Daniel Golle + +[ Upstream commit 193250ace270fecd586dd2d0dfbd9cbd2ade977f ] + +Fix data corruption issue with SerDes connected PHYs operating at 1.25 +Gbps speed where we could previously observe about 30% packet loss while +the bad packet counter was increasing. 
+ +As almost all boards with MediaTek MT7622 or MT7986 use either the MT7531 +switch IC operating at 3.125Gbps SerDes rate or single-port PHYs using +rate-adaptation to 2500Base-X mode, this issue only got exposed now when +we started trying to use SFP modules operating with 1.25 Gbps with the +BananaPi R3 board. + +The fix is to set bit 12 which disables the RX FIFO clear function when +setting up MAC MCR, MediaTek SDK did the same change stating: +"If without this patch, kernel might receive invalid packets that are +corrupted by GMAC."[1] + +[1]: https://git01.mediatek.com/plugins/gitiles/openwrt/feeds/mtk-openwrt-feeds/+/d8a2975939a12686c4a95c40db21efdc3f821f63 + +Fixes: 42c03844e93d ("net-next: mediatek: add support for MediaTek MT7622 SoC") +Tested-by: Bjørn Mork +Signed-off-by: Daniel Golle +Reviewed-by: Vladimir Oltean +Reviewed-by: Florian Fainelli +Link: https://lore.kernel.org/r/138da2735f92c8b6f8578ec2e5a794ee515b665f.1677937317.git.daniel@makrotopia.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mediatek/mtk_eth_soc.c | 3 ++- + drivers/net/ethernet/mediatek/mtk_eth_soc.h | 1 + + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c +index cc6a5b2f24e3e..bb1acdb0c62b3 100644 +--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c ++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c +@@ -363,7 +363,8 @@ static void mtk_mac_config(struct phylink_config *config, unsigned int mode, + mcr_cur = mtk_r32(mac->hw, MTK_MAC_MCR(mac->id)); + mcr_new = mcr_cur; + mcr_new |= MAC_MCR_IPG_CFG | MAC_MCR_FORCE_MODE | +- MAC_MCR_BACKOFF_EN | MAC_MCR_BACKPR_EN | MAC_MCR_FORCE_LINK; ++ MAC_MCR_BACKOFF_EN | MAC_MCR_BACKPR_EN | MAC_MCR_FORCE_LINK | ++ MAC_MCR_RX_FIFO_CLR_DIS; + + /* Only update control register when needed! 
*/ + if (mcr_new != mcr_cur) +diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h +index f2d90639d7ed1..d60260e00a3fc 100644 +--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h ++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h +@@ -369,6 +369,7 @@ + #define MAC_MCR_FORCE_MODE BIT(15) + #define MAC_MCR_TX_EN BIT(14) + #define MAC_MCR_RX_EN BIT(13) ++#define MAC_MCR_RX_FIFO_CLR_DIS BIT(12) + #define MAC_MCR_BACKOFF_EN BIT(9) + #define MAC_MCR_BACKPR_EN BIT(8) + #define MAC_MCR_FORCE_RX_FC BIT(5) +-- +2.39.2 + diff --git a/queue-5.15/net-lan78xx-fix-accessing-the-lan7800-s-internal-phy.patch b/queue-5.15/net-lan78xx-fix-accessing-the-lan7800-s-internal-phy.patch new file mode 100644 index 00000000000..76c3864e38e --- /dev/null +++ b/queue-5.15/net-lan78xx-fix-accessing-the-lan7800-s-internal-phy.patch @@ -0,0 +1,126 @@ +From da481f325e76c17da446f607123ce3e55052e3e4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 1 Mar 2023 08:43:07 -0700 +Subject: net: lan78xx: fix accessing the LAN7800's internal phy specific + registers from the MAC driver + +From: Yuiko Oshino + +[ Upstream commit e57cf3639c323eeed05d3725fd82f91b349adca8 ] + +Move the LAN7800 internal phy (phy ID 0x0007c132) specific register +accesses to the phy driver (microchip.c). + +Fix the error reported by Enguerrand de Ribaucourt in December 2022, +"Some operations during the cable switch workaround modify the register +LAN88XX_INT_MASK of the PHY. However, this register is specific to the +LAN8835 PHY. For instance, if a DP8322I PHY is connected to the LAN7801, +that register (0x19), corresponds to the LED and MAC address +configuration, resulting in unapropriate behavior." + +I did not test with the DP8322I PHY, but I tested with an EVB-LAN7800 +with the internal PHY. 
+ +Fixes: 14437e3fa284 ("lan78xx: workaround of forced 100 Full/Half duplex mode error") +Signed-off-by: Yuiko Oshino +Reviewed-by: Andrew Lunn +Link: https://lore.kernel.org/r/20230301154307.30438-1-yuiko.oshino@microchip.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/phy/microchip.c | 32 ++++++++++++++++++++++++++++++++ + drivers/net/usb/lan78xx.c | 27 +-------------------------- + 2 files changed, 33 insertions(+), 26 deletions(-) + +diff --git a/drivers/net/phy/microchip.c b/drivers/net/phy/microchip.c +index 9f1f2b6c97d4f..230f2fcf9c46a 100644 +--- a/drivers/net/phy/microchip.c ++++ b/drivers/net/phy/microchip.c +@@ -342,6 +342,37 @@ static int lan88xx_config_aneg(struct phy_device *phydev) + return genphy_config_aneg(phydev); + } + ++static void lan88xx_link_change_notify(struct phy_device *phydev) ++{ ++ int temp; ++ ++ /* At forced 100 F/H mode, chip may fail to set mode correctly ++ * when cable is switched between long(~50+m) and short one. ++ * As workaround, set to 10 before setting to 100 ++ * at forced 100 F/H mode. 
++ */ ++ if (!phydev->autoneg && phydev->speed == 100) { ++ /* disable phy interrupt */ ++ temp = phy_read(phydev, LAN88XX_INT_MASK); ++ temp &= ~LAN88XX_INT_MASK_MDINTPIN_EN_; ++ phy_write(phydev, LAN88XX_INT_MASK, temp); ++ ++ temp = phy_read(phydev, MII_BMCR); ++ temp &= ~(BMCR_SPEED100 | BMCR_SPEED1000); ++ phy_write(phydev, MII_BMCR, temp); /* set to 10 first */ ++ temp |= BMCR_SPEED100; ++ phy_write(phydev, MII_BMCR, temp); /* set to 100 later */ ++ ++ /* clear pending interrupt generated while workaround */ ++ temp = phy_read(phydev, LAN88XX_INT_STS); ++ ++ /* enable phy interrupt back */ ++ temp = phy_read(phydev, LAN88XX_INT_MASK); ++ temp |= LAN88XX_INT_MASK_MDINTPIN_EN_; ++ phy_write(phydev, LAN88XX_INT_MASK, temp); ++ } ++} ++ + static struct phy_driver microchip_phy_driver[] = { + { + .phy_id = 0x0007c130, +@@ -355,6 +386,7 @@ static struct phy_driver microchip_phy_driver[] = { + + .config_init = lan88xx_config_init, + .config_aneg = lan88xx_config_aneg, ++ .link_change_notify = lan88xx_link_change_notify, + + .config_intr = lan88xx_phy_config_intr, + .handle_interrupt = lan88xx_handle_interrupt, +diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c +index 3e1a83a22fdd6..5700c9d20a3e2 100644 +--- a/drivers/net/usb/lan78xx.c ++++ b/drivers/net/usb/lan78xx.c +@@ -1950,33 +1950,8 @@ static void lan78xx_remove_mdio(struct lan78xx_net *dev) + static void lan78xx_link_status_change(struct net_device *net) + { + struct phy_device *phydev = net->phydev; +- int temp; +- +- /* At forced 100 F/H mode, chip may fail to set mode correctly +- * when cable is switched between long(~50+m) and short one. +- * As workaround, set to 10 before setting to 100 +- * at forced 100 F/H mode. 
+- */ +- if (!phydev->autoneg && (phydev->speed == 100)) { +- /* disable phy interrupt */ +- temp = phy_read(phydev, LAN88XX_INT_MASK); +- temp &= ~LAN88XX_INT_MASK_MDINTPIN_EN_; +- phy_write(phydev, LAN88XX_INT_MASK, temp); + +- temp = phy_read(phydev, MII_BMCR); +- temp &= ~(BMCR_SPEED100 | BMCR_SPEED1000); +- phy_write(phydev, MII_BMCR, temp); /* set to 10 first */ +- temp |= BMCR_SPEED100; +- phy_write(phydev, MII_BMCR, temp); /* set to 100 later */ +- +- /* clear pending interrupt generated while workaround */ +- temp = phy_read(phydev, LAN88XX_INT_STS); +- +- /* enable phy interrupt back */ +- temp = phy_read(phydev, LAN88XX_INT_MASK); +- temp |= LAN88XX_INT_MASK_MDINTPIN_EN_; +- phy_write(phydev, LAN88XX_INT_MASK, temp); +- } ++ phy_print_status(phydev); + } + + static int irq_map(struct irq_domain *d, unsigned int irq, +-- +2.39.2 + diff --git a/queue-5.15/net-phy-smsc-cache-interrupt-mask.patch b/queue-5.15/net-phy-smsc-cache-interrupt-mask.patch new file mode 100644 index 00000000000..5251013f8b4 --- /dev/null +++ b/queue-5.15/net-phy-smsc-cache-interrupt-mask.patch @@ -0,0 +1,99 @@ +From b64a960e4385d145b98800b2839f59c6d5c8554b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 12 May 2022 10:42:06 +0200 +Subject: net: phy: smsc: Cache interrupt mask + +From: Lukas Wunner + +[ Upstream commit 7e8b617eb93f9fcaedac02cd19edcad31c767386 ] + +Cache the interrupt mask to avoid re-reading it from the PHY upon every +interrupt. + +This will simplify a subsequent commit which detects hot-removal in the +interrupt handler and bails out. + +Analyzing and debugging PHY transactions also becomes simpler if such +redundant reads are avoided. + +Last not least, interrupt overhead and latency is slightly improved. + +Tested-by: Oleksij Rempel # LAN9514/9512/9500 +Tested-by: Ferry Toth # LAN9514 +Signed-off-by: Lukas Wunner +Reviewed-by: Andrew Lunn +Signed-off-by: David S. 
Miller +Stable-dep-of: 58aac3a2ef41 ("net: phy: smsc: fix link up detection in forced irq mode") +Signed-off-by: Sasha Levin +--- + drivers/net/phy/smsc.c | 24 +++++++++++------------- + 1 file changed, 11 insertions(+), 13 deletions(-) + +diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c +index 636b0907a5987..63dca3bb56110 100644 +--- a/drivers/net/phy/smsc.c ++++ b/drivers/net/phy/smsc.c +@@ -44,6 +44,7 @@ static struct smsc_hw_stat smsc_hw_stats[] = { + }; + + struct smsc_phy_priv { ++ u16 intmask; + bool energy_enable; + struct clk *refclk; + }; +@@ -58,7 +59,6 @@ static int smsc_phy_ack_interrupt(struct phy_device *phydev) + static int smsc_phy_config_intr(struct phy_device *phydev) + { + struct smsc_phy_priv *priv = phydev->priv; +- u16 intmask = 0; + int rc; + + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { +@@ -66,12 +66,15 @@ static int smsc_phy_config_intr(struct phy_device *phydev) + if (rc) + return rc; + +- intmask = MII_LAN83C185_ISF_INT4 | MII_LAN83C185_ISF_INT6; ++ priv->intmask = MII_LAN83C185_ISF_INT4 | MII_LAN83C185_ISF_INT6; + if (priv->energy_enable) +- intmask |= MII_LAN83C185_ISF_INT7; +- rc = phy_write(phydev, MII_LAN83C185_IM, intmask); ++ priv->intmask |= MII_LAN83C185_ISF_INT7; ++ ++ rc = phy_write(phydev, MII_LAN83C185_IM, priv->intmask); + } else { +- rc = phy_write(phydev, MII_LAN83C185_IM, intmask); ++ priv->intmask = 0; ++ ++ rc = phy_write(phydev, MII_LAN83C185_IM, 0); + if (rc) + return rc; + +@@ -83,13 +86,8 @@ static int smsc_phy_config_intr(struct phy_device *phydev) + + static irqreturn_t smsc_phy_handle_interrupt(struct phy_device *phydev) + { +- int irq_status, irq_enabled; +- +- irq_enabled = phy_read(phydev, MII_LAN83C185_IM); +- if (irq_enabled < 0) { +- phy_error(phydev); +- return IRQ_NONE; +- } ++ struct smsc_phy_priv *priv = phydev->priv; ++ int irq_status; + + irq_status = phy_read(phydev, MII_LAN83C185_ISF); + if (irq_status < 0) { +@@ -97,7 +95,7 @@ static irqreturn_t smsc_phy_handle_interrupt(struct 
phy_device *phydev) + return IRQ_NONE; + } + +- if (!(irq_status & irq_enabled)) ++ if (!(irq_status & priv->intmask)) + return IRQ_NONE; + + phy_trigger_machine(phydev); +-- +2.39.2 + diff --git a/queue-5.15/net-phy-smsc-fix-link-up-detection-in-forced-irq-mod.patch b/queue-5.15/net-phy-smsc-fix-link-up-detection-in-forced-irq-mod.patch new file mode 100644 index 00000000000..de0d96bb73e --- /dev/null +++ b/queue-5.15/net-phy-smsc-fix-link-up-detection-in-forced-irq-mod.patch @@ -0,0 +1,79 @@ +From bedd29e6f990aaf4fc874db3828b14f15559a8c4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 4 Mar 2023 11:52:44 +0100 +Subject: net: phy: smsc: fix link up detection in forced irq mode + +From: Heiner Kallweit + +[ Upstream commit 58aac3a2ef414fea6d7fdf823ea177744a087d13 ] + +Currently link up can't be detected in forced mode if polling +isn't used. Only link up interrupt source we have is aneg +complete which isn't applicable in forced mode. Therefore we +have to use energy-on as link up indicator. + +Fixes: 7365494550f6 ("net: phy: smsc: skip ENERGYON interrupt if disabled") +Signed-off-by: Heiner Kallweit +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/phy/smsc.c | 14 +++----------- + 1 file changed, 3 insertions(+), 11 deletions(-) + +diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c +index 63dca3bb56110..04e628788f1b5 100644 +--- a/drivers/net/phy/smsc.c ++++ b/drivers/net/phy/smsc.c +@@ -44,7 +44,6 @@ static struct smsc_hw_stat smsc_hw_stats[] = { + }; + + struct smsc_phy_priv { +- u16 intmask; + bool energy_enable; + struct clk *refclk; + }; +@@ -58,7 +57,6 @@ static int smsc_phy_ack_interrupt(struct phy_device *phydev) + + static int smsc_phy_config_intr(struct phy_device *phydev) + { +- struct smsc_phy_priv *priv = phydev->priv; + int rc; + + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { +@@ -66,14 +64,9 @@ static int smsc_phy_config_intr(struct phy_device *phydev) + if (rc) + return rc; + +- priv->intmask = MII_LAN83C185_ISF_INT4 | MII_LAN83C185_ISF_INT6; +- if (priv->energy_enable) +- priv->intmask |= MII_LAN83C185_ISF_INT7; +- +- rc = phy_write(phydev, MII_LAN83C185_IM, priv->intmask); ++ rc = phy_write(phydev, MII_LAN83C185_IM, ++ MII_LAN83C185_ISF_INT_PHYLIB_EVENTS); + } else { +- priv->intmask = 0; +- + rc = phy_write(phydev, MII_LAN83C185_IM, 0); + if (rc) + return rc; +@@ -86,7 +79,6 @@ static int smsc_phy_config_intr(struct phy_device *phydev) + + static irqreturn_t smsc_phy_handle_interrupt(struct phy_device *phydev) + { +- struct smsc_phy_priv *priv = phydev->priv; + int irq_status; + + irq_status = phy_read(phydev, MII_LAN83C185_ISF); +@@ -95,7 +87,7 @@ static irqreturn_t smsc_phy_handle_interrupt(struct phy_device *phydev) + return IRQ_NONE; + } + +- if (!(irq_status & priv->intmask)) ++ if (!(irq_status & MII_LAN83C185_ISF_INT_PHYLIB_EVENTS)) + return IRQ_NONE; + + phy_trigger_machine(phydev); +-- +2.39.2 + diff --git a/queue-5.15/net-phylib-get-rid-of-unnecessary-locking.patch b/queue-5.15/net-phylib-get-rid-of-unnecessary-locking.patch new file mode 100644 index 00000000000..160c54d74f6 --- /dev/null +++ 
b/queue-5.15/net-phylib-get-rid-of-unnecessary-locking.patch @@ -0,0 +1,174 @@ +From 9d3547b575687d7a18205ed769b266b2f0035e74 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 3 Mar 2023 16:37:54 +0000 +Subject: net: phylib: get rid of unnecessary locking + +From: Russell King (Oracle) + +[ Upstream commit f4b47a2e9463950df3e7c8b70e017877c1d4eb11 ] + +The locking in phy_probe() and phy_remove() does very little to prevent +any races with e.g. phy_attach_direct(), but instead causes lockdep ABBA +warnings. Remove it. + +====================================================== +WARNING: possible circular locking dependency detected +6.2.0-dirty #1108 Tainted: G W E +------------------------------------------------------ +ip/415 is trying to acquire lock: +ffff5c268f81ef50 (&dev->lock){+.+.}-{3:3}, at: phy_attach_direct+0x17c/0x3a0 [libphy] + +but task is already holding lock: +ffffaef6496cb518 (rtnl_mutex){+.+.}-{3:3}, at: rtnetlink_rcv_msg+0x154/0x560 + +which lock already depends on the new lock. 
+ +the existing dependency chain (in reverse order) is: + +-> #1 (rtnl_mutex){+.+.}-{3:3}: + __lock_acquire+0x35c/0x6c0 + lock_acquire.part.0+0xcc/0x220 + lock_acquire+0x68/0x84 + __mutex_lock+0x8c/0x414 + mutex_lock_nested+0x34/0x40 + rtnl_lock+0x24/0x30 + sfp_bus_add_upstream+0x34/0x150 + phy_sfp_probe+0x4c/0x94 [libphy] + mv3310_probe+0x148/0x184 [marvell10g] + phy_probe+0x8c/0x200 [libphy] + call_driver_probe+0xbc/0x15c + really_probe+0xc0/0x320 + __driver_probe_device+0x84/0x120 + driver_probe_device+0x44/0x120 + __device_attach_driver+0xc4/0x160 + bus_for_each_drv+0x80/0xe0 + __device_attach+0xb0/0x1f0 + device_initial_probe+0x1c/0x2c + bus_probe_device+0xa4/0xb0 + device_add+0x360/0x53c + phy_device_register+0x60/0xa4 [libphy] + fwnode_mdiobus_phy_device_register+0xc0/0x190 [fwnode_mdio] + fwnode_mdiobus_register_phy+0x160/0xd80 [fwnode_mdio] + of_mdiobus_register+0x140/0x340 [of_mdio] + orion_mdio_probe+0x298/0x3c0 [mvmdio] + platform_probe+0x70/0xe0 + call_driver_probe+0x34/0x15c + really_probe+0xc0/0x320 + __driver_probe_device+0x84/0x120 + driver_probe_device+0x44/0x120 + __driver_attach+0x104/0x210 + bus_for_each_dev+0x78/0xdc + driver_attach+0x2c/0x3c + bus_add_driver+0x184/0x240 + driver_register+0x80/0x13c + __platform_driver_register+0x30/0x3c + xt_compat_calc_jump+0x28/0xa4 [x_tables] + do_one_initcall+0x50/0x1b0 + do_init_module+0x50/0x1fc + load_module+0x684/0x744 + __do_sys_finit_module+0xc4/0x140 + __arm64_sys_finit_module+0x28/0x34 + invoke_syscall+0x50/0x120 + el0_svc_common.constprop.0+0x6c/0x1b0 + do_el0_svc+0x34/0x44 + el0_svc+0x48/0xf0 + el0t_64_sync_handler+0xb8/0xc0 + el0t_64_sync+0x1a0/0x1a4 + +-> #0 (&dev->lock){+.+.}-{3:3}: + check_prev_add+0xb4/0xc80 + validate_chain+0x414/0x47c + __lock_acquire+0x35c/0x6c0 + lock_acquire.part.0+0xcc/0x220 + lock_acquire+0x68/0x84 + __mutex_lock+0x8c/0x414 + mutex_lock_nested+0x34/0x40 + phy_attach_direct+0x17c/0x3a0 [libphy] + phylink_fwnode_phy_connect.part.0+0x70/0xe4 [phylink] + 
phylink_fwnode_phy_connect+0x48/0x60 [phylink] + mvpp2_open+0xec/0x2e0 [mvpp2] + __dev_open+0x104/0x214 + __dev_change_flags+0x1d4/0x254 + dev_change_flags+0x2c/0x7c + do_setlink+0x254/0xa50 + __rtnl_newlink+0x430/0x514 + rtnl_newlink+0x58/0x8c + rtnetlink_rcv_msg+0x17c/0x560 + netlink_rcv_skb+0x64/0x150 + rtnetlink_rcv+0x20/0x30 + netlink_unicast+0x1d4/0x2b4 + netlink_sendmsg+0x1a4/0x400 + ____sys_sendmsg+0x228/0x290 + ___sys_sendmsg+0x88/0xec + __sys_sendmsg+0x70/0xd0 + __arm64_sys_sendmsg+0x2c/0x40 + invoke_syscall+0x50/0x120 + el0_svc_common.constprop.0+0x6c/0x1b0 + do_el0_svc+0x34/0x44 + el0_svc+0x48/0xf0 + el0t_64_sync_handler+0xb8/0xc0 + el0t_64_sync+0x1a0/0x1a4 + +other info that might help us debug this: + + Possible unsafe locking scenario: + + CPU0 CPU1 + ---- ---- + lock(rtnl_mutex); + lock(&dev->lock); + lock(rtnl_mutex); + lock(&dev->lock); + + *** DEADLOCK *** + +Fixes: 298e54fa810e ("net: phy: add core phylib sfp support") +Reported-by: Marc Zyngier +Signed-off-by: Russell King (Oracle) +Reviewed-by: Andrew Lunn +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/phy/phy_device.c | 8 +------- + 1 file changed, 1 insertion(+), 7 deletions(-) + +diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c +index 996842a1a9a35..73485383db4ef 100644 +--- a/drivers/net/phy/phy_device.c ++++ b/drivers/net/phy/phy_device.c +@@ -3052,8 +3052,6 @@ static int phy_probe(struct device *dev) + if (phydrv->flags & PHY_IS_INTERNAL) + phydev->is_internal = true; + +- mutex_lock(&phydev->lock); +- + /* Deassert the reset signal */ + phy_device_reset(phydev, 0); + +@@ -3121,12 +3119,10 @@ static int phy_probe(struct device *dev) + phydev->state = PHY_READY; + + out: +- /* Assert the reset signal */ ++ /* Re-assert the reset signal on error */ + if (err) + phy_device_reset(phydev, 1); + +- mutex_unlock(&phydev->lock); +- + return err; + } + +@@ -3136,9 +3132,7 @@ static int phy_remove(struct device *dev) + + cancel_delayed_work_sync(&phydev->state_queue); + +- mutex_lock(&phydev->lock); + phydev->state = PHY_DOWN; +- mutex_unlock(&phydev->lock); + + sfp_bus_del_upstream(phydev->sfp_bus); + phydev->sfp_bus = NULL; +-- +2.39.2 + diff --git a/queue-5.15/net-smc-fix-fallback-failed-while-sendmsg-with-fasto.patch b/queue-5.15/net-smc-fix-fallback-failed-while-sendmsg-with-fasto.patch new file mode 100644 index 00000000000..d78febb237d --- /dev/null +++ b/queue-5.15/net-smc-fix-fallback-failed-while-sendmsg-with-fasto.patch @@ -0,0 +1,74 @@ +From 1340c5c16e9dc4b5ab7f9d18a3223c15a97727d3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 7 Mar 2023 11:23:46 +0800 +Subject: net/smc: fix fallback failed while sendmsg with fastopen + +From: D. Wythe + +[ Upstream commit ce7ca794712f186da99719e8b4e97bd5ddbb04c3 ] + +Before determining whether the msg has unsupported options, it has been +prematurely terminated by the wrong status check. + +For the application, the general usages of MSG_FASTOPEN likes + +fd = socket(...) 
+/* rather than connect */ +sendto(fd, data, len, MSG_FASTOPEN) + +Hence, We need to check the flag before state check, because the sock +state here is always SMC_INIT when applications tries MSG_FASTOPEN. +Once we found unsupported options, fallback it to TCP. + +Fixes: ee9dfbef02d1 ("net/smc: handle sockopts forcing fallback") +Signed-off-by: D. Wythe +Signed-off-by: Simon Horman + +v2 -> v1: Optimize code style +Reviewed-by: Tony Lu + +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/smc/af_smc.c | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c +index d5ddf283ed8e2..9cdb7df0801f3 100644 +--- a/net/smc/af_smc.c ++++ b/net/smc/af_smc.c +@@ -2172,16 +2172,14 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) + { + struct sock *sk = sock->sk; + struct smc_sock *smc; +- int rc = -EPIPE; ++ int rc; + + smc = smc_sk(sk); + lock_sock(sk); +- if ((sk->sk_state != SMC_ACTIVE) && +- (sk->sk_state != SMC_APPCLOSEWAIT1) && +- (sk->sk_state != SMC_INIT)) +- goto out; + ++ /* SMC does not support connect with fastopen */ + if (msg->msg_flags & MSG_FASTOPEN) { ++ /* not connected yet, fallback */ + if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) { + rc = smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP); + if (rc) +@@ -2190,6 +2188,11 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) + rc = -EINVAL; + goto out; + } ++ } else if ((sk->sk_state != SMC_ACTIVE) && ++ (sk->sk_state != SMC_APPCLOSEWAIT1) && ++ (sk->sk_state != SMC_INIT)) { ++ rc = -EPIPE; ++ goto out; + } + + if (smc->use_fallback) { +-- +2.39.2 + diff --git a/queue-5.15/net-stmmac-add-to-set-device-wake-up-flag-when-stmma.patch b/queue-5.15/net-stmmac-add-to-set-device-wake-up-flag-when-stmma.patch new file mode 100644 index 00000000000..2d1b5570a32 --- /dev/null +++ b/queue-5.15/net-stmmac-add-to-set-device-wake-up-flag-when-stmma.patch @@ -0,0 +1,50 @@ 
+From b4d184f19c4565bbcf3998c15535315f8c44a85c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 2 Mar 2023 14:21:43 +0800 +Subject: net: stmmac: add to set device wake up flag when stmmac init phy + +From: Rongguang Wei + +[ Upstream commit a9334b702a03b693f54ebd3b98f67bf722b74870 ] + +When MAC is not support PMT, driver will check PHY's WoL capability +and set device wakeup capability in stmmac_init_phy(). We can enable +the WoL through ethtool, the driver would enable the device wake up +flag. Now the device_may_wakeup() return true. + +But if there is a way which enable the PHY's WoL capability directly, +like in BIOS. The driver would not know the enable thing and would not +set the device wake up flag. The phy_suspend may fail like this: + +[ 32.409063] PM: dpm_run_callback(): mdio_bus_phy_suspend+0x0/0x50 returns -16 +[ 32.409065] PM: Device stmmac-1:00 failed to suspend: error -16 +[ 32.409067] PM: Some devices failed to suspend, or early wake event detected + +Add to set the device wakeup enable flag according to the get_wol +function result in PHY can fix the error in this scene. + +v2: add a Fixes tag. + +Fixes: 1d8e5b0f3f2c ("net: stmmac: Support WOL with phy") +Signed-off-by: Rongguang Wei +Signed-off-by: David S.
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +index d56f65338ea66..728e68971c397 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c ++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +@@ -1262,6 +1262,7 @@ static int stmmac_init_phy(struct net_device *dev) + + phylink_ethtool_get_wol(priv->phylink, &wol); + device_set_wakeup_capable(priv->device, !!wol.supported); ++ device_set_wakeup_enable(priv->device, !!wol.wolopts); + } + + return ret; +-- +2.39.2 + diff --git a/queue-5.15/netfilter-conntrack-adopt-safer-max-chain-length.patch b/queue-5.15/netfilter-conntrack-adopt-safer-max-chain-length.patch new file mode 100644 index 00000000000..e631b044019 --- /dev/null +++ b/queue-5.15/netfilter-conntrack-adopt-safer-max-chain-length.patch @@ -0,0 +1,53 @@ +From e1caa9d4e5228f86e5ce76f712cbc95844be8308 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 7 Mar 2023 05:22:54 +0000 +Subject: netfilter: conntrack: adopt safer max chain length + +From: Eric Dumazet + +[ Upstream commit c77737b736ceb50fdf150434347dbd81ec76dbb1 ] + +Customers using GKE 1.25 and 1.26 are facing conntrack issues +root caused to commit c9c3b6811f74 ("netfilter: conntrack: make +max chain length random"). + +Even if we assume Uniform Hashing, a bucket often reaches 8 chained +items while the load factor of the hash table is smaller than 0.5 + +With a limit of 16, we reach load factors of 3. +With a limit of 32, we reach load factors of 11. +With a limit of 40, we reach load factors of 15. +With a limit of 50, we reach load factors of 24. + +This patch changes MIN_CHAINLEN to 50, to minimize risks. + +Ideally, we could in the future add a cushion based on expected +load factor (2 * nf_conntrack_max / nf_conntrack_buckets), +because some setups might expect unusual values.
+ +Fixes: c9c3b6811f74 ("netfilter: conntrack: make max chain length random") +Signed-off-by: Eric Dumazet +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/nf_conntrack_core.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c +index 43ea8cfd374bb..7ff0da5f998a0 100644 +--- a/net/netfilter/nf_conntrack_core.c ++++ b/net/netfilter/nf_conntrack_core.c +@@ -96,8 +96,8 @@ static DEFINE_MUTEX(nf_conntrack_mutex); + #define GC_SCAN_MAX_DURATION msecs_to_jiffies(10) + #define GC_SCAN_EXPIRED_MAX (64000u / HZ) + +-#define MIN_CHAINLEN 8u +-#define MAX_CHAINLEN (32u - MIN_CHAINLEN) ++#define MIN_CHAINLEN 50u ++#define MAX_CHAINLEN (80u - MIN_CHAINLEN) + + static struct conntrack_gc_work conntrack_gc_work; + +-- +2.39.2 + diff --git a/queue-5.15/netfilter-ctnetlink-revert-to-dumping-mark-regardles.patch b/queue-5.15/netfilter-ctnetlink-revert-to-dumping-mark-regardles.patch new file mode 100644 index 00000000000..57da0beba8a --- /dev/null +++ b/queue-5.15/netfilter-ctnetlink-revert-to-dumping-mark-regardles.patch @@ -0,0 +1,80 @@ +From 345d56b4554e3cfff0bb75481460d21b047c6ad2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 2 Mar 2023 17:48:31 -0800 +Subject: netfilter: ctnetlink: revert to dumping mark regardless of event type + +From: Ivan Delalande + +[ Upstream commit 9f7dd42f0db1dc6915a52d4a8a96ca18dd8cc34e ] + +It seems that change was unintentional, we have userspace code that +needs the mark while listening for events like REPLY, DESTROY, etc. +Also include 0-marks in requested dumps, as they were before that fix. 
+ +Fixes: 1feeae071507 ("netfilter: ctnetlink: fix compilation warning after data race fixes in ct mark") +Signed-off-by: Ivan Delalande +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/nf_conntrack_netlink.c | 14 +++++++------- + 1 file changed, 7 insertions(+), 7 deletions(-) + +diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c +index 18a508783c282..8776e75b200ff 100644 +--- a/net/netfilter/nf_conntrack_netlink.c ++++ b/net/netfilter/nf_conntrack_netlink.c +@@ -322,11 +322,12 @@ ctnetlink_dump_timestamp(struct sk_buff *skb, const struct nf_conn *ct) + } + + #ifdef CONFIG_NF_CONNTRACK_MARK +-static int ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct) ++static int ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct, ++ bool dump) + { + u32 mark = READ_ONCE(ct->mark); + +- if (!mark) ++ if (!mark && !dump) + return 0; + + if (nla_put_be32(skb, CTA_MARK, htonl(mark))) +@@ -337,7 +338,7 @@ static int ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct) + return -1; + } + #else +-#define ctnetlink_dump_mark(a, b) (0) ++#define ctnetlink_dump_mark(a, b, c) (0) + #endif + + #ifdef CONFIG_NF_CONNTRACK_SECMARK +@@ -542,7 +543,7 @@ static int ctnetlink_dump_extinfo(struct sk_buff *skb, + static int ctnetlink_dump_info(struct sk_buff *skb, struct nf_conn *ct) + { + if (ctnetlink_dump_status(skb, ct) < 0 || +- ctnetlink_dump_mark(skb, ct) < 0 || ++ ctnetlink_dump_mark(skb, ct, true) < 0 || + ctnetlink_dump_secctx(skb, ct) < 0 || + ctnetlink_dump_id(skb, ct) < 0 || + ctnetlink_dump_use(skb, ct) < 0 || +@@ -825,8 +826,7 @@ ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item) + } + + #ifdef CONFIG_NF_CONNTRACK_MARK +- if (events & (1 << IPCT_MARK) && +- ctnetlink_dump_mark(skb, ct) < 0) ++ if (ctnetlink_dump_mark(skb, ct, events & (1 << IPCT_MARK))) + goto nla_put_failure; + #endif + nlmsg_end(skb, nlh); +@@ -2759,7 +2759,7 @@ static 
int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct) + goto nla_put_failure; + + #ifdef CONFIG_NF_CONNTRACK_MARK +- if (ctnetlink_dump_mark(skb, ct) < 0) ++ if (ctnetlink_dump_mark(skb, ct, true) < 0) + goto nla_put_failure; + #endif + if (ctnetlink_dump_labels(skb, ct) < 0) +-- +2.39.2 + diff --git a/queue-5.15/netfilter-tproxy-fix-deadlock-due-to-missing-bh-disa.patch b/queue-5.15/netfilter-tproxy-fix-deadlock-due-to-missing-bh-disa.patch new file mode 100644 index 00000000000..2b100ed8fae --- /dev/null +++ b/queue-5.15/netfilter-tproxy-fix-deadlock-due-to-missing-bh-disa.patch @@ -0,0 +1,83 @@ +From 15e76052bb65d4294be31a730940447b29b938c1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 3 Mar 2023 10:58:56 +0100 +Subject: netfilter: tproxy: fix deadlock due to missing BH disable +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Florian Westphal + +[ Upstream commit 4a02426787bf024dafdb79b362285ee325de3f5e ] + +The xtables packet traverser performs an unconditional local_bh_disable(), +but the nf_tables evaluation loop does not. + +Functions that are called from either xtables or nftables must assume +that they can be called in process context. + +inet_twsk_deschedule_put() assumes that no softirq interrupt can occur. +If tproxy is used from nf_tables it's possible that we'll deadlock +trying to acquire a lock already held in process context. + +Add a small helper that takes care of this and use it.
+ +Link: https://lore.kernel.org/netfilter-devel/401bd6ed-314a-a196-1cdc-e13c720cc8f2@balasys.hu/ +Fixes: 4ed8eb6570a4 ("netfilter: nf_tables: Add native tproxy support") +Reported-and-tested-by: Major Dávid +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + include/net/netfilter/nf_tproxy.h | 7 +++++++ + net/ipv4/netfilter/nf_tproxy_ipv4.c | 2 +- + net/ipv6/netfilter/nf_tproxy_ipv6.c | 2 +- + 3 files changed, 9 insertions(+), 2 deletions(-) + +diff --git a/include/net/netfilter/nf_tproxy.h b/include/net/netfilter/nf_tproxy.h +index 82d0e41b76f22..faa108b1ba675 100644 +--- a/include/net/netfilter/nf_tproxy.h ++++ b/include/net/netfilter/nf_tproxy.h +@@ -17,6 +17,13 @@ static inline bool nf_tproxy_sk_is_transparent(struct sock *sk) + return false; + } + ++static inline void nf_tproxy_twsk_deschedule_put(struct inet_timewait_sock *tw) ++{ ++ local_bh_disable(); ++ inet_twsk_deschedule_put(tw); ++ local_bh_enable(); ++} ++ + /* assign a socket to the skb -- consumes sk */ + static inline void nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk) + { +diff --git a/net/ipv4/netfilter/nf_tproxy_ipv4.c b/net/ipv4/netfilter/nf_tproxy_ipv4.c +index b2bae0b0e42a1..61cb2341f50fe 100644 +--- a/net/ipv4/netfilter/nf_tproxy_ipv4.c ++++ b/net/ipv4/netfilter/nf_tproxy_ipv4.c +@@ -38,7 +38,7 @@ nf_tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb, + hp->source, lport ? lport : hp->dest, + skb->dev, NF_TPROXY_LOOKUP_LISTENER); + if (sk2) { +- inet_twsk_deschedule_put(inet_twsk(sk)); ++ nf_tproxy_twsk_deschedule_put(inet_twsk(sk)); + sk = sk2; + } + } +diff --git a/net/ipv6/netfilter/nf_tproxy_ipv6.c b/net/ipv6/netfilter/nf_tproxy_ipv6.c +index 6bac68fb27a39..3fe4f15e01dc8 100644 +--- a/net/ipv6/netfilter/nf_tproxy_ipv6.c ++++ b/net/ipv6/netfilter/nf_tproxy_ipv6.c +@@ -63,7 +63,7 @@ nf_tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff, + lport ? 
lport : hp->dest, + skb->dev, NF_TPROXY_LOOKUP_LISTENER); + if (sk2) { +- inet_twsk_deschedule_put(inet_twsk(sk)); ++ nf_tproxy_twsk_deschedule_put(inet_twsk(sk)); + sk = sk2; + } + } +-- +2.39.2 + diff --git a/queue-5.15/nfc-fdp-add-null-check-of-devm_kmalloc_array-in-fdp_.patch b/queue-5.15/nfc-fdp-add-null-check-of-devm_kmalloc_array-in-fdp_.patch new file mode 100644 index 00000000000..3cc9fab1048 --- /dev/null +++ b/queue-5.15/nfc-fdp-add-null-check-of-devm_kmalloc_array-in-fdp_.patch @@ -0,0 +1,49 @@ +From f6ac3047bd83d49917ae3d6d7a5c4d6d8cc06c9f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 27 Feb 2023 17:30:37 +0800 +Subject: nfc: fdp: add null check of devm_kmalloc_array in + fdp_nci_i2c_read_device_properties + +From: Kang Chen + +[ Upstream commit 11f180a5d62a51b484e9648f9b310e1bd50b1a57 ] + +devm_kmalloc_array may fails, *fw_vsc_cfg might be null and cause +out-of-bounds write in device_property_read_u8_array later. + +Fixes: a06347c04c13 ("NFC: Add Intel Fields Peak NFC solution driver") +Signed-off-by: Kang Chen +Reviewed-by: Krzysztof Kozlowski +Reviewed-by: Simon Horman +Link: https://lore.kernel.org/r/20230227093037.907654-1-void0red@gmail.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/nfc/fdp/i2c.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/drivers/nfc/fdp/i2c.c b/drivers/nfc/fdp/i2c.c +index 051c43a2a52f8..5f97dcf08dd07 100644 +--- a/drivers/nfc/fdp/i2c.c ++++ b/drivers/nfc/fdp/i2c.c +@@ -249,6 +249,9 @@ static void fdp_nci_i2c_read_device_properties(struct device *dev, + len, sizeof(**fw_vsc_cfg), + GFP_KERNEL); + ++ if (!*fw_vsc_cfg) ++ goto alloc_err; ++ + r = device_property_read_u8_array(dev, FDP_DP_FW_VSC_CFG_NAME, + *fw_vsc_cfg, len); + +@@ -262,6 +265,7 @@ static void fdp_nci_i2c_read_device_properties(struct device *dev, + *fw_vsc_cfg = NULL; + } + ++alloc_err: + dev_dbg(dev, "Clock type: %d, clock frequency: %d, VSC: %s", + *clock_type, *clock_freq, *fw_vsc_cfg != NULL ? 
"yes" : "no"); + } +-- +2.39.2 + diff --git a/queue-5.15/octeontx2-af-unlock-contexts-in-the-queue-context-ca.patch b/queue-5.15/octeontx2-af-unlock-contexts-in-the-queue-context-ca.patch new file mode 100644 index 00000000000..19086d88b6e --- /dev/null +++ b/queue-5.15/octeontx2-af-unlock-contexts-in-the-queue-context-ca.patch @@ -0,0 +1,229 @@ +From 33b0e354bc5726fc4ba414d3ef7f1522f002584d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 7 Mar 2023 16:19:08 +0530 +Subject: octeontx2-af: Unlock contexts in the queue context cache in case of + fault detection + +From: Suman Ghosh + +[ Upstream commit ea9dd2e5c6d12c8b65ce7514c8359a70eeaa0e70 ] + +NDC caches contexts of frequently used queue's (Rx and Tx queues) +contexts. Due to a HW errata when NDC detects fault/poision while +accessing contexts it could go into an illegal state where a cache +line could get locked forever. To makesure all cache lines in NDC +are available for optimum performance upon fault/lockerror/posion +errors scan through all cache lines in NDC and clear the lock bit. + +Fixes: 4a3581cd5995 ("octeontx2-af: NPA AQ instruction enqueue support") +Signed-off-by: Suman Ghosh +Signed-off-by: Sunil Kovvuri Goutham +Signed-off-by: Sai Krishna +Reviewed-by: Simon Horman +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + .../net/ethernet/marvell/octeontx2/af/rvu.h | 5 ++ + .../marvell/octeontx2/af/rvu_debugfs.c | 7 +-- + .../ethernet/marvell/octeontx2/af/rvu_nix.c | 16 ++++- + .../ethernet/marvell/octeontx2/af/rvu_npa.c | 58 ++++++++++++++++++- + .../ethernet/marvell/octeontx2/af/rvu_reg.h | 3 + + 5 files changed, 82 insertions(+), 7 deletions(-) + +diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +index a7213db38804b..fed49d6a178d0 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h ++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +@@ -811,6 +811,9 @@ bool is_mcam_entry_enabled(struct rvu *rvu, struct npc_mcam *mcam, int blkaddr, + /* CPT APIs */ + int rvu_cpt_lf_teardown(struct rvu *rvu, u16 pcifunc, int lf, int slot); + ++#define NDC_AF_BANK_MASK GENMASK_ULL(7, 0) ++#define NDC_AF_BANK_LINE_MASK GENMASK_ULL(31, 16) ++ + /* CN10K RVU */ + int rvu_set_channels_base(struct rvu *rvu); + void rvu_program_channels(struct rvu *rvu); +@@ -826,6 +829,8 @@ static inline void rvu_dbg_init(struct rvu *rvu) {} + static inline void rvu_dbg_exit(struct rvu *rvu) {} + #endif + ++int rvu_ndc_fix_locked_cacheline(struct rvu *rvu, int blkaddr); ++ + /* RVU Switch */ + void rvu_switch_enable(struct rvu *rvu); + void rvu_switch_disable(struct rvu *rvu); +diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +index 66d34699f160c..4dddf6ec3be87 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c ++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +@@ -196,9 +196,6 @@ enum cpt_eng_type { + CPT_IE_TYPE = 3, + }; + +-#define NDC_MAX_BANK(rvu, blk_addr) (rvu_read64(rvu, \ +- blk_addr, NDC_AF_CONST) & 0xFF) +- + #define rvu_dbg_NULL NULL + #define rvu_dbg_open_NULL NULL + +@@ -1009,6 +1006,7 @@ static int ndc_blk_hits_miss_stats(struct seq_file *s, int idx, int blk_addr) + struct nix_hw 
*nix_hw; + struct rvu *rvu; + int bank, max_bank; ++ u64 ndc_af_const; + + if (blk_addr == BLKADDR_NDC_NPA0) { + rvu = s->private; +@@ -1017,7 +1015,8 @@ static int ndc_blk_hits_miss_stats(struct seq_file *s, int idx, int blk_addr) + rvu = nix_hw->rvu; + } + +- max_bank = NDC_MAX_BANK(rvu, blk_addr); ++ ndc_af_const = rvu_read64(rvu, blk_addr, NDC_AF_CONST); ++ max_bank = FIELD_GET(NDC_AF_BANK_MASK, ndc_af_const); + for (bank = 0; bank < max_bank; bank++) { + seq_printf(s, "BANK:%d\n", bank); + seq_printf(s, "\tHits:\t%lld\n", +diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +index 09892703cfd46..d274d552924a3 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c ++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +@@ -797,6 +797,7 @@ static int nix_aq_enqueue_wait(struct rvu *rvu, struct rvu_block *block, + struct nix_aq_res_s *result; + int timeout = 1000; + u64 reg, head; ++ int ret; + + result = (struct nix_aq_res_s *)aq->res->base; + +@@ -820,9 +821,22 @@ static int nix_aq_enqueue_wait(struct rvu *rvu, struct rvu_block *block, + return -EBUSY; + } + +- if (result->compcode != NIX_AQ_COMP_GOOD) ++ if (result->compcode != NIX_AQ_COMP_GOOD) { + /* TODO: Replace this with some error code */ ++ if (result->compcode == NIX_AQ_COMP_CTX_FAULT || ++ result->compcode == NIX_AQ_COMP_LOCKERR || ++ result->compcode == NIX_AQ_COMP_CTX_POISON) { ++ ret = rvu_ndc_fix_locked_cacheline(rvu, BLKADDR_NDC_NIX0_RX); ++ ret |= rvu_ndc_fix_locked_cacheline(rvu, BLKADDR_NDC_NIX0_TX); ++ ret |= rvu_ndc_fix_locked_cacheline(rvu, BLKADDR_NDC_NIX1_RX); ++ ret |= rvu_ndc_fix_locked_cacheline(rvu, BLKADDR_NDC_NIX1_TX); ++ if (ret) ++ dev_err(rvu->dev, ++ "%s: Not able to unlock cachelines\n", __func__); ++ } ++ + return -EBUSY; ++ } + + return 0; + } +diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c +index 
70bd036ed76e4..4f5ca5ab13a40 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c ++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c +@@ -4,7 +4,7 @@ + * Copyright (C) 2018 Marvell. + * + */ +- ++#include + #include + #include + +@@ -42,9 +42,18 @@ static int npa_aq_enqueue_wait(struct rvu *rvu, struct rvu_block *block, + return -EBUSY; + } + +- if (result->compcode != NPA_AQ_COMP_GOOD) ++ if (result->compcode != NPA_AQ_COMP_GOOD) { + /* TODO: Replace this with some error code */ ++ if (result->compcode == NPA_AQ_COMP_CTX_FAULT || ++ result->compcode == NPA_AQ_COMP_LOCKERR || ++ result->compcode == NPA_AQ_COMP_CTX_POISON) { ++ if (rvu_ndc_fix_locked_cacheline(rvu, BLKADDR_NDC_NPA0)) ++ dev_err(rvu->dev, ++ "%s: Not able to unlock cachelines\n", __func__); ++ } ++ + return -EBUSY; ++ } + + return 0; + } +@@ -545,3 +554,48 @@ void rvu_npa_lf_teardown(struct rvu *rvu, u16 pcifunc, int npalf) + + npa_ctx_free(rvu, pfvf); + } ++ ++/* Due to an Hardware errata, in some corner cases, AQ context lock ++ * operations can result in a NDC way getting into an illegal state ++ * of not valid but locked. ++ * ++ * This API solves the problem by clearing the lock bit of the NDC block. ++ * The operation needs to be done for each line of all the NDC banks. 
++ */ ++int rvu_ndc_fix_locked_cacheline(struct rvu *rvu, int blkaddr) ++{ ++ int bank, max_bank, line, max_line, err; ++ u64 reg, ndc_af_const; ++ ++ /* Set the ENABLE bit(63) to '0' */ ++ reg = rvu_read64(rvu, blkaddr, NDC_AF_CAMS_RD_INTERVAL); ++ rvu_write64(rvu, blkaddr, NDC_AF_CAMS_RD_INTERVAL, reg & GENMASK_ULL(62, 0)); ++ ++ /* Poll until the BUSY bits(47:32) are set to '0' */ ++ err = rvu_poll_reg(rvu, blkaddr, NDC_AF_CAMS_RD_INTERVAL, GENMASK_ULL(47, 32), true); ++ if (err) { ++ dev_err(rvu->dev, "Timed out while polling for NDC CAM busy bits.\n"); ++ return err; ++ } ++ ++ ndc_af_const = rvu_read64(rvu, blkaddr, NDC_AF_CONST); ++ max_bank = FIELD_GET(NDC_AF_BANK_MASK, ndc_af_const); ++ max_line = FIELD_GET(NDC_AF_BANK_LINE_MASK, ndc_af_const); ++ for (bank = 0; bank < max_bank; bank++) { ++ for (line = 0; line < max_line; line++) { ++ /* Check if 'cache line valid bit(63)' is not set ++ * but 'cache line lock bit(60)' is set and on ++ * success, reset the lock bit(60). ++ */ ++ reg = rvu_read64(rvu, blkaddr, ++ NDC_AF_BANKX_LINEX_METADATA(bank, line)); ++ if (!(reg & BIT_ULL(63)) && (reg & BIT_ULL(60))) { ++ rvu_write64(rvu, blkaddr, ++ NDC_AF_BANKX_LINEX_METADATA(bank, line), ++ reg & ~BIT_ULL(60)); ++ } ++ } ++ } ++ ++ return 0; ++} +diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h +index 21f1ed4e222f7..d81b63a0d430f 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h ++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h +@@ -670,6 +670,7 @@ + #define NDC_AF_INTR_ENA_W1S (0x00068) + #define NDC_AF_INTR_ENA_W1C (0x00070) + #define NDC_AF_ACTIVE_PC (0x00078) ++#define NDC_AF_CAMS_RD_INTERVAL (0x00080) + #define NDC_AF_BP_TEST_ENABLE (0x001F8) + #define NDC_AF_BP_TEST(a) (0x00200 | (a) << 3) + #define NDC_AF_BLK_RST (0x002F0) +@@ -685,6 +686,8 @@ + (0x00F00 | (a) << 5 | (b) << 4) + #define NDC_AF_BANKX_HIT_PC(a) (0x01000 | (a) << 3) + #define NDC_AF_BANKX_MISS_PC(a) 
(0x01100 | (a) << 3) ++#define NDC_AF_BANKX_LINEX_METADATA(a, b) \ ++ (0x10000 | (a) << 12 | (b) << 3) + + /* LBK */ + #define LBK_CONST (0x10ull) +-- +2.39.2 + diff --git a/queue-5.15/perf-stat-fix-counting-when-initial-delay-configured.patch b/queue-5.15/perf-stat-fix-counting-when-initial-delay-configured.patch new file mode 100644 index 00000000000..d69966575fe --- /dev/null +++ b/queue-5.15/perf-stat-fix-counting-when-initial-delay-configured.patch @@ -0,0 +1,178 @@ +From 67e3ba97975f46659bfb079cb64a269a675c2328 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 2 Mar 2023 11:11:44 +0800 +Subject: perf stat: Fix counting when initial delay configured + +From: Changbin Du + +[ Upstream commit 25f69c69bc3ca8c781a94473f28d443d745768e3 ] + +When creating counters with initial delay configured, the enable_on_exec +field is not set. So we need to enable the counters later. The problem +is, when a workload is specified the target__none() is true. So we also +need to check stat_config.initial_delay. + +In this change, we add a new field 'initial_delay' for struct target +which could be shared by other subcommands. And define +target__enable_on_exec() which returns whether enable_on_exec should be +set on normal cases. 
+ +Before this fix the event is not counted: + + $ ./perf stat -e instructions -D 100 sleep 2 + Events disabled + Events enabled + + Performance counter stats for 'sleep 2': + + instructions + + 1.901661124 seconds time elapsed + + 0.001602000 seconds user + 0.000000000 seconds sys + +After fix it works: + + $ ./perf stat -e instructions -D 100 sleep 2 + Events disabled + Events enabled + + Performance counter stats for 'sleep 2': + + 404,214 instructions + + 1.901743475 seconds time elapsed + + 0.001617000 seconds user + 0.000000000 seconds sys + +Fixes: c587e77e100fa40e ("perf stat: Do not delay the workload with --delay") +Signed-off-by: Changbin Du +Acked-by: Namhyung Kim +Cc: Alexander Shishkin +Cc: Hui Wang +Cc: Ingo Molnar +Cc: Jiri Olsa +Cc: Mark Rutland +Cc: Namhyung Kim +Cc: Peter Zijlstra +Link: https://lore.kernel.org/r/20230302031146.2801588-2-changbin.du@huawei.com +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Sasha Levin +--- + tools/perf/builtin-stat.c | 15 +++++---------- + tools/perf/util/stat.c | 6 +----- + tools/perf/util/stat.h | 1 - + tools/perf/util/target.h | 12 ++++++++++++ + 4 files changed, 18 insertions(+), 16 deletions(-) + +diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c +index aad65c95c3711..0b709e3ead2ac 100644 +--- a/tools/perf/builtin-stat.c ++++ b/tools/perf/builtin-stat.c +@@ -558,12 +558,7 @@ static int enable_counters(void) + return err; + } + +- /* +- * We need to enable counters only if: +- * - we don't have tracee (attaching to task or cpu) +- * - we have initial delay configured +- */ +- if (!target__none(&target)) { ++ if (!target__enable_on_exec(&target)) { + if (!all_counters_use_bpf) + evlist__enable(evsel_list); + } +@@ -941,7 +936,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) + return err; + } + +- if (stat_config.initial_delay) { ++ if (target.initial_delay) { + pr_info(EVLIST_DISABLED_MSG); + } else { + err = enable_counters(); +@@ -953,8 +948,8 @@ static int 
__run_perf_stat(int argc, const char **argv, int run_idx) + if (forks) + evlist__start_workload(evsel_list); + +- if (stat_config.initial_delay > 0) { +- usleep(stat_config.initial_delay * USEC_PER_MSEC); ++ if (target.initial_delay > 0) { ++ usleep(target.initial_delay * USEC_PER_MSEC); + err = enable_counters(); + if (err) + return -1; +@@ -1244,7 +1239,7 @@ static struct option stat_options[] = { + "aggregate counts per thread", AGGR_THREAD), + OPT_SET_UINT(0, "per-node", &stat_config.aggr_mode, + "aggregate counts per numa node", AGGR_NODE), +- OPT_INTEGER('D', "delay", &stat_config.initial_delay, ++ OPT_INTEGER('D', "delay", &target.initial_delay, + "ms to wait before starting measurement after program start (-1: start with events disabled)"), + OPT_CALLBACK_NOOPT(0, "metric-only", &stat_config.metric_only, NULL, + "Only print computed metrics. No raw values", enable_metric_only), +diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c +index 09ea334586f23..5a0b3db1cab11 100644 +--- a/tools/perf/util/stat.c ++++ b/tools/perf/util/stat.c +@@ -576,11 +576,7 @@ int create_perf_stat_counter(struct evsel *evsel, + if (evsel__is_group_leader(evsel)) { + attr->disabled = 1; + +- /* +- * In case of initial_delay we enable tracee +- * events manually. 
+- */ +- if (target__none(target) && !config->initial_delay) ++ if (target__enable_on_exec(target)) + attr->enable_on_exec = 1; + } + +diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h +index 32c8527de3478..977616cf69e46 100644 +--- a/tools/perf/util/stat.h ++++ b/tools/perf/util/stat.h +@@ -137,7 +137,6 @@ struct perf_stat_config { + FILE *output; + unsigned int interval; + unsigned int timeout; +- int initial_delay; + unsigned int unit_width; + unsigned int metric_only_len; + int times; +diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h +index daec6cba500d4..880f1af7f6ad6 100644 +--- a/tools/perf/util/target.h ++++ b/tools/perf/util/target.h +@@ -18,6 +18,7 @@ struct target { + bool per_thread; + bool use_bpf; + bool hybrid; ++ int initial_delay; + const char *attr_map; + }; + +@@ -72,6 +73,17 @@ static inline bool target__none(struct target *target) + return !target__has_task(target) && !target__has_cpu(target); + } + ++static inline bool target__enable_on_exec(struct target *target) ++{ ++ /* ++ * Normally enable_on_exec should be set if: ++ * 1) The tracee process is forked (not attaching to existed task or cpu). ++ * 2) And initial_delay is not configured. ++ * Otherwise, we enable tracee events manually. 
++ */ ++ return target__none(target) && !target->initial_delay; ++} ++ + static inline bool target__has_per_thread(struct target *target) + { + return target->system_wide && target->per_thread; +-- +2.39.2 + diff --git a/queue-5.15/platform-x86-mlx_platform-select-regmap-instead-of-d.patch b/queue-5.15/platform-x86-mlx_platform-select-regmap-instead-of-d.patch new file mode 100644 index 00000000000..fc477fd27ee --- /dev/null +++ b/queue-5.15/platform-x86-mlx_platform-select-regmap-instead-of-d.patch @@ -0,0 +1,50 @@ +From 7ebde74135fe09d90bcd965c45a5e071f3faaf3c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 25 Feb 2023 21:39:51 -0800 +Subject: platform: x86: MLX_PLATFORM: select REGMAP instead of depending on it + +From: Randy Dunlap + +[ Upstream commit 7e7e1541c91615e9950d0b96bcd1806d297e970e ] + +REGMAP is a hidden (not user visible) symbol. Users cannot set it +directly thru "make *config", so drivers should select it instead of +depending on it if they need it. + +Consistently using "select" or "depends on" can also help reduce +Kconfig circular dependency issues. + +Therefore, change the use of "depends on REGMAP" to "select REGMAP". 
+ +Fixes: ef0f62264b2a ("platform/x86: mlx-platform: Add physical bus number auto detection") +Signed-off-by: Randy Dunlap +Cc: Vadim Pasternak +Cc: Darren Hart +Cc: Hans de Goede +Cc: Mark Gross +Cc: platform-driver-x86@vger.kernel.org +Link: https://lore.kernel.org/r/20230226053953.4681-7-rdunlap@infradead.org +Signed-off-by: Hans de Goede +Reviewed-by: Hans de Goede +Signed-off-by: Sasha Levin +--- + drivers/platform/x86/Kconfig | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig +index cd8146dbdd453..61186829d1f6b 100644 +--- a/drivers/platform/x86/Kconfig ++++ b/drivers/platform/x86/Kconfig +@@ -943,7 +943,8 @@ config I2C_MULTI_INSTANTIATE + + config MLX_PLATFORM + tristate "Mellanox Technologies platform support" +- depends on I2C && REGMAP ++ depends on I2C ++ select REGMAP + help + This option enables system support for the Mellanox Technologies + platform. The Mellanox systems provide data center networking +-- +2.39.2 + diff --git a/queue-5.15/powerpc-dts-t1040rdb-fix-compatible-string-for-rev-a.patch b/queue-5.15/powerpc-dts-t1040rdb-fix-compatible-string-for-rev-a.patch new file mode 100644 index 00000000000..84286c01d69 --- /dev/null +++ b/queue-5.15/powerpc-dts-t1040rdb-fix-compatible-string-for-rev-a.patch @@ -0,0 +1,36 @@ +From ff709f26d5dd3aaec61dea8d2813a322d8dabb60 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 24 Feb 2023 17:59:39 +0200 +Subject: powerpc: dts: t1040rdb: fix compatible string for Rev A boards + +From: Vladimir Oltean + +[ Upstream commit ae44f1c9d1fc54aeceb335fedb1e73b2c3ee4561 ] + +It looks like U-Boot fails to start the kernel properly when the +compatible string of the board isn't fsl,T1040RDB, so stop overriding it +from the rev-a.dts. 
+ +Fixes: 5ebb74749202 ("powerpc: dts: t1040rdb: fix ports names for Seville Ethernet switch") +Signed-off-by: Vladimir Oltean +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts b/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts +index 73f8c998c64df..d4f5f159d6f23 100644 +--- a/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts ++++ b/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts +@@ -10,7 +10,6 @@ + + / { + model = "fsl,T1040RDB-REV-A"; +- compatible = "fsl,T1040RDB-REV-A"; + }; + + &seville_port0 { +-- +2.39.2 + diff --git a/queue-5.15/regulator-core-fix-off-on-delay-us-for-always-on-boo.patch b/queue-5.15/regulator-core-fix-off-on-delay-us-for-always-on-boo.patch new file mode 100644 index 00000000000..caa21cc2e10 --- /dev/null +++ b/queue-5.15/regulator-core-fix-off-on-delay-us-for-always-on-boo.patch @@ -0,0 +1,57 @@ +From b9341ba590f2b69d02eb0dc0e22e0a3f3d40b738 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 19 Jul 2022 16:02:00 +0200 +Subject: regulator: core: Fix off-on-delay-us for always-on/boot-on regulators +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Christian Kohlschütter + +[ Upstream commit 218320fec29430438016f88dd4fbebfa1b95ad8d ] + +Regulators marked with "regulator-always-on" or "regulator-boot-on" +as well as an "off-on-delay-us", may run into cycling issues that are +hard to detect. + +This is caused by the "last_off" state not being initialized in this +case. + +Fix the "last_off" initialization by setting it to the current kernel +time upon initialization, regardless of always_on/boot_on state. 
+ +Signed-off-by: Christian Kohlschütter +Link: https://lore.kernel.org/r/FAFD5B39-E9C4-47C7-ACF1-2A04CD59758D@kohlschutter.com +Signed-off-by: Mark Brown +Stable-dep-of: 80d2c29e09e6 ("regulator: core: Use ktime_get_boottime() to determine how long a regulator was off") +Signed-off-by: Sasha Levin +--- + drivers/regulator/core.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c +index 450aa0756dd8c..c7b1e15bf7bb5 100644 +--- a/drivers/regulator/core.c ++++ b/drivers/regulator/core.c +@@ -1539,6 +1539,9 @@ static int set_machine_constraints(struct regulator_dev *rdev) + rdev->constraints->always_on = true; + } + ++ if (rdev->desc->off_on_delay) ++ rdev->last_off = ktime_get(); ++ + /* If the constraints say the regulator should be on at this point + * and we have control then make sure it is enabled. + */ +@@ -1572,8 +1575,6 @@ static int set_machine_constraints(struct regulator_dev *rdev) + + if (rdev->constraints->always_on) + rdev->use_count++; +- } else if (rdev->desc->off_on_delay) { +- rdev->last_off = ktime_get(); + } + + print_constraints(rdev); +-- +2.39.2 + diff --git a/queue-5.15/regulator-core-use-ktime_get_boottime-to-determine-h.patch b/queue-5.15/regulator-core-use-ktime_get_boottime-to-determine-h.patch new file mode 100644 index 00000000000..125714b421e --- /dev/null +++ b/queue-5.15/regulator-core-use-ktime_get_boottime-to-determine-h.patch @@ -0,0 +1,65 @@ +From 29d3bc11158389269b4161707bff6b23816fe1a2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 23 Feb 2023 00:33:30 +0000 +Subject: regulator: core: Use ktime_get_boottime() to determine how long a + regulator was off + +From: Matthias Kaehlcke + +[ Upstream commit 80d2c29e09e663761c2778167a625b25ffe01b6f ] + +For regulators with 'off-on-delay-us' the regulator framework currently +uses ktime_get() to determine how long the regulator has been off +before re-enabling it (after a delay if needed). 
A problem with using +ktime_get() is that it doesn't account for the time the system is +suspended. As a result a regulator with a longer 'off-on-delay' (e.g. +500ms) that was switched off during suspend might still incur a +delay on resume before it is re-enabled, even though the regulator +might have been off for hours. ktime_get_boottime() accounts for +suspend time, so use it instead of ktime_get(). + +Fixes: a8ce7bd89689 ("regulator: core: Fix off_on_delay handling") +Cc: stable@vger.kernel.org # 5.13+ +Signed-off-by: Matthias Kaehlcke +Reviewed-by: Stephen Boyd +Link: https://lore.kernel.org/r/20230223003301.v2.1.I9719661b8eb0a73b8c416f9c26cf5bd8c0563f99@changeid +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + drivers/regulator/core.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c +index c7b1e15bf7bb5..cd10880378a6d 100644 +--- a/drivers/regulator/core.c ++++ b/drivers/regulator/core.c +@@ -1540,7 +1540,7 @@ static int set_machine_constraints(struct regulator_dev *rdev) + } + + if (rdev->desc->off_on_delay) +- rdev->last_off = ktime_get(); ++ rdev->last_off = ktime_get_boottime(); + + /* If the constraints say the regulator should be on at this point + * and we have control then make sure it is enabled. +@@ -2629,7 +2629,7 @@ static int _regulator_do_enable(struct regulator_dev *rdev) + * this regulator was disabled.
+ */ + ktime_t end = ktime_add_us(rdev->last_off, rdev->desc->off_on_delay); +- s64 remaining = ktime_us_delta(end, ktime_get()); ++ s64 remaining = ktime_us_delta(end, ktime_get_boottime()); + + if (remaining > 0) + _regulator_enable_delay(remaining); +@@ -2868,7 +2868,7 @@ static int _regulator_do_disable(struct regulator_dev *rdev) + } + + if (rdev->desc->off_on_delay) +- rdev->last_off = ktime_get(); ++ rdev->last_off = ktime_get_boottime(); + + trace_regulator_disable_complete(rdev_get_name(rdev)); + +-- +2.39.2 + diff --git a/queue-5.15/regulator-flag-uncontrollable-regulators-as-always_o.patch b/queue-5.15/regulator-flag-uncontrollable-regulators-as-always_o.patch new file mode 100644 index 00000000000..febb4f05c6c --- /dev/null +++ b/queue-5.15/regulator-flag-uncontrollable-regulators-as-always_o.patch @@ -0,0 +1,58 @@ +From 39a57d9dc4272f534fa0e9a0cfde44958eba978d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 25 Mar 2022 14:46:37 +0000 +Subject: regulator: Flag uncontrollable regulators as always_on + +From: Mark Brown + +[ Upstream commit 261f06315cf7c3744731e36bfd8d4434949e3389 ] + +While we currently assume that regulators with no control available are +just unconditionally enabled this isn't always as clearly displayed to +users as is desirable, for example the code for disabling unused +regulators will log that it is about to disable them. Clean this up a +bit by setting always_on during constraint evaluation if we have no +available mechanism for controlling the regulator so things that check +the constraint will do the right thing.
+ +Signed-off-by: Mark Brown +Link: https://lore.kernel.org/r/20220325144637.1543496-1-broonie@kernel.org +Signed-off-by: Mark Brown +Stable-dep-of: 80d2c29e09e6 ("regulator: core: Use ktime_get_boottime() to determine how long a regulator was off") +Signed-off-by: Sasha Levin +--- + drivers/regulator/core.c | 18 ++++++++++++++++++ + 1 file changed, 18 insertions(+) + +diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c +index 3eae3aa5ad1d2..450aa0756dd8c 100644 +--- a/drivers/regulator/core.c ++++ b/drivers/regulator/core.c +@@ -1521,6 +1521,24 @@ static int set_machine_constraints(struct regulator_dev *rdev) + } + } + ++ /* ++ * If there is no mechanism for controlling the regulator then ++ * flag it as always_on so we don't end up duplicating checks ++ * for this so much. Note that we could control the state of ++ * a supply to control the output on a regulator that has no ++ * direct control. ++ */ ++ if (!rdev->ena_pin && !ops->enable) { ++ if (rdev->supply_name && !rdev->supply) ++ return -EPROBE_DEFER; ++ ++ if (rdev->supply) ++ rdev->constraints->always_on = ++ rdev->supply->rdev->constraints->always_on; ++ else ++ rdev->constraints->always_on = true; ++ } ++ + /* If the constraints say the regulator should be on at this point + * and we have control then make sure it is enabled. 
+ */ +-- +2.39.2 + diff --git a/queue-5.15/risc-v-avoid-dereferening-null-regs-in-die.patch b/queue-5.15/risc-v-avoid-dereferening-null-regs-in-die.patch new file mode 100644 index 00000000000..ede22ba8e56 --- /dev/null +++ b/queue-5.15/risc-v-avoid-dereferening-null-regs-in-die.patch @@ -0,0 +1,57 @@ +From d0314ed4bc5e234b099a189eb49d23a62620214f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 20 Sep 2022 13:00:37 -0700 +Subject: RISC-V: Avoid dereferening NULL regs in die() + +From: Palmer Dabbelt + +[ Upstream commit f2913d006fcdb61719635e093d1b5dd0dafecac7 ] + +I don't think we can actually die() without a regs pointer, but the +compiler was warning about a NULL check after a dereference. It seems +prudent to just avoid the possibly-NULL dereference, given that when +die()ing the system is already toast so who knows how we got there. + +Reported-by: kernel test robot +Reported-by: Dan Carpenter +Reviewed-by: Conor Dooley +Link: https://lore.kernel.org/r/20220920200037.6727-1-palmer@rivosinc.com +Signed-off-by: Palmer Dabbelt +Stable-dep-of: 130aee3fd998 ("riscv: Avoid enabling interrupts in die()") +Signed-off-by: Sasha Levin +--- + arch/riscv/kernel/traps.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c +index 6084bd93d2f58..502cba5029ca4 100644 +--- a/arch/riscv/kernel/traps.c ++++ b/arch/riscv/kernel/traps.c +@@ -33,6 +33,7 @@ void die(struct pt_regs *regs, const char *str) + { + static int die_counter; + int ret; ++ long cause; + + oops_enter(); + +@@ -42,11 +43,13 @@ void die(struct pt_regs *regs, const char *str) + + pr_emerg("%s [#%d]\n", str, ++die_counter); + print_modules(); +- show_regs(regs); ++ if (regs) ++ show_regs(regs); + +- ret = notify_die(DIE_OOPS, str, regs, 0, regs->cause, SIGSEGV); ++ cause = regs ? 
regs->cause : -1; ++ ret = notify_die(DIE_OOPS, str, regs, 0, cause, SIGSEGV); + +- if (regs && kexec_should_crash(current)) ++ if (kexec_should_crash(current)) + crash_kexec(regs); + + bust_spinlocks(0); +-- +2.39.2 + diff --git a/queue-5.15/risc-v-don-t-check-text_mutex-during-stop_machine.patch b/queue-5.15/risc-v-don-t-check-text_mutex-during-stop_machine.patch new file mode 100644 index 00000000000..f96fe338215 --- /dev/null +++ b/queue-5.15/risc-v-don-t-check-text_mutex-during-stop_machine.patch @@ -0,0 +1,163 @@ +From 512c5f9218dbd55b7ac39359879e9a1b6be562e6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 3 Mar 2023 14:37:55 +0000 +Subject: RISC-V: Don't check text_mutex during stop_machine + +From: Conor Dooley + +[ Upstream commit 2a8db5ec4a28a0fce822d10224db9471a44b6925 ] + +We're currently using stop_machine() to update ftrace & kprobes, which +means that the thread that takes text_mutex during may not be the same +as the thread that eventually patches the code. This isn't actually a +race because the lock is still held (preventing any other concurrent +accesses) and there is only one thread running during stop_machine(), +but it does trigger a lockdep failure. + +This patch just elides the lockdep check during stop_machine. 
+ +Fixes: c15ac4fd60d5 ("riscv/ftrace: Add dynamic function tracer support") +Suggested-by: Steven Rostedt +Reported-by: Changbin Du +Signed-off-by: Palmer Dabbelt +Signed-off-by: Conor Dooley +Link: https://lore.kernel.org/r/20230303143754.4005217-1-conor.dooley@microchip.com +Signed-off-by: Palmer Dabbelt +Signed-off-by: Sasha Levin +--- + arch/riscv/include/asm/ftrace.h | 2 +- + arch/riscv/include/asm/patch.h | 2 ++ + arch/riscv/kernel/ftrace.c | 14 ++++++++++++-- + arch/riscv/kernel/patch.c | 28 +++++++++++++++++++++++++--- + 4 files changed, 40 insertions(+), 6 deletions(-) + +diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h +index 9e73922e1e2e5..d47d87c2d7e3d 100644 +--- a/arch/riscv/include/asm/ftrace.h ++++ b/arch/riscv/include/asm/ftrace.h +@@ -109,6 +109,6 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec); + #define ftrace_init_nop ftrace_init_nop + #endif + +-#endif ++#endif /* CONFIG_DYNAMIC_FTRACE */ + + #endif /* _ASM_RISCV_FTRACE_H */ +diff --git a/arch/riscv/include/asm/patch.h b/arch/riscv/include/asm/patch.h +index 9a7d7346001ee..98d9de07cba17 100644 +--- a/arch/riscv/include/asm/patch.h ++++ b/arch/riscv/include/asm/patch.h +@@ -9,4 +9,6 @@ + int patch_text_nosync(void *addr, const void *insns, size_t len); + int patch_text(void *addr, u32 insn); + ++extern int riscv_patch_in_stop_machine; ++ + #endif /* _ASM_RISCV_PATCH_H */ +diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c +index 47b43d8ee9a6c..1bf92cfa6764e 100644 +--- a/arch/riscv/kernel/ftrace.c ++++ b/arch/riscv/kernel/ftrace.c +@@ -15,11 +15,21 @@ + int ftrace_arch_code_modify_prepare(void) __acquires(&text_mutex) + { + mutex_lock(&text_mutex); ++ ++ /* ++ * The code sequences we use for ftrace can't be patched while the ++ * kernel is running, so we need to use stop_machine() to modify them ++ * for now. This doesn't play nice with text_mutex, we use this flag ++ * to elide the check. 
++ */ ++ riscv_patch_in_stop_machine = true; ++ + return 0; + } + + int ftrace_arch_code_modify_post_process(void) __releases(&text_mutex) + { ++ riscv_patch_in_stop_machine = false; + mutex_unlock(&text_mutex); + return 0; + } +@@ -109,9 +119,9 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) + { + int out; + +- ftrace_arch_code_modify_prepare(); ++ mutex_lock(&text_mutex); + out = ftrace_make_nop(mod, rec, MCOUNT_ADDR); +- ftrace_arch_code_modify_post_process(); ++ mutex_unlock(&text_mutex); + + return out; + } +diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c +index 765004b605132..e099961453cca 100644 +--- a/arch/riscv/kernel/patch.c ++++ b/arch/riscv/kernel/patch.c +@@ -11,6 +11,7 @@ + #include + #include + #include ++#include + #include + + struct patch_insn { +@@ -19,6 +20,8 @@ struct patch_insn { + atomic_t cpu_count; + }; + ++int riscv_patch_in_stop_machine = false; ++ + #ifdef CONFIG_MMU + /* + * The fix_to_virt(, idx) needs a const value (not a dynamic variable of +@@ -59,8 +62,15 @@ static int patch_insn_write(void *addr, const void *insn, size_t len) + * Before reaching here, it was expected to lock the text_mutex + * already, so we don't need to give another lock here and could + * ensure that it was safe between each cores. ++ * ++ * We're currently using stop_machine() for ftrace & kprobes, and while ++ * that ensures text_mutex is held before installing the mappings it ++ * does not ensure text_mutex is held by the calling thread. That's ++ * safe but triggers a lockdep failure, so just elide it for that ++ * specific case. 
+ */ +- lockdep_assert_held(&text_mutex); ++ if (!riscv_patch_in_stop_machine) ++ lockdep_assert_held(&text_mutex); + + if (across_pages) + patch_map(addr + len, FIX_TEXT_POKE1); +@@ -121,13 +131,25 @@ NOKPROBE_SYMBOL(patch_text_cb); + + int patch_text(void *addr, u32 insn) + { ++ int ret; + struct patch_insn patch = { + .addr = addr, + .insn = insn, + .cpu_count = ATOMIC_INIT(0), + }; + +- return stop_machine_cpuslocked(patch_text_cb, +- &patch, cpu_online_mask); ++ /* ++ * kprobes takes text_mutex, before calling patch_text(), but as we call ++ * calls stop_machine(), the lockdep assertion in patch_insn_write() ++ * gets confused by the context in which the lock is taken. ++ * Instead, ensure the lock is held before calling stop_machine(), and ++ * set riscv_patch_in_stop_machine to skip the check in ++ * patch_insn_write(). ++ */ ++ lockdep_assert_held(&text_mutex); ++ riscv_patch_in_stop_machine = true; ++ ret = stop_machine_cpuslocked(patch_text_cb, &patch, cpu_online_mask); ++ riscv_patch_in_stop_machine = false; ++ return ret; + } + NOKPROBE_SYMBOL(patch_text); +-- +2.39.2 + diff --git a/queue-5.15/riscv-add-header-include-guards-to-insn.h.patch b/queue-5.15/riscv-add-header-include-guards-to-insn.h.patch new file mode 100644 index 00000000000..e5e4d26118a --- /dev/null +++ b/queue-5.15/riscv-add-header-include-guards-to-insn.h.patch @@ -0,0 +1,48 @@ +From 532709216bdea4044c0a2c2dcad945ca147005fa Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 29 Jan 2023 17:42:42 +0800 +Subject: riscv: Add header include guards to insn.h + +From: Liao Chang + +[ Upstream commit 8ac6e619d9d51b3eb5bae817db8aa94e780a0db4 ] + +Add header include guards to insn.h to prevent repeating declaration of +any identifiers in insn.h. 
+ +Fixes: edde5584c7ab ("riscv: Add SW single-step support for KDB") +Signed-off-by: Liao Chang +Reviewed-by: Andrew Jones +Fixes: c9c1af3f186a ("RISC-V: rename parse_asm.h to insn.h") +Reviewed-by: Conor Dooley +Link: https://lore.kernel.org/r/20230129094242.282620-1-liaochang1@huawei.com +Cc: stable@vger.kernel.org +Signed-off-by: Palmer Dabbelt +Signed-off-by: Sasha Levin +--- + arch/riscv/include/asm/parse_asm.h | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/arch/riscv/include/asm/parse_asm.h b/arch/riscv/include/asm/parse_asm.h +index f36368de839f5..3cd00332d70f5 100644 +--- a/arch/riscv/include/asm/parse_asm.h ++++ b/arch/riscv/include/asm/parse_asm.h +@@ -3,6 +3,9 @@ + * Copyright (C) 2020 SiFive + */ + ++#ifndef _ASM_RISCV_INSN_H ++#define _ASM_RISCV_INSN_H ++ + #include + + /* The bit field of immediate value in I-type instruction */ +@@ -217,3 +220,5 @@ static inline bool is_ ## INSN_NAME ## _insn(long insn) \ + (RVC_X(x_, RVC_B_IMM_5_OPOFF, RVC_B_IMM_5_MASK) << RVC_B_IMM_5_OFF) | \ + (RVC_X(x_, RVC_B_IMM_7_6_OPOFF, RVC_B_IMM_7_6_MASK) << RVC_B_IMM_7_6_OFF) | \ + (RVC_IMM_SIGN(x_) << RVC_B_IMM_SIGN_OFF); }) ++ ++#endif /* _ASM_RISCV_INSN_H */ +-- +2.39.2 + diff --git a/queue-5.15/riscv-avoid-enabling-interrupts-in-die.patch b/queue-5.15/riscv-avoid-enabling-interrupts-in-die.patch new file mode 100644 index 00000000000..a5e58cb15f1 --- /dev/null +++ b/queue-5.15/riscv-avoid-enabling-interrupts-in-die.patch @@ -0,0 +1,60 @@ +From 78d45238db39285a18c8b44280d11c6cecec4572 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 15 Feb 2023 14:48:28 +0000 +Subject: riscv: Avoid enabling interrupts in die() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Mattias Nissler + +[ Upstream commit 130aee3fd9981297ff9354e5d5609cd59aafbbea ] + +While working on something else, I noticed that the kernel would start +accepting interrupts again after crashing in an interrupt handler. 
Since +the kernel is already in inconsistent state, enabling interrupts is +dangerous and opens up risk of kernel state deteriorating further. +Interrupts do get enabled via what looks like an unintended side effect of +spin_unlock_irq, so switch to the more cautious +spin_lock_irqsave/spin_unlock_irqrestore instead. + +Fixes: 76d2a0493a17 ("RISC-V: Init and Halt Code") +Signed-off-by: Mattias Nissler +Reviewed-by: Björn Töpel +Link: https://lore.kernel.org/r/20230215144828.3370316-1-mnissler@rivosinc.com +Cc: stable@vger.kernel.org +Signed-off-by: Palmer Dabbelt +Signed-off-by: Sasha Levin +--- + arch/riscv/kernel/traps.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c +index 502cba5029ca4..4f38b3c47e6d5 100644 +--- a/arch/riscv/kernel/traps.c ++++ b/arch/riscv/kernel/traps.c +@@ -34,10 +34,11 @@ void die(struct pt_regs *regs, const char *str) + static int die_counter; + int ret; + long cause; ++ unsigned long flags; + + oops_enter(); + +- spin_lock_irq(&die_lock); ++ spin_lock_irqsave(&die_lock, flags); + console_verbose(); + bust_spinlocks(1); + +@@ -54,7 +55,7 @@ void die(struct pt_regs *regs, const char *str) + + bust_spinlocks(0); + add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); +- spin_unlock_irq(&die_lock); ++ spin_unlock_irqrestore(&die_lock, flags); + oops_exit(); + + if (in_interrupt()) +-- +2.39.2 + diff --git a/queue-5.15/riscv-use-read_once_nocheck-in-imprecise-unwinding-s.patch b/queue-5.15/riscv-use-read_once_nocheck-in-imprecise-unwinding-s.patch new file mode 100644 index 00000000000..222e4855247 --- /dev/null +++ b/queue-5.15/riscv-use-read_once_nocheck-in-imprecise-unwinding-s.patch @@ -0,0 +1,99 @@ +From 7b98560fa99221567511b0751a8084affb783df9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 8 Mar 2023 10:16:39 +0100 +Subject: riscv: Use READ_ONCE_NOCHECK in imprecise unwinding stack mode + +From: Alexandre Ghiti + +[ Upstream commit 
76950340cf03b149412fe0d5f0810e52ac1df8cb ] + +When CONFIG_FRAME_POINTER is unset, the stack unwinding function +walk_stackframe randomly reads the stack and then, when KASAN is enabled, +it can lead to the following backtrace: + +[ 0.000000] ================================================================== +[ 0.000000] BUG: KASAN: stack-out-of-bounds in walk_stackframe+0xa6/0x11a +[ 0.000000] Read of size 8 at addr ffffffff81807c40 by task swapper/0 +[ 0.000000] +[ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 6.2.0-12919-g24203e6db61f #43 +[ 0.000000] Hardware name: riscv-virtio,qemu (DT) +[ 0.000000] Call Trace: +[ 0.000000] [] walk_stackframe+0x0/0x11a +[ 0.000000] [] init_param_lock+0x26/0x2a +[ 0.000000] [] walk_stackframe+0xa2/0x11a +[ 0.000000] [] dump_stack_lvl+0x22/0x36 +[ 0.000000] [] print_report+0x198/0x4a8 +[ 0.000000] [] init_param_lock+0x26/0x2a +[ 0.000000] [] walk_stackframe+0xa2/0x11a +[ 0.000000] [] kasan_report+0x9a/0xc8 +[ 0.000000] [] walk_stackframe+0xa2/0x11a +[ 0.000000] [] walk_stackframe+0xa2/0x11a +[ 0.000000] [] desc_make_final+0x80/0x84 +[ 0.000000] [] stack_trace_save+0x88/0xa6 +[ 0.000000] [] filter_irq_stacks+0x72/0x76 +[ 0.000000] [] devkmsg_read+0x32a/0x32e +[ 0.000000] [] kasan_save_stack+0x28/0x52 +[ 0.000000] [] desc_make_final+0x7c/0x84 +[ 0.000000] [] stack_trace_save+0x84/0xa6 +[ 0.000000] [] kasan_set_track+0x12/0x20 +[ 0.000000] [] __kasan_slab_alloc+0x58/0x5e +[ 0.000000] [] __kmem_cache_create+0x21e/0x39a +[ 0.000000] [] create_boot_cache+0x70/0x9c +[ 0.000000] [] kmem_cache_init+0x6c/0x11e +[ 0.000000] [] mm_init+0xd8/0xfe +[ 0.000000] [] start_kernel+0x190/0x3ca +[ 0.000000] +[ 0.000000] The buggy address belongs to stack of task swapper/0 +[ 0.000000] and is located at offset 0 in frame: +[ 0.000000] stack_trace_save+0x0/0xa6 +[ 0.000000] +[ 0.000000] This frame has 1 object: +[ 0.000000] [32, 56) 'c' +[ 0.000000] +[ 0.000000] The buggy address belongs to the physical page: +[ 0.000000] page:(____ptrval____) 
refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x81a07 +[ 0.000000] flags: 0x1000(reserved|zone=0) +[ 0.000000] raw: 0000000000001000 ff600003f1e3d150 ff600003f1e3d150 0000000000000000 +[ 0.000000] raw: 0000000000000000 0000000000000000 00000001ffffffff +[ 0.000000] page dumped because: kasan: bad access detected +[ 0.000000] +[ 0.000000] Memory state around the buggy address: +[ 0.000000] ffffffff81807b00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +[ 0.000000] ffffffff81807b80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +[ 0.000000] >ffffffff81807c00: 00 00 00 00 00 00 00 00 f1 f1 f1 f1 00 00 00 f3 +[ 0.000000] ^ +[ 0.000000] ffffffff81807c80: f3 f3 f3 f3 00 00 00 00 00 00 00 00 00 00 00 00 +[ 0.000000] ffffffff81807d00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +[ 0.000000] ================================================================== + +Fix that by using READ_ONCE_NOCHECK when reading the stack in imprecise +mode. + +Fixes: 5d8544e2d007 ("RISC-V: Generic library routines and assembly") +Reported-by: Chathura Rajapaksha +Link: https://lore.kernel.org/all/CAD7mqryDQCYyJ1gAmtMm8SASMWAQ4i103ptTb0f6Oda=tPY2=A@mail.gmail.com/ +Suggested-by: Dmitry Vyukov +Signed-off-by: Alexandre Ghiti +Link: https://lore.kernel.org/r/20230308091639.602024-1-alexghiti@rivosinc.com +Signed-off-by: Palmer Dabbelt +Signed-off-by: Sasha Levin +--- + arch/riscv/kernel/stacktrace.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c +index ee8ef91c8aaf4..894ae66421a76 100644 +--- a/arch/riscv/kernel/stacktrace.c ++++ b/arch/riscv/kernel/stacktrace.c +@@ -94,7 +94,7 @@ void notrace walk_stackframe(struct task_struct *task, + while (!kstack_end(ksp)) { + if (__kernel_text_address(pc) && unlikely(!fn(arg, pc))) + break; +- pc = (*ksp++) - 0x4; ++ pc = READ_ONCE_NOCHECK(*ksp++) - 0x4; + } + } + +-- +2.39.2 + diff --git a/queue-5.15/s390-ftrace-remove-dead-code.patch 
b/queue-5.15/s390-ftrace-remove-dead-code.patch new file mode 100644 index 00000000000..7d14365aa8b --- /dev/null +++ b/queue-5.15/s390-ftrace-remove-dead-code.patch @@ -0,0 +1,170 @@ +From 87e00f67048b824ef249acb1c64f2a9ca6d1c6d5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 13 Sep 2021 16:08:33 +0200 +Subject: s390/ftrace: remove dead code + +From: Heiko Carstens + +[ Upstream commit b860b9346e2d5667fbae2cefc571bdb6ce665b53 ] + +ftrace_shared_hotpatch_trampoline() never returns NULL, +therefore quite a bit of code can be removed. + +Acked-by: Ilya Leoshkevich +Signed-off-by: Heiko Carstens +Signed-off-by: Vasily Gorbik +Stable-dep-of: 2a8db5ec4a28 ("RISC-V: Don't check text_mutex during stop_machine") +Signed-off-by: Sasha Levin +--- + arch/s390/kernel/ftrace.c | 86 +++------------------------------------ + 1 file changed, 6 insertions(+), 80 deletions(-) + +diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c +index 1d94ffdf347bb..5d0c45c13b5fa 100644 +--- a/arch/s390/kernel/ftrace.c ++++ b/arch/s390/kernel/ftrace.c +@@ -80,17 +80,6 @@ asm( + + #ifdef CONFIG_MODULES + static char *ftrace_plt; +- +-asm( +- " .data\n" +- "ftrace_plt_template:\n" +- " basr %r1,%r0\n" +- " lg %r1,0f-.(%r1)\n" +- " br %r1\n" +- "0: .quad ftrace_caller\n" +- "ftrace_plt_template_end:\n" +- " .previous\n" +-); + #endif /* CONFIG_MODULES */ + + static const char *ftrace_shared_hotpatch_trampoline(const char **end) +@@ -116,7 +105,7 @@ static const char *ftrace_shared_hotpatch_trampoline(const char **end) + + bool ftrace_need_init_nop(void) + { +- return ftrace_shared_hotpatch_trampoline(NULL); ++ return true; + } + + int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) +@@ -175,28 +164,6 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + return 0; + } + +-static void ftrace_generate_nop_insn(struct ftrace_insn *insn) +-{ +- /* brcl 0,0 */ +- insn->opc = 0xc004; +- insn->disp = 0; +-} +- +-static void 
ftrace_generate_call_insn(struct ftrace_insn *insn, +- unsigned long ip) +-{ +- unsigned long target; +- +- /* brasl r0,ftrace_caller */ +- target = FTRACE_ADDR; +-#ifdef CONFIG_MODULES +- if (is_module_addr((void *)ip)) +- target = (unsigned long)ftrace_plt; +-#endif /* CONFIG_MODULES */ +- insn->opc = 0xc005; +- insn->disp = (target - ip) / 2; +-} +- + static void brcl_disable(void *brcl) + { + u8 op = 0x04; /* set mask field to zero */ +@@ -207,23 +174,7 @@ static void brcl_disable(void *brcl) + int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, + unsigned long addr) + { +- struct ftrace_insn orig, new, old; +- +- if (ftrace_shared_hotpatch_trampoline(NULL)) { +- brcl_disable((void *)rec->ip); +- return 0; +- } +- +- if (copy_from_kernel_nofault(&old, (void *) rec->ip, sizeof(old))) +- return -EFAULT; +- /* Replace ftrace call with a nop. */ +- ftrace_generate_call_insn(&orig, rec->ip); +- ftrace_generate_nop_insn(&new); +- +- /* Verify that the to be replaced code matches what we expect. */ +- if (memcmp(&orig, &old, sizeof(old))) +- return -EINVAL; +- s390_kernel_write((void *) rec->ip, &new, sizeof(new)); ++ brcl_disable((void *)rec->ip); + return 0; + } + +@@ -236,23 +187,7 @@ static void brcl_enable(void *brcl) + + int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) + { +- struct ftrace_insn orig, new, old; +- +- if (ftrace_shared_hotpatch_trampoline(NULL)) { +- brcl_enable((void *)rec->ip); +- return 0; +- } +- +- if (copy_from_kernel_nofault(&old, (void *) rec->ip, sizeof(old))) +- return -EFAULT; +- /* Replace nop with an ftrace call. */ +- ftrace_generate_nop_insn(&orig); +- ftrace_generate_call_insn(&new, rec->ip); +- +- /* Verify that the to be replaced code matches what we expect. 
*/ +- if (memcmp(&orig, &old, sizeof(old))) +- return -EINVAL; +- s390_kernel_write((void *) rec->ip, &new, sizeof(new)); ++ brcl_enable((void *)rec->ip); + return 0; + } + +@@ -269,10 +204,7 @@ int __init ftrace_dyn_arch_init(void) + + void arch_ftrace_update_code(int command) + { +- if (ftrace_shared_hotpatch_trampoline(NULL)) +- ftrace_modify_all_code(command); +- else +- ftrace_run_stop_machine(command); ++ ftrace_modify_all_code(command); + } + + static void __ftrace_sync(void *dummy) +@@ -281,10 +213,8 @@ static void __ftrace_sync(void *dummy) + + int ftrace_arch_code_modify_post_process(void) + { +- if (ftrace_shared_hotpatch_trampoline(NULL)) { +- /* Send SIGP to the other CPUs, so they see the new code. */ +- smp_call_function(__ftrace_sync, NULL, 1); +- } ++ /* Send SIGP to the other CPUs, so they see the new code. */ ++ smp_call_function(__ftrace_sync, NULL, 1); + return 0; + } + +@@ -299,10 +229,6 @@ static int __init ftrace_plt_init(void) + panic("cannot allocate ftrace plt\n"); + + start = ftrace_shared_hotpatch_trampoline(&end); +- if (!start) { +- start = ftrace_plt_template; +- end = ftrace_plt_template_end; +- } + memcpy(ftrace_plt, start, end - start); + set_memory_ro((unsigned long)ftrace_plt, 1); + return 0; +-- +2.39.2 + diff --git a/queue-5.15/scsi-core-remove-the-proc-scsi-proc_name-directory-e.patch b/queue-5.15/scsi-core-remove-the-proc-scsi-proc_name-directory-e.patch new file mode 100644 index 00000000000..fd6fb6d0d79 --- /dev/null +++ b/queue-5.15/scsi-core-remove-the-proc-scsi-proc_name-directory-e.patch @@ -0,0 +1,79 @@ +From ef4e22de30602b27bcc56fed0a5145dd3dea242c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 10 Feb 2023 12:52:00 -0800 +Subject: scsi: core: Remove the /proc/scsi/${proc_name} directory earlier + +From: Bart Van Assche + +[ Upstream commit fc663711b94468f4e1427ebe289c9f05669699c9 ] + +Remove the /proc/scsi/${proc_name} directory earlier to fix a race +condition between unloading and reloading kernel modules. 
This fixes a bug +introduced in 2009 by commit 77c019768f06 ("[SCSI] fix /proc memory leak in +the SCSI core"). + +Fix the following kernel warning: + +proc_dir_entry 'scsi/scsi_debug' already registered +WARNING: CPU: 19 PID: 27986 at fs/proc/generic.c:376 proc_register+0x27d/0x2e0 +Call Trace: + proc_mkdir+0xb5/0xe0 + scsi_proc_hostdir_add+0xb5/0x170 + scsi_host_alloc+0x683/0x6c0 + sdebug_driver_probe+0x6b/0x2d0 [scsi_debug] + really_probe+0x159/0x540 + __driver_probe_device+0xdc/0x230 + driver_probe_device+0x4f/0x120 + __device_attach_driver+0xef/0x180 + bus_for_each_drv+0xe5/0x130 + __device_attach+0x127/0x290 + device_initial_probe+0x17/0x20 + bus_probe_device+0x110/0x130 + device_add+0x673/0xc80 + device_register+0x1e/0x30 + sdebug_add_host_helper+0x1a7/0x3b0 [scsi_debug] + scsi_debug_init+0x64f/0x1000 [scsi_debug] + do_one_initcall+0xd7/0x470 + do_init_module+0xe7/0x330 + load_module+0x122a/0x12c0 + __do_sys_finit_module+0x124/0x1a0 + __x64_sys_finit_module+0x46/0x50 + do_syscall_64+0x38/0x80 + entry_SYSCALL_64_after_hwframe+0x46/0xb0 + +Link: https://lore.kernel.org/r/20230210205200.36973-3-bvanassche@acm.org +Cc: Alan Stern +Cc: Yi Zhang +Cc: stable@vger.kernel.org +Fixes: 77c019768f06 ("[SCSI] fix /proc memory leak in the SCSI core") +Reported-by: Yi Zhang +Signed-off-by: Bart Van Assche +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Sasha Levin +--- + drivers/scsi/hosts.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c +index 0165dad803001..28b201c443267 100644 +--- a/drivers/scsi/hosts.c ++++ b/drivers/scsi/hosts.c +@@ -180,6 +180,7 @@ void scsi_remove_host(struct Scsi_Host *shost) + scsi_forget_host(shost); + mutex_unlock(&shost->scan_mutex); + scsi_proc_host_rm(shost); ++ scsi_proc_hostdir_rm(shost->hostt); + + spin_lock_irqsave(shost->host_lock, flags); + if (scsi_host_set_state(shost, SHOST_DEL)) +@@ -321,6 +322,7 @@ static void scsi_host_dev_release(struct device *dev) + struct Scsi_Host *shost = dev_to_shost(dev); + struct device *parent = dev->parent; + ++ /* In case scsi_remove_host() has not been called. */ + scsi_proc_hostdir_rm(shost->hostt); + + /* Wait for functions invoked through call_rcu(&scmd->rcu, ...) */ +-- +2.39.2 + diff --git a/queue-5.15/scsi-megaraid_sas-update-max-supported-ld-ids-to-240.patch b/queue-5.15/scsi-megaraid_sas-update-max-supported-ld-ids-to-240.patch new file mode 100644 index 00000000000..69ab6816721 --- /dev/null +++ b/queue-5.15/scsi-megaraid_sas-update-max-supported-ld-ids-to-240.patch @@ -0,0 +1,60 @@ +From 2aeff703ec4a14681aed378b96d846258a05d1f3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 2 Mar 2023 16:23:40 +0530 +Subject: scsi: megaraid_sas: Update max supported LD IDs to 240 + +From: Chandrakanth Patil + +[ Upstream commit bfa659177dcba48cf13f2bd88c1972f12a60bf1c ] + +The firmware only supports Logical Disk IDs up to 240 and LD ID 255 (0xFF) +is reserved for deleted LDs. However, in some cases, firmware was assigning +LD ID 254 (0xFE) to deleted LDs and this was causing the driver to mark the +wrong disk as deleted. This in turn caused the wrong disk device to be +taken offline by the SCSI midlayer. + +To address this issue, limit the LD ID range from 255 to 240. 
This ensures +the deleted LD ID is properly identified and removed by the driver without +accidentally deleting any valid LDs. + +Fixes: ae6874ba4b43 ("scsi: megaraid_sas: Early detection of VD deletion through RaidMap update") +Reported-by: Martin K. Petersen +Signed-off-by: Chandrakanth Patil +Signed-off-by: Sumit Saxena +Link: https://lore.kernel.org/r/20230302105342.34933-2-chandrakanth.patil@broadcom.com +Signed-off-by: Martin K. Petersen +Signed-off-by: Sasha Levin +--- + drivers/scsi/megaraid/megaraid_sas.h | 2 ++ + drivers/scsi/megaraid/megaraid_sas_fp.c | 2 +- + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h +index 650210d2abb4d..02d7ab119f806 100644 +--- a/drivers/scsi/megaraid/megaraid_sas.h ++++ b/drivers/scsi/megaraid/megaraid_sas.h +@@ -1517,6 +1517,8 @@ struct megasas_ctrl_info { + #define MEGASAS_MAX_LD_IDS (MEGASAS_MAX_LD_CHANNELS * \ + MEGASAS_MAX_DEV_PER_CHANNEL) + ++#define MEGASAS_MAX_SUPPORTED_LD_IDS 240 ++ + #define MEGASAS_MAX_SECTORS (2*1024) + #define MEGASAS_MAX_SECTORS_IEEE (2*128) + #define MEGASAS_DBG_LVL 1 +diff --git a/drivers/scsi/megaraid/megaraid_sas_fp.c b/drivers/scsi/megaraid/megaraid_sas_fp.c +index 83f69c33b01a9..ec10d35b4685a 100644 +--- a/drivers/scsi/megaraid/megaraid_sas_fp.c ++++ b/drivers/scsi/megaraid/megaraid_sas_fp.c +@@ -358,7 +358,7 @@ u8 MR_ValidateMapInfo(struct megasas_instance *instance, u64 map_id) + ld = MR_TargetIdToLdGet(i, drv_map); + + /* For non existing VDs, iterate to next VD*/ +- if (ld >= (MAX_LOGICAL_DRIVES_EXT - 1)) ++ if (ld >= MEGASAS_MAX_SUPPORTED_LD_IDS) + continue; + + raid = MR_LdRaidGet(ld, drv_map); +-- +2.39.2 + diff --git a/queue-5.15/selftests-nft_nat-ensuring-the-listening-side-is-up-.patch b/queue-5.15/selftests-nft_nat-ensuring-the-listening-side-is-up-.patch new file mode 100644 index 00000000000..1c21512c1b9 --- /dev/null +++ 
b/queue-5.15/selftests-nft_nat-ensuring-the-listening-side-is-up-.patch @@ -0,0 +1,58 @@ +From bc89d13363f67e413883cb509a1699ae80cf19fb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 27 Feb 2023 17:36:46 +0800 +Subject: selftests: nft_nat: ensuring the listening side is up before starting + the client + +From: Hangbin Liu + +[ Upstream commit 2067e7a00aa604b94de31d64f29b8893b1696f26 ] + +The test_local_dnat_portonly() function initiates the client-side as +soon as it sets the listening side to the background. This could lead to +a race condition where the server may not be ready to listen. To ensure +that the server-side is up and running before initiating the +client-side, a delay is introduced to the test_local_dnat_portonly() +function. + +Before the fix: + # ./nft_nat.sh + PASS: netns routing/connectivity: ns0-rthlYrBU can reach ns1-rthlYrBU and ns2-rthlYrBU + PASS: ping to ns1-rthlYrBU was ip NATted to ns2-rthlYrBU + PASS: ping to ns1-rthlYrBU OK after ip nat output chain flush + PASS: ipv6 ping to ns1-rthlYrBU was ip6 NATted to ns2-rthlYrBU + 2023/02/27 04:11:03 socat[6055] E connect(5, AF=2 10.0.1.99:2000, 16): Connection refused + ERROR: inet port rewrite + +After the fix: + # ./nft_nat.sh + PASS: netns routing/connectivity: ns0-9sPJV6JJ can reach ns1-9sPJV6JJ and ns2-9sPJV6JJ + PASS: ping to ns1-9sPJV6JJ was ip NATted to ns2-9sPJV6JJ + PASS: ping to ns1-9sPJV6JJ OK after ip nat output chain flush + PASS: ipv6 ping to ns1-9sPJV6JJ was ip6 NATted to ns2-9sPJV6JJ + PASS: inet port rewrite without l3 address + +Fixes: 282e5f8fe907 ("netfilter: nat: really support inet nat without l3 address") +Signed-off-by: Hangbin Liu +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + tools/testing/selftests/netfilter/nft_nat.sh | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh +index 032f2de6e14e0..462dc47420b65 100755 +--- 
a/tools/testing/selftests/netfilter/nft_nat.sh ++++ b/tools/testing/selftests/netfilter/nft_nat.sh +@@ -404,6 +404,8 @@ EOF + echo SERVER-$family | ip netns exec "$ns1" timeout 5 socat -u STDIN TCP-LISTEN:2000 & + sc_s=$! + ++ sleep 1 ++ + result=$(ip netns exec "$ns0" timeout 1 socat TCP:$daddr:2000 STDOUT) + + if [ "$result" = "SERVER-inet" ];then +-- +2.39.2 + diff --git a/queue-5.15/series b/queue-5.15/series index 8f08b75e6e2..fcab5b166f6 100644 --- a/queue-5.15/series +++ b/queue-5.15/series @@ -14,3 +14,70 @@ ext4-fix-warning-in-ext4_update_inline_data.patch ext4-zero-i_disksize-when-initializing-the-bootloader-inode.patch nfc-change-order-inside-nfc_se_io-error-path.patch irqdomain-fix-mapping-creation-race.patch +kvm-optimize-kvm_make_vcpus_request_mask-a-bit.patch +kvm-pre-allocate-cpumasks-for-kvm_make_all_cpus_requ.patch +kvm-register-dev-kvm-as-the-_very_-last-thing-during.patch +kvm-svm-don-t-rewrite-guest-icr-on-avic-ipi-virtuali.patch +kvm-svm-process-icr-on-avic-ipi-delivery-failure-due.patch +fs-dlm-fix-log-of-lowcomms-vs-midcomms.patch +fs-dlm-add-midcomms-init-start-functions.patch +fs-dlm-start-midcomms-before-scand.patch +udf-fix-off-by-one-error-when-discarding-preallocati.patch +f2fs-avoid-down_write-on-nat_tree_lock-during-checkp.patch +f2fs-do-not-bother-checkpoint-by-f2fs_get_node_info.patch +f2fs-retry-to-update-the-inode-page-given-data-corru.patch +ipmi-ssif-increase-the-message-retry-time.patch +ipmi-ssif-add-a-timer-between-request-retries.patch +irqdomain-refactor-__irq_domain_alloc_irqs.patch +iommu-vt-d-fix-pasid-directory-pointer-coherency.patch +block-brd-add-error-handling-support-for-add_disk.patch +brd-mark-as-nowait-compatible.patch +efi-earlycon-replace-open-coded-strnchrnul.patch +arm64-efi-make-efi_rt_lock-a-raw_spinlock.patch +risc-v-avoid-dereferening-null-regs-in-die.patch +riscv-avoid-enabling-interrupts-in-die.patch +riscv-add-header-include-guards-to-insn.h.patch 
+scsi-core-remove-the-proc-scsi-proc_name-directory-e.patch +regulator-flag-uncontrollable-regulators-as-always_o.patch +regulator-core-fix-off-on-delay-us-for-always-on-boo.patch +regulator-core-use-ktime_get_boottime-to-determine-h.patch +ext4-fix-possible-corruption-when-moving-a-directory.patch +drm-nouveau-kms-nv50-remove-unused-functions.patch +drm-nouveau-kms-nv50-fix-nv50_wndw_new_-prototype.patch +drm-msm-fix-potential-invalid-ptr-free.patch +drm-msm-a5xx-fix-setting-of-the-cp_preempt_enable_lo.patch +drm-msm-a5xx-fix-highest-bank-bit-for-a530.patch +drm-msm-a5xx-fix-the-emptyness-check-in-the-preempt-.patch +drm-msm-a5xx-fix-context-faults-during-ring-switch.patch +bgmac-fix-initial-chip-reset-to-support-bcm5358.patch +nfc-fdp-add-null-check-of-devm_kmalloc_array-in-fdp_.patch +powerpc-dts-t1040rdb-fix-compatible-string-for-rev-a.patch +ila-do-not-generate-empty-messages-in-ila_xlat_nl_cm.patch +selftests-nft_nat-ensuring-the-listening-side-is-up-.patch +perf-stat-fix-counting-when-initial-delay-configured.patch +net-lan78xx-fix-accessing-the-lan7800-s-internal-phy.patch +net-caif-fix-use-after-free-in-cfusbl_device_notify.patch +ice-copy-last-block-omitted-in-ice_get_module_eeprom.patch +bpf-sockmap-fix-an-infinite-loop-error-when-len-is-0.patch +drm-msm-dpu-fix-len-of-sc7180-ctl-blocks.patch +net-stmmac-add-to-set-device-wake-up-flag-when-stmma.patch +net-phylib-get-rid-of-unnecessary-locking.patch +bnxt_en-avoid-order-5-memory-allocation-for-tpa-data.patch +netfilter-ctnetlink-revert-to-dumping-mark-regardles.patch +netfilter-tproxy-fix-deadlock-due-to-missing-bh-disa.patch +btf-fix-resolving-btf_kind_var-after-array-struct-un.patch +net-phy-smsc-cache-interrupt-mask.patch +net-phy-smsc-fix-link-up-detection-in-forced-irq-mod.patch +net-ethernet-mtk_eth_soc-fix-rx-data-corruption-issu.patch +scsi-megaraid_sas-update-max-supported-ld-ids-to-240.patch +netfilter-conntrack-adopt-safer-max-chain-length.patch 
+platform-x86-mlx_platform-select-regmap-instead-of-d.patch +net-smc-fix-fallback-failed-while-sendmsg-with-fasto.patch +octeontx2-af-unlock-contexts-in-the-queue-context-ca.patch +sunrpc-fix-a-server-shutdown-leak.patch +net-dsa-mt7530-permit-port-5-to-work-without-port-6-.patch +af_unix-remove-unnecessary-brackets-around-config_af.patch +af_unix-fix-struct-pid-leaks-in-oob-support.patch +riscv-use-read_once_nocheck-in-imprecise-unwinding-s.patch +s390-ftrace-remove-dead-code.patch +risc-v-don-t-check-text_mutex-during-stop_machine.patch diff --git a/queue-5.15/sunrpc-fix-a-server-shutdown-leak.patch b/queue-5.15/sunrpc-fix-a-server-shutdown-leak.patch new file mode 100644 index 00000000000..382344bd97c --- /dev/null +++ b/queue-5.15/sunrpc-fix-a-server-shutdown-leak.patch @@ -0,0 +1,48 @@ +From 91d6554571f572d806bea65aa4ddd99ab6143df6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 3 Mar 2023 16:08:32 -0500 +Subject: SUNRPC: Fix a server shutdown leak + +From: Benjamin Coddington + +[ Upstream commit 9ca6705d9d609441d34f8b853e1e4a6369b3b171 ] + +Fix a race where kthread_stop() may prevent the threadfn from ever getting +called. If that happens the svc_rqst will not be cleaned up. 
+ +Fixes: ed6473ddc704 ("NFSv4: Fix callback server shutdown") +Signed-off-by: Benjamin Coddington +Reviewed-by: Jeff Layton +Signed-off-by: Chuck Lever +Signed-off-by: Sasha Levin +--- + net/sunrpc/svc.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c +index 08ca797bb8a46..74a1c9116a785 100644 +--- a/net/sunrpc/svc.c ++++ b/net/sunrpc/svc.c +@@ -806,6 +806,7 @@ EXPORT_SYMBOL_GPL(svc_set_num_threads); + static int + svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) + { ++ struct svc_rqst *rqstp; + struct task_struct *task; + unsigned int state = serv->sv_nrthreads-1; + +@@ -814,7 +815,10 @@ svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) + task = choose_victim(serv, pool, &state); + if (task == NULL) + break; +- kthread_stop(task); ++ rqstp = kthread_data(task); ++ /* Did we lose a race to svo_function threadfn? */ ++ if (kthread_stop(task) == -EINTR) ++ svc_exit_thread(rqstp); + nrservs++; + } while (nrservs < 0); + return 0; +-- +2.39.2 + diff --git a/queue-5.15/udf-fix-off-by-one-error-when-discarding-preallocati.patch b/queue-5.15/udf-fix-off-by-one-error-when-discarding-preallocati.patch new file mode 100644 index 00000000000..8521a971d45 --- /dev/null +++ b/queue-5.15/udf-fix-off-by-one-error-when-discarding-preallocati.patch @@ -0,0 +1,38 @@ +From e880b66a27f5f2eaaa6406d0e42e52f1a94b6235 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 23 Jan 2023 14:29:15 +0100 +Subject: udf: Fix off-by-one error when discarding preallocation + +From: Jan Kara + +[ Upstream commit f54aa97fb7e5329a373f9df4e5e213ced4fc8759 ] + +The condition determining whether the preallocation can be used had +an off-by-one error so we didn't discard preallocation when new +allocation was just following it. This can then confuse code in +inode_getblk(). 
+ +CC: stable@vger.kernel.org +Fixes: 16d055656814 ("udf: Discard preallocation before extending file with a hole") +Signed-off-by: Jan Kara +Signed-off-by: Sasha Levin +--- + fs/udf/inode.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/udf/inode.c b/fs/udf/inode.c +index a151e04856afe..594d224588819 100644 +--- a/fs/udf/inode.c ++++ b/fs/udf/inode.c +@@ -442,7 +442,7 @@ static int udf_get_block(struct inode *inode, sector_t block, + * Block beyond EOF and prealloc extents? Just discard preallocation + * as it is not useful and complicates things. + */ +- if (((loff_t)block) << inode->i_blkbits > iinfo->i_lenExtents) ++ if (((loff_t)block) << inode->i_blkbits >= iinfo->i_lenExtents) + udf_discard_prealloc(inode); + udf_clear_extent_cache(inode); + phys = inode_getblk(inode, block, &err, &new); +-- +2.39.2 + -- 2.47.3