From 2915ba2a09b69d888913f8124704a0d37dd90fc8 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Wed, 17 May 2023 21:37:37 -0400 Subject: [PATCH] Fixes for 5.15 Signed-off-by: Sasha Levin --- ...ata-race-of-sk-sk_receive_queue-qlen.patch | 84 ++++ ...fix-data-races-around-sk-sk_shutdown.patch | 153 +++++++ ...-jornada-7xx-fix-kernel-doc-warnings.patch | 69 +++ ...915-dp-prevent-potential-div-by-zero.patch | 50 ++ ...i-set-the-fwnode-for-mipi_dsi_device.patch | 48 ++ ...t4-allow-ext4_get_group_info-to-fail.patch | 426 ++++++++++++++++++ ...nd-by-goal-if-ext4_mb_hint_goal_only.patch | 43 ++ ...-sb_rdonly-when-remounting-r-w-until.patch | 77 ++++ ...ix-lockdep-warning-when-enabling-mmp.patch | 94 ++++ ...or-codes-from-ext4_multi_mount_prote.patch | 128 ++++++ ...nused-variable-warning-with-config_q.patch | 63 +++ ...fb-fix-error-handling-in-arcfb_probe.patch | 81 ++++ ...-remove-the-code-of-clearing-pba-bit.patch | 50 ++ ...t-of-bounds-caused-by-unclear-skb-cb.patch | 172 +++++++ ...hing-if-no-time-delta-between-sample.patch | 108 +++++ ...d-vlan_get_protocol_and_depth-helper.patch | 174 +++++++ ...ate-sk-sk_err-write-from-do_recvmmsg.patch | 40 ++ ...gram-fix-data-races-in-datagram_poll.patch | 69 +++ ...ith-most-data-races-in-sk_wait_event.patch | 224 +++++++++ ...ring-on-sk-sk_stamp-in-sock_recv_cms.patch | 82 ++++ ...ix-an-error-handling-path-in-mvusb_m.patch | 54 +++ ...alize-mac_oneus_tic_counter-register.patch | 96 ++++ ...h-to-use-interrupt-for-hw-crosstimes.patch | 224 +++++++++ ...ack-fix-possible-bug_on-with-enable_.patch | 77 ++++ ...les-always-release-netdev-hooks-from.patch | 76 ++++ ...-annotate-accesses-to-nlk-cb_running.patch | 109 +++++ ...x-i-o-hang-that-occurs-when-bkops-fa.patch | 50 ++ queue-5.15/series | 29 ++ ...tions-around-sk-sk_shutdown-accesses.patch | 158 +++++++ ...ake-broadcast-device-replacement-wor.patch | 274 +++++++++++ 30 files changed, 3382 insertions(+) create mode 100644 
queue-5.15/af_unix-fix-a-data-race-of-sk-sk_receive_queue-qlen.patch create mode 100644 queue-5.15/af_unix-fix-data-races-around-sk-sk_shutdown.patch create mode 100644 queue-5.15/arm-9296-1-hp-jornada-7xx-fix-kernel-doc-warnings.patch create mode 100644 queue-5.15/drm-i915-dp-prevent-potential-div-by-zero.patch create mode 100644 queue-5.15/drm-mipi-dsi-set-the-fwnode-for-mipi_dsi_device.patch create mode 100644 queue-5.15/ext4-allow-ext4_get_group_info-to-fail.patch create mode 100644 queue-5.15/ext4-allow-to-find-by-goal-if-ext4_mb_hint_goal_only.patch create mode 100644 queue-5.15/ext4-don-t-clear-sb_rdonly-when-remounting-r-w-until.patch create mode 100644 queue-5.15/ext4-fix-lockdep-warning-when-enabling-mmp.patch create mode 100644 queue-5.15/ext4-reflect-error-codes-from-ext4_multi_mount_prote.patch create mode 100644 queue-5.15/ext4-remove-an-unused-variable-warning-with-config_q.patch create mode 100644 queue-5.15/fbdev-arcfb-fix-error-handling-in-arcfb_probe.patch create mode 100644 queue-5.15/gve-remove-the-code-of-clearing-pba-bit.patch create mode 100644 queue-5.15/ipvlan-fix-out-of-bounds-caused-by-unclear-skb-cb.patch create mode 100644 queue-5.15/linux-dim-do-nothing-if-no-time-delta-between-sample.patch create mode 100644 queue-5.15/net-add-vlan_get_protocol_and_depth-helper.patch create mode 100644 queue-5.15/net-annotate-sk-sk_err-write-from-do_recvmmsg.patch create mode 100644 queue-5.15/net-datagram-fix-data-races-in-datagram_poll.patch create mode 100644 queue-5.15/net-deal-with-most-data-races-in-sk_wait_event.patch create mode 100644 queue-5.15/net-fix-load-tearing-on-sk-sk_stamp-in-sock_recv_cms.patch create mode 100644 queue-5.15/net-mdio-mvusb-fix-an-error-handling-path-in-mvusb_m.patch create mode 100644 queue-5.15/net-stmmac-initialize-mac_oneus_tic_counter-register.patch create mode 100644 queue-5.15/net-stmmac-switch-to-use-interrupt-for-hw-crosstimes.patch create mode 100644 
queue-5.15/netfilter-conntrack-fix-possible-bug_on-with-enable_.patch create mode 100644 queue-5.15/netfilter-nf_tables-always-release-netdev-hooks-from.patch create mode 100644 queue-5.15/netlink-annotate-accesses-to-nlk-cb_running.patch create mode 100644 queue-5.15/scsi-ufs-core-fix-i-o-hang-that-occurs-when-bkops-fa.patch create mode 100644 queue-5.15/tcp-add-annotations-around-sk-sk_shutdown-accesses.patch create mode 100644 queue-5.15/tick-broadcast-make-broadcast-device-replacement-wor.patch diff --git a/queue-5.15/af_unix-fix-a-data-race-of-sk-sk_receive_queue-qlen.patch b/queue-5.15/af_unix-fix-a-data-race-of-sk-sk_receive_queue-qlen.patch new file mode 100644 index 00000000000..06bc8df603d --- /dev/null +++ b/queue-5.15/af_unix-fix-a-data-race-of-sk-sk_receive_queue-qlen.patch @@ -0,0 +1,84 @@ +From a84058ee02559f4742da26a33c44c3ed6c5a9657 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 9 May 2023 17:34:55 -0700 +Subject: af_unix: Fix a data race of sk->sk_receive_queue->qlen. + +From: Kuniyuki Iwashima + +[ Upstream commit 679ed006d416ea0cecfe24a99d365d1dea69c683 ] + +KCSAN found a data race of sk->sk_receive_queue->qlen where recvmsg() +updates qlen under the queue lock and sendmsg() checks qlen under +unix_state_lock(), not the queue lock, so the reader side needs +READ_ONCE(). 
+ +BUG: KCSAN: data-race in __skb_try_recv_from_queue / unix_wait_for_peer + +write (marked) to 0xffff888019fe7c68 of 4 bytes by task 49792 on cpu 0: + __skb_unlink include/linux/skbuff.h:2347 [inline] + __skb_try_recv_from_queue+0x3de/0x470 net/core/datagram.c:197 + __skb_try_recv_datagram+0xf7/0x390 net/core/datagram.c:263 + __unix_dgram_recvmsg+0x109/0x8a0 net/unix/af_unix.c:2452 + unix_dgram_recvmsg+0x94/0xa0 net/unix/af_unix.c:2549 + sock_recvmsg_nosec net/socket.c:1019 [inline] + ____sys_recvmsg+0x3a3/0x3b0 net/socket.c:2720 + ___sys_recvmsg+0xc8/0x150 net/socket.c:2764 + do_recvmmsg+0x182/0x560 net/socket.c:2858 + __sys_recvmmsg net/socket.c:2937 [inline] + __do_sys_recvmmsg net/socket.c:2960 [inline] + __se_sys_recvmmsg net/socket.c:2953 [inline] + __x64_sys_recvmmsg+0x153/0x170 net/socket.c:2953 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x3b/0x90 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x72/0xdc + +read to 0xffff888019fe7c68 of 4 bytes by task 49793 on cpu 1: + skb_queue_len include/linux/skbuff.h:2127 [inline] + unix_recvq_full net/unix/af_unix.c:229 [inline] + unix_wait_for_peer+0x154/0x1a0 net/unix/af_unix.c:1445 + unix_dgram_sendmsg+0x13bc/0x14b0 net/unix/af_unix.c:2048 + sock_sendmsg_nosec net/socket.c:724 [inline] + sock_sendmsg+0x148/0x160 net/socket.c:747 + ____sys_sendmsg+0x20e/0x620 net/socket.c:2503 + ___sys_sendmsg+0xc6/0x140 net/socket.c:2557 + __sys_sendmmsg+0x11d/0x370 net/socket.c:2643 + __do_sys_sendmmsg net/socket.c:2672 [inline] + __se_sys_sendmmsg net/socket.c:2669 [inline] + __x64_sys_sendmmsg+0x58/0x70 net/socket.c:2669 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x3b/0x90 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x72/0xdc + +value changed: 0x0000000b -> 0x00000001 + +Reported by Kernel Concurrency Sanitizer on: +CPU: 1 PID: 49793 Comm: syz-executor.0 Not tainted 6.3.0-rc7-02330-gca6270c12e20 #2 +Hardware name: QEMU Standard PC (i440FX + PIIX, 
1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Reported-by: syzbot +Signed-off-by: Kuniyuki Iwashima +Reviewed-by: Eric Dumazet +Reviewed-by: Michal Kubiak +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/unix/af_unix.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c +index a96026dbdf94e..230e20cd986e2 100644 +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -1307,7 +1307,7 @@ static long unix_wait_for_peer(struct sock *other, long timeo) + + sched = !sock_flag(other, SOCK_DEAD) && + !(other->sk_shutdown & RCV_SHUTDOWN) && +- unix_recvq_full(other); ++ unix_recvq_full_lockless(other); + + unix_state_unlock(other); + +-- +2.39.2 + diff --git a/queue-5.15/af_unix-fix-data-races-around-sk-sk_shutdown.patch b/queue-5.15/af_unix-fix-data-races-around-sk-sk_shutdown.patch new file mode 100644 index 00000000000..a4ab03614db --- /dev/null +++ b/queue-5.15/af_unix-fix-data-races-around-sk-sk_shutdown.patch @@ -0,0 +1,153 @@ +From d041cee41baeb2502ee598e38de94edcec2876b7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 9 May 2023 17:34:56 -0700 +Subject: af_unix: Fix data races around sk->sk_shutdown. + +From: Kuniyuki Iwashima + +[ Upstream commit e1d09c2c2f5793474556b60f83900e088d0d366d ] + +KCSAN found a data race around sk->sk_shutdown where unix_release_sock() +and unix_shutdown() update it under unix_state_lock(), OTOH unix_poll() +and unix_dgram_poll() read it locklessly. + +We need to annotate the writes and reads with WRITE_ONCE() and READ_ONCE(). 
+ +BUG: KCSAN: data-race in unix_poll / unix_release_sock + +write to 0xffff88800d0f8aec of 1 bytes by task 264 on cpu 0: + unix_release_sock+0x75c/0x910 net/unix/af_unix.c:631 + unix_release+0x59/0x80 net/unix/af_unix.c:1042 + __sock_release+0x7d/0x170 net/socket.c:653 + sock_close+0x19/0x30 net/socket.c:1397 + __fput+0x179/0x5e0 fs/file_table.c:321 + ____fput+0x15/0x20 fs/file_table.c:349 + task_work_run+0x116/0x1a0 kernel/task_work.c:179 + resume_user_mode_work include/linux/resume_user_mode.h:49 [inline] + exit_to_user_mode_loop kernel/entry/common.c:171 [inline] + exit_to_user_mode_prepare+0x174/0x180 kernel/entry/common.c:204 + __syscall_exit_to_user_mode_work kernel/entry/common.c:286 [inline] + syscall_exit_to_user_mode+0x1a/0x30 kernel/entry/common.c:297 + do_syscall_64+0x4b/0x90 arch/x86/entry/common.c:86 + entry_SYSCALL_64_after_hwframe+0x72/0xdc + +read to 0xffff88800d0f8aec of 1 bytes by task 222 on cpu 1: + unix_poll+0xa3/0x2a0 net/unix/af_unix.c:3170 + sock_poll+0xcf/0x2b0 net/socket.c:1385 + vfs_poll include/linux/poll.h:88 [inline] + ep_item_poll.isra.0+0x78/0xc0 fs/eventpoll.c:855 + ep_send_events fs/eventpoll.c:1694 [inline] + ep_poll fs/eventpoll.c:1823 [inline] + do_epoll_wait+0x6c4/0xea0 fs/eventpoll.c:2258 + __do_sys_epoll_wait fs/eventpoll.c:2270 [inline] + __se_sys_epoll_wait fs/eventpoll.c:2265 [inline] + __x64_sys_epoll_wait+0xcc/0x190 fs/eventpoll.c:2265 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x3b/0x90 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x72/0xdc + +value changed: 0x00 -> 0x03 + +Reported by Kernel Concurrency Sanitizer on: +CPU: 1 PID: 222 Comm: dbus-broker Not tainted 6.3.0-rc7-02330-gca6270c12e20 #2 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 + +Fixes: 3c73419c09a5 ("af_unix: fix 'poll for write'/ connected DGRAM sockets") +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Reported-by: syzbot +Signed-off-by: Kuniyuki 
Iwashima +Reviewed-by: Eric Dumazet +Reviewed-by: Michal Kubiak +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/unix/af_unix.c | 20 ++++++++++++-------- + 1 file changed, 12 insertions(+), 8 deletions(-) + +diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c +index 230e20cd986e2..d326540e4938c 100644 +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -538,7 +538,7 @@ static void unix_release_sock(struct sock *sk, int embrion) + /* Clear state */ + unix_state_lock(sk); + sock_orphan(sk); +- sk->sk_shutdown = SHUTDOWN_MASK; ++ WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK); + path = u->path; + u->path.dentry = NULL; + u->path.mnt = NULL; +@@ -563,7 +563,7 @@ static void unix_release_sock(struct sock *sk, int embrion) + if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) { + unix_state_lock(skpair); + /* No more writes */ +- skpair->sk_shutdown = SHUTDOWN_MASK; ++ WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK); + if (!skb_queue_empty(&sk->sk_receive_queue) || embrion) + skpair->sk_err = ECONNRESET; + unix_state_unlock(skpair); +@@ -2894,7 +2894,7 @@ static int unix_shutdown(struct socket *sock, int mode) + ++mode; + + unix_state_lock(sk); +- sk->sk_shutdown |= mode; ++ WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | mode); + other = unix_peer(sk); + if (other) + sock_hold(other); +@@ -2914,7 +2914,7 @@ static int unix_shutdown(struct socket *sock, int mode) + if (mode&SEND_SHUTDOWN) + peer_mode |= RCV_SHUTDOWN; + unix_state_lock(other); +- other->sk_shutdown |= peer_mode; ++ WRITE_ONCE(other->sk_shutdown, other->sk_shutdown | peer_mode); + unix_state_unlock(other); + other->sk_state_change(other); + if (peer_mode == SHUTDOWN_MASK) +@@ -3046,16 +3046,18 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa + { + struct sock *sk = sock->sk; + __poll_t mask; ++ u8 shutdown; + + sock_poll_wait(file, sock, wait); + mask = 0; ++ shutdown = READ_ONCE(sk->sk_shutdown); + + /* exceptional events? 
*/ + if (sk->sk_err) + mask |= EPOLLERR; +- if (sk->sk_shutdown == SHUTDOWN_MASK) ++ if (shutdown == SHUTDOWN_MASK) + mask |= EPOLLHUP; +- if (sk->sk_shutdown & RCV_SHUTDOWN) ++ if (shutdown & RCV_SHUTDOWN) + mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; + + /* readable? */ +@@ -3089,18 +3091,20 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, + struct sock *sk = sock->sk, *other; + unsigned int writable; + __poll_t mask; ++ u8 shutdown; + + sock_poll_wait(file, sock, wait); + mask = 0; ++ shutdown = READ_ONCE(sk->sk_shutdown); + + /* exceptional events? */ + if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) + mask |= EPOLLERR | + (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0); + +- if (sk->sk_shutdown & RCV_SHUTDOWN) ++ if (shutdown & RCV_SHUTDOWN) + mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; +- if (sk->sk_shutdown == SHUTDOWN_MASK) ++ if (shutdown == SHUTDOWN_MASK) + mask |= EPOLLHUP; + + /* readable? */ +-- +2.39.2 + diff --git a/queue-5.15/arm-9296-1-hp-jornada-7xx-fix-kernel-doc-warnings.patch b/queue-5.15/arm-9296-1-hp-jornada-7xx-fix-kernel-doc-warnings.patch new file mode 100644 index 00000000000..8befbfde479 --- /dev/null +++ b/queue-5.15/arm-9296-1-hp-jornada-7xx-fix-kernel-doc-warnings.patch @@ -0,0 +1,69 @@ +From a1cc1c397fb2c78dfa5e6c918a9ec41005fda3a7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 23 Apr 2023 06:48:45 +0100 +Subject: ARM: 9296/1: HP Jornada 7XX: fix kernel-doc warnings + +From: Randy Dunlap + +[ Upstream commit 46dd6078dbc7e363a8bb01209da67015a1538929 ] + +Fix kernel-doc warnings from the kernel test robot: + +jornada720_ssp.c:24: warning: Function parameter or member 'jornada_ssp_lock' not described in 'DEFINE_SPINLOCK' +jornada720_ssp.c:24: warning: expecting prototype for arch/arm/mac(). 
Prototype was for DEFINE_SPINLOCK() instead +jornada720_ssp.c:34: warning: Function parameter or member 'byte' not described in 'jornada_ssp_reverse' +jornada720_ssp.c:57: warning: Function parameter or member 'byte' not described in 'jornada_ssp_byte' +jornada720_ssp.c:85: warning: Function parameter or member 'byte' not described in 'jornada_ssp_inout' + +Link: lore.kernel.org/r/202304210535.tWby3jWF-lkp@intel.com + +Fixes: 69ebb22277a5 ("[ARM] 4506/1: HP Jornada 7XX: Addition of SSP Platform Driver") +Signed-off-by: Randy Dunlap +Reported-by: kernel test robot +Cc: Arnd Bergmann +Cc: Kristoffer Ericson +Cc: patches@armlinux.org.uk +Signed-off-by: Russell King (Oracle) +Signed-off-by: Sasha Levin +--- + arch/arm/mach-sa1100/jornada720_ssp.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/arch/arm/mach-sa1100/jornada720_ssp.c b/arch/arm/mach-sa1100/jornada720_ssp.c +index 1dbe98948ce30..9627c4cf3e41d 100644 +--- a/arch/arm/mach-sa1100/jornada720_ssp.c ++++ b/arch/arm/mach-sa1100/jornada720_ssp.c +@@ -1,5 +1,5 @@ + // SPDX-License-Identifier: GPL-2.0-only +-/** ++/* + * arch/arm/mac-sa1100/jornada720_ssp.c + * + * Copyright (C) 2006/2007 Kristoffer Ericson +@@ -26,6 +26,7 @@ static unsigned long jornada_ssp_flags; + + /** + * jornada_ssp_reverse - reverses input byte ++ * @byte: input byte to reverse + * + * we need to reverse all data we receive from the mcu due to its physical location + * returns : 01110111 -> 11101110 +@@ -46,6 +47,7 @@ EXPORT_SYMBOL(jornada_ssp_reverse); + + /** + * jornada_ssp_byte - waits for ready ssp bus and sends byte ++ * @byte: input byte to transmit + * + * waits for fifo buffer to clear and then transmits, if it doesn't then we will + * timeout after rounds. Needs mcu running before its called. 
+@@ -77,6 +79,7 @@ EXPORT_SYMBOL(jornada_ssp_byte); + + /** + * jornada_ssp_inout - decide if input is command or trading byte ++ * @byte: input byte to send (may be %TXDUMMY) + * + * returns : (jornada_ssp_byte(byte)) on success + * : %-ETIMEDOUT on timeout failure +-- +2.39.2 + diff --git a/queue-5.15/drm-i915-dp-prevent-potential-div-by-zero.patch b/queue-5.15/drm-i915-dp-prevent-potential-div-by-zero.patch new file mode 100644 index 00000000000..64d34b2b1ec --- /dev/null +++ b/queue-5.15/drm-i915-dp-prevent-potential-div-by-zero.patch @@ -0,0 +1,50 @@ +From 55ce0816518130a9ee0812604cb3c306207afc4f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 18 Apr 2023 07:04:30 -0700 +Subject: drm/i915/dp: prevent potential div-by-zero + +From: Nikita Zhandarovich + +[ Upstream commit 0ff80028e2702c7c3d78b69705dc47c1ccba8c39 ] + +drm_dp_dsc_sink_max_slice_count() may return 0 if something goes +wrong on the part of the DSC sink and its DPCD register. This null +value may be later used as a divisor in intel_dsc_compute_params(), +which will lead to an error. +In the unlikely event that this issue occurs, fix it by testing the +return value of drm_dp_dsc_sink_max_slice_count() against zero. + +Found by Linux Verification Center (linuxtesting.org) with static +analysis tool SVACE. 
+ +Fixes: a4a157777c80 ("drm/i915/dp: Compute DSC pipe config in atomic check") +Signed-off-by: Nikita Zhandarovich +Reviewed-by: Rodrigo Vivi +Signed-off-by: Rodrigo Vivi +Link: https://patchwork.freedesktop.org/patch/msgid/20230418140430.69902-1-n.zhandarovich@fintech.ru +(cherry picked from commit 51f7008239de011370c5067bbba07f0207f06b72) +Signed-off-by: Joonas Lahtinen +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/i915/display/intel_dp.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c +index 64a15b636e8d4..6cc1258578088 100644 +--- a/drivers/gpu/drm/i915/display/intel_dp.c ++++ b/drivers/gpu/drm/i915/display/intel_dp.c +@@ -1231,6 +1231,11 @@ static int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, + pipe_config->dsc.slice_count = + drm_dp_dsc_sink_max_slice_count(intel_dp->dsc_dpcd, + true); ++ if (!pipe_config->dsc.slice_count) { ++ drm_dbg_kms(&dev_priv->drm, "Unsupported Slice Count %d\n", ++ pipe_config->dsc.slice_count); ++ return -EINVAL; ++ } + } else { + u16 dsc_max_output_bpp; + u8 dsc_dp_slice_count; +-- +2.39.2 + diff --git a/queue-5.15/drm-mipi-dsi-set-the-fwnode-for-mipi_dsi_device.patch b/queue-5.15/drm-mipi-dsi-set-the-fwnode-for-mipi_dsi_device.patch new file mode 100644 index 00000000000..ce9b1ca1e4f --- /dev/null +++ b/queue-5.15/drm-mipi-dsi-set-the-fwnode-for-mipi_dsi_device.patch @@ -0,0 +1,48 @@ +From f98488bd73a61d53248c2e1b5eb41060b6a88c58 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 9 Mar 2023 22:39:09 -0800 +Subject: drm/mipi-dsi: Set the fwnode for mipi_dsi_device + +From: Saravana Kannan + +[ Upstream commit a26cc2934331b57b5a7164bff344f0a2ec245fc0 ] + +After commit 3fb16866b51d ("driver core: fw_devlink: Make cycle +detection more robust"), fw_devlink prints an error when consumer +devices don't have their fwnode set. This used to be ignored silently. 
+ +Set the fwnode mipi_dsi_device so fw_devlink can find them and properly +track their dependencies. + +This fixes errors like this: +[ 0.334054] nwl-dsi 30a00000.mipi-dsi: Failed to create device link with regulator-lcd-1v8 +[ 0.346964] nwl-dsi 30a00000.mipi-dsi: Failed to create device link with backlight-dsi + +Reported-by: Martin Kepplinger +Link: https://lore.kernel.org/lkml/2a8e407f4f18c9350f8629a2b5fa18673355b2ae.camel@puri.sm/ +Fixes: 068a00233969 ("drm: Add MIPI DSI bus support") +Signed-off-by: Saravana Kannan +Tested-by: Martin Kepplinger +Link: https://lore.kernel.org/r/20230310063910.2474472-1-saravanak@google.com +Signed-off-by: Maxime Ripard +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/drm_mipi_dsi.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/drm_mipi_dsi.c b/drivers/gpu/drm/drm_mipi_dsi.c +index 19fb1d93a4f07..0c806e99e8690 100644 +--- a/drivers/gpu/drm/drm_mipi_dsi.c ++++ b/drivers/gpu/drm/drm_mipi_dsi.c +@@ -221,7 +221,7 @@ mipi_dsi_device_register_full(struct mipi_dsi_host *host, + return dsi; + } + +- dsi->dev.of_node = info->node; ++ device_set_node(&dsi->dev, of_fwnode_handle(info->node)); + dsi->channel = info->channel; + strlcpy(dsi->name, info->type, sizeof(dsi->name)); + +-- +2.39.2 + diff --git a/queue-5.15/ext4-allow-ext4_get_group_info-to-fail.patch b/queue-5.15/ext4-allow-ext4_get_group_info-to-fail.patch new file mode 100644 index 00000000000..4d4b2f61344 --- /dev/null +++ b/queue-5.15/ext4-allow-ext4_get_group_info-to-fail.patch @@ -0,0 +1,426 @@ +From 300ee4ec69fdc7a5b7c378d0e8a13c73472dd6db Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 29 Apr 2023 00:06:28 -0400 +Subject: ext4: allow ext4_get_group_info() to fail + +From: Theodore Ts'o + +[ Upstream commit 5354b2af34064a4579be8bc0e2f15a7b70f14b5f ] + +Previously, ext4_get_group_info() would treat an invalid group number +as BUG(), since in theory it should never happen. 
However, if a +malicious attacker (or fuzzer) modifies the superblock via the block +device while the file system is mounted, it is possible for +s_first_data_block to get set to a very large number. In that case, +calculating the block group of some block number (such as the +starting block of a preallocation region) could result in an +underflow and very large block group number. Then the BUG_ON check in +ext4_get_group_info() would fire, resulting in a denial of service +attack that can be triggered by root or someone with write access to +the block device. + +From a quality of implementation perspective, it's best that even if +the system administrator does something that they shouldn't, it +will not trigger a BUG. So instead of BUG'ing, ext4_get_group_info() +will call ext4_error and return NULL. We also add fallback code in +all of the callers of ext4_get_group_info() since it might return NULL. + +Also, since ext4_get_group_info() was already borderline to be an +inline function, un-inline it. This results in a net reduction of the +compiled text size of ext4 by roughly 2k. + +Cc: stable@kernel.org +Link: https://lore.kernel.org/r/20230430154311.579720-2-tytso@mit.edu +Reported-by: syzbot+e2efa3efc15a1c9e95c3@syzkaller.appspotmail.com +Link: https://syzkaller.appspot.com/bug?id=69b28112e098b070f639efb356393af3ffec4220 +Signed-off-by: Theodore Ts'o +Reviewed-by: Jan Kara +Signed-off-by: Sasha Levin +--- + fs/ext4/balloc.c | 18 ++++++++++++- + fs/ext4/ext4.h | 15 ++--------- + fs/ext4/ialloc.c | 12 ++++++--- + fs/ext4/mballoc.c | 64 +++++++++++++++++++++++++++++++++++++++-------- + fs/ext4/super.c | 2 ++ + 5 files changed, 82 insertions(+), 29 deletions(-) + +diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c +index 05ff34e925620..fadcb94e80fa1 100644 +--- a/fs/ext4/balloc.c ++++ b/fs/ext4/balloc.c +@@ -319,6 +319,22 @@ static ext4_fsblk_t ext4_valid_block_bitmap_padding(struct super_block *sb, + return (next_zero_bit < bitmap_size ? 
next_zero_bit : 0); + } + ++struct ext4_group_info *ext4_get_group_info(struct super_block *sb, ++ ext4_group_t group) ++{ ++ struct ext4_group_info **grp_info; ++ long indexv, indexh; ++ ++ if (unlikely(group >= EXT4_SB(sb)->s_groups_count)) { ++ ext4_error(sb, "invalid group %u", group); ++ return NULL; ++ } ++ indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb)); ++ indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1); ++ grp_info = sbi_array_rcu_deref(EXT4_SB(sb), s_group_info, indexv); ++ return grp_info[indexh]; ++} ++ + /* + * Return the block number which was discovered to be invalid, or 0 if + * the block bitmap is valid. +@@ -393,7 +409,7 @@ static int ext4_validate_block_bitmap(struct super_block *sb, + + if (buffer_verified(bh)) + return 0; +- if (EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) ++ if (!grp || EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) + return -EFSCORRUPTED; + + ext4_lock_group(sb, block_group); +diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h +index 80f0942fa1656..b6e2bb6a736b5 100644 +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -2699,6 +2699,8 @@ extern void ext4_check_blocks_bitmap(struct super_block *); + extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, + ext4_group_t block_group, + struct buffer_head ** bh); ++extern struct ext4_group_info *ext4_get_group_info(struct super_block *sb, ++ ext4_group_t group); + extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); + + extern struct buffer_head *ext4_read_block_bitmap_nowait(struct super_block *sb, +@@ -3346,19 +3348,6 @@ static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size) + raw_inode->i_size_high = cpu_to_le32(i_size >> 32); + } + +-static inline +-struct ext4_group_info *ext4_get_group_info(struct super_block *sb, +- ext4_group_t group) +-{ +- struct ext4_group_info **grp_info; +- long indexv, indexh; +- BUG_ON(group >= EXT4_SB(sb)->s_groups_count); +- indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb)); +- indexh = group & 
((EXT4_DESC_PER_BLOCK(sb)) - 1); +- grp_info = sbi_array_rcu_deref(EXT4_SB(sb), s_group_info, indexv); +- return grp_info[indexh]; +-} +- + /* + * Reading s_groups_count requires using smp_rmb() afterwards. See + * the locking protocol documented in the comments of ext4_group_add() +diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c +index 208b87ce88588..745d781da8915 100644 +--- a/fs/ext4/ialloc.c ++++ b/fs/ext4/ialloc.c +@@ -91,7 +91,7 @@ static int ext4_validate_inode_bitmap(struct super_block *sb, + + if (buffer_verified(bh)) + return 0; +- if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) ++ if (!grp || EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) + return -EFSCORRUPTED; + + ext4_lock_group(sb, block_group); +@@ -293,7 +293,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) + } + if (!(sbi->s_mount_state & EXT4_FC_REPLAY)) { + grp = ext4_get_group_info(sb, block_group); +- if (unlikely(EXT4_MB_GRP_IBITMAP_CORRUPT(grp))) { ++ if (!grp || unlikely(EXT4_MB_GRP_IBITMAP_CORRUPT(grp))) { + fatal = -EFSCORRUPTED; + goto error_return; + } +@@ -1048,7 +1048,7 @@ struct inode *__ext4_new_inode(struct user_namespace *mnt_userns, + * Skip groups with already-known suspicious inode + * tables + */ +- if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) ++ if (!grp || EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) + goto next_group; + } + +@@ -1186,6 +1186,10 @@ struct inode *__ext4_new_inode(struct user_namespace *mnt_userns, + + if (!(sbi->s_mount_state & EXT4_FC_REPLAY)) { + grp = ext4_get_group_info(sb, group); ++ if (!grp) { ++ err = -EFSCORRUPTED; ++ goto out; ++ } + down_read(&grp->alloc_sem); /* + * protect vs itable + * lazyinit +@@ -1529,7 +1533,7 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, + } + + gdp = ext4_get_group_desc(sb, group, &group_desc_bh); +- if (!gdp) ++ if (!gdp || !grp) + goto out; + + /* +diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c +index 820804a7afe6e..4cc635633f789 100644 +--- a/fs/ext4/mballoc.c ++++ b/fs/ext4/mballoc.c +@@ -745,6 +745,8 @@ static 
int __mb_check_buddy(struct ext4_buddy *e4b, char *file, + MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments); + + grp = ext4_get_group_info(sb, e4b->bd_group); ++ if (!grp) ++ return NULL; + list_for_each(cur, &grp->bb_prealloc_list) { + ext4_group_t groupnr; + struct ext4_prealloc_space *pa; +@@ -1110,9 +1112,9 @@ mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp) + + static noinline_for_stack + void ext4_mb_generate_buddy(struct super_block *sb, +- void *buddy, void *bitmap, ext4_group_t group) ++ void *buddy, void *bitmap, ext4_group_t group, ++ struct ext4_group_info *grp) + { +- struct ext4_group_info *grp = ext4_get_group_info(sb, group); + struct ext4_sb_info *sbi = EXT4_SB(sb); + ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb); + ext4_grpblk_t i = 0; +@@ -1233,6 +1235,8 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp) + break; + + grinfo = ext4_get_group_info(sb, group); ++ if (!grinfo) ++ continue; + /* + * If page is uptodate then we came here after online resize + * which added some new uninitialized group info structs, so +@@ -1298,6 +1302,10 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp) + group, page->index, i * blocksize); + trace_ext4_mb_buddy_bitmap_load(sb, group); + grinfo = ext4_get_group_info(sb, group); ++ if (!grinfo) { ++ err = -EFSCORRUPTED; ++ goto out; ++ } + grinfo->bb_fragments = 0; + memset(grinfo->bb_counters, 0, + sizeof(*grinfo->bb_counters) * +@@ -1308,7 +1316,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp) + ext4_lock_group(sb, group); + /* init the buddy */ + memset(data, 0xff, blocksize); +- ext4_mb_generate_buddy(sb, data, incore, group); ++ ext4_mb_generate_buddy(sb, data, incore, group, grinfo); + ext4_unlock_group(sb, group); + incore = NULL; + } else { +@@ -1422,6 +1430,9 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp) + might_sleep(); + mb_debug(sb, "init group %u\n", 
group); + this_grp = ext4_get_group_info(sb, group); ++ if (!this_grp) ++ return -EFSCORRUPTED; ++ + /* + * This ensures that we don't reinit the buddy cache + * page which map to the group from which we are already +@@ -1496,6 +1507,8 @@ ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group, + + blocks_per_page = PAGE_SIZE / sb->s_blocksize; + grp = ext4_get_group_info(sb, group); ++ if (!grp) ++ return -EFSCORRUPTED; + + e4b->bd_blkbits = sb->s_blocksize_bits; + e4b->bd_info = grp; +@@ -2206,6 +2219,8 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, + struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); + struct ext4_free_extent ex; + ++ if (!grp) ++ return -EFSCORRUPTED; + if (!(ac->ac_flags & (EXT4_MB_HINT_TRY_GOAL | EXT4_MB_HINT_GOAL_ONLY))) + return 0; + if (grp->bb_free == 0) +@@ -2430,7 +2445,7 @@ static bool ext4_mb_good_group(struct ext4_allocation_context *ac, + + BUG_ON(cr < 0 || cr >= 4); + +- if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp))) ++ if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp) || !grp)) + return false; + + free = grp->bb_free; +@@ -2499,6 +2514,8 @@ static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac, + ext4_grpblk_t free; + int ret = 0; + ++ if (!grp) ++ return -EFSCORRUPTED; + if (sbi->s_mb_stats) + atomic64_inc(&sbi->s_bal_cX_groups_considered[ac->ac_criteria]); + if (should_lock) { +@@ -2579,7 +2596,7 @@ ext4_group_t ext4_mb_prefetch(struct super_block *sb, ext4_group_t group, + * prefetch once, so we avoid getblk() call, which can + * be expensive. 
+ */ +- if (!EXT4_MB_GRP_TEST_AND_SET_READ(grp) && ++ if (gdp && grp && !EXT4_MB_GRP_TEST_AND_SET_READ(grp) && + EXT4_MB_GRP_NEED_INIT(grp) && + ext4_free_group_clusters(sb, gdp) > 0 && + !(ext4_has_group_desc_csum(sb) && +@@ -2623,7 +2640,7 @@ void ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group, + group--; + grp = ext4_get_group_info(sb, group); + +- if (EXT4_MB_GRP_NEED_INIT(grp) && ++ if (grp && gdp && EXT4_MB_GRP_NEED_INIT(grp) && + ext4_free_group_clusters(sb, gdp) > 0 && + !(ext4_has_group_desc_csum(sb) && + (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))) { +@@ -2883,6 +2900,8 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) + sizeof(struct ext4_group_info); + + grinfo = ext4_get_group_info(sb, group); ++ if (!grinfo) ++ return 0; + /* Load the group info in memory only if not already loaded. */ + if (unlikely(EXT4_MB_GRP_NEED_INIT(grinfo))) { + err = ext4_mb_load_buddy(sb, group, &e4b); +@@ -2893,7 +2912,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) + buddy_loaded = 1; + } + +- memcpy(&sg, ext4_get_group_info(sb, group), i); ++ memcpy(&sg, grinfo, i); + + if (buddy_loaded) + ext4_mb_unload_buddy(&e4b); +@@ -3265,8 +3284,12 @@ static int ext4_mb_init_backend(struct super_block *sb) + + err_freebuddy: + cachep = get_groupinfo_cache(sb->s_blocksize_bits); +- while (i-- > 0) +- kmem_cache_free(cachep, ext4_get_group_info(sb, i)); ++ while (i-- > 0) { ++ struct ext4_group_info *grp = ext4_get_group_info(sb, i); ++ ++ if (grp) ++ kmem_cache_free(cachep, grp); ++ } + i = sbi->s_group_info_size; + rcu_read_lock(); + group_info = rcu_dereference(sbi->s_group_info); +@@ -3562,6 +3585,8 @@ int ext4_mb_release(struct super_block *sb) + for (i = 0; i < ngroups; i++) { + cond_resched(); + grinfo = ext4_get_group_info(sb, i); ++ if (!grinfo) ++ continue; + mb_group_bb_bitmap_free(grinfo); + ext4_lock_group(sb, i); + count = ext4_mb_cleanup_pa(grinfo); +@@ -4466,6 +4491,8 @@ static void 
ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, + struct ext4_free_data *entry; + + grp = ext4_get_group_info(sb, group); ++ if (!grp) ++ return; + n = rb_first(&(grp->bb_free_root)); + + while (n) { +@@ -4493,6 +4520,9 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, + int preallocated = 0; + int len; + ++ if (!grp) ++ return; ++ + /* all form of preallocation discards first load group, + * so the only competing code is preallocation use. + * we don't need any locking here +@@ -4684,6 +4714,8 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) + + ei = EXT4_I(ac->ac_inode); + grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group); ++ if (!grp) ++ return; + + pa->pa_obj_lock = &ei->i_prealloc_lock; + pa->pa_inode = ac->ac_inode; +@@ -4737,6 +4769,8 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac) + atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated); + + grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group); ++ if (!grp) ++ return; + lg = ac->ac_lg; + BUG_ON(lg == NULL); + +@@ -4865,6 +4899,8 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, + int err; + int free = 0; + ++ if (!grp) ++ return 0; + mb_debug(sb, "discard preallocation for group %u\n", group); + if (list_empty(&grp->bb_prealloc_list)) + goto out_dbg; +@@ -5102,6 +5138,9 @@ static inline void ext4_mb_show_pa(struct super_block *sb) + struct ext4_prealloc_space *pa; + ext4_grpblk_t start; + struct list_head *cur; ++ ++ if (!grp) ++ continue; + ext4_lock_group(sb, i); + list_for_each(cur, &grp->bb_prealloc_list) { + pa = list_entry(cur, struct ext4_prealloc_space, +@@ -5908,6 +5947,7 @@ static void ext4_mb_clear_bb(handle_t *handle, struct inode *inode, + struct buffer_head *bitmap_bh = NULL; + struct super_block *sb = inode->i_sb; + struct ext4_group_desc *gdp; ++ struct ext4_group_info *grp; + unsigned int overflow; + ext4_grpblk_t bit; + struct buffer_head *gd_bh; +@@ -5933,8 +5973,8 @@ static void ext4_mb_clear_bb(handle_t 
*handle, struct inode *inode, + overflow = 0; + ext4_get_group_no_and_offset(sb, block, &block_group, &bit); + +- if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT( +- ext4_get_group_info(sb, block_group)))) ++ grp = ext4_get_group_info(sb, block_group); ++ if (unlikely(!grp || EXT4_MB_GRP_BBITMAP_CORRUPT(grp))) + return; + + /* +@@ -6537,6 +6577,8 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) + + for (group = first_group; group <= last_group; group++) { + grp = ext4_get_group_info(sb, group); ++ if (!grp) ++ continue; + /* We only do this if the grp has never been initialized */ + if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { + ret = ext4_mb_init_group(sb, group, GFP_NOFS); +diff --git a/fs/ext4/super.c b/fs/ext4/super.c +index c527ec2b041fb..bf8a780cd69b6 100644 +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -1017,6 +1017,8 @@ void ext4_mark_group_bitmap_corrupted(struct super_block *sb, + struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); + int ret; + ++ if (!grp || !gdp) ++ return; + if (flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT) { + ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, + &grp->bb_state); +-- +2.39.2 + diff --git a/queue-5.15/ext4-allow-to-find-by-goal-if-ext4_mb_hint_goal_only.patch b/queue-5.15/ext4-allow-to-find-by-goal-if-ext4_mb_hint_goal_only.patch new file mode 100644 index 00000000000..a91595c9f7f --- /dev/null +++ b/queue-5.15/ext4-allow-to-find-by-goal-if-ext4_mb_hint_goal_only.patch @@ -0,0 +1,43 @@ +From f10fdb183b1950f67e0427f14abc04937f01328c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 4 Mar 2023 01:21:02 +0800 +Subject: ext4: allow to find by goal if EXT4_MB_HINT_GOAL_ONLY is set + +From: Kemeng Shi + +[ Upstream commit 01e4ca29451760b9ac10b4cdc231c52150842643 ] + +If EXT4_MB_HINT_GOAL_ONLY is set, ext4_mb_regular_allocator will only +allocate blocks from ext4_mb_find_by_goal. 
Allow to find by goal in +ext4_mb_find_by_goal if EXT4_MB_HINT_GOAL_ONLY is set or allocation +with EXT4_MB_HINT_GOAL_ONLY set will always fail. + +EXT4_MB_HINT_GOAL_ONLY is not used at all, so the problem is not +found for now. + +Signed-off-by: Kemeng Shi +Reviewed-by: Ojaswin Mujoo +Link: https://lore.kernel.org/r/20230303172120.3800725-3-shikemeng@huaweicloud.com +Signed-off-by: Theodore Ts'o +Stable-dep-of: 5354b2af3406 ("ext4: allow ext4_get_group_info() to fail") +Signed-off-by: Sasha Levin +--- + fs/ext4/mballoc.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c +index 0e0226b30db6a..820804a7afe6e 100644 +--- a/fs/ext4/mballoc.c ++++ b/fs/ext4/mballoc.c +@@ -2206,7 +2206,7 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, + struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); + struct ext4_free_extent ex; + +- if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL)) ++ if (!(ac->ac_flags & (EXT4_MB_HINT_TRY_GOAL | EXT4_MB_HINT_GOAL_ONLY))) + return 0; + if (grp->bb_free == 0) + return 0; +-- +2.39.2 + diff --git a/queue-5.15/ext4-don-t-clear-sb_rdonly-when-remounting-r-w-until.patch b/queue-5.15/ext4-don-t-clear-sb_rdonly-when-remounting-r-w-until.patch new file mode 100644 index 00000000000..69cadab45a6 --- /dev/null +++ b/queue-5.15/ext4-don-t-clear-sb_rdonly-when-remounting-r-w-until.patch @@ -0,0 +1,77 @@ +From bd7f67ef2b9afa2b28c5d4e4334753d4cc1bc7fe Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 5 May 2023 21:02:30 -0400 +Subject: ext4: don't clear SB_RDONLY when remounting r/w until quota is + re-enabled + +From: Theodore Ts'o + +[ Upstream commit a44be64bbecb15a452496f60db6eacfee2b59c79 ] + +When a file system currently mounted read/only is remounted +read/write, if we clear the SB_RDONLY flag too early, before the quota +is initialized, and there is another process/thread constantly +attempting to create a directory, it's possible to trigger the + + 
WARN_ON_ONCE(dquot_initialize_needed(inode)); + +in ext4_xattr_block_set(), with the following stack trace: + + WARNING: CPU: 0 PID: 5338 at fs/ext4/xattr.c:2141 ext4_xattr_block_set+0x2ef2/0x3680 + RIP: 0010:ext4_xattr_block_set+0x2ef2/0x3680 fs/ext4/xattr.c:2141 + Call Trace: + ext4_xattr_set_handle+0xcd4/0x15c0 fs/ext4/xattr.c:2458 + ext4_initxattrs+0xa3/0x110 fs/ext4/xattr_security.c:44 + security_inode_init_security+0x2df/0x3f0 security/security.c:1147 + __ext4_new_inode+0x347e/0x43d0 fs/ext4/ialloc.c:1324 + ext4_mkdir+0x425/0xce0 fs/ext4/namei.c:2992 + vfs_mkdir+0x29d/0x450 fs/namei.c:4038 + do_mkdirat+0x264/0x520 fs/namei.c:4061 + __do_sys_mkdirat fs/namei.c:4076 [inline] + __se_sys_mkdirat fs/namei.c:4074 [inline] + __x64_sys_mkdirat+0x89/0xa0 fs/namei.c:4074 + +Cc: stable@kernel.org +Link: https://lore.kernel.org/r/20230506142419.984260-1-tytso@mit.edu +Reported-by: syzbot+6385d7d3065524c5ca6d@syzkaller.appspotmail.com +Link: https://syzkaller.appspot.com/bug?id=6513f6cb5cd6b5fc9f37e3bb70d273b94be9c34c +Signed-off-by: Theodore Ts'o +Signed-off-by: Sasha Levin +--- + fs/ext4/super.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/fs/ext4/super.c b/fs/ext4/super.c +index d062bad1384be..c527ec2b041fb 100644 +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -5746,6 +5746,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) + struct ext4_mount_options old_opts; + ext4_group_t g; + int err = 0; ++ int enable_rw = 0; + #ifdef CONFIG_QUOTA + int enable_quota = 0; + int i, j; +@@ -5946,7 +5947,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) + if (err) + goto restore_opts; + +- sb->s_flags &= ~SB_RDONLY; ++ enable_rw = 1; + if (ext4_has_feature_mmp(sb)) { + err = ext4_multi_mount_protect(sb, + le64_to_cpu(es->s_mmp_block)); +@@ -6005,6 +6006,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) + if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks) + 
ext4_release_system_zone(sb); + ++ if (enable_rw) ++ sb->s_flags &= ~SB_RDONLY; ++ + if (!ext4_has_feature_mmp(sb) || sb_rdonly(sb)) + ext4_stop_mmpd(sbi); + +-- +2.39.2 + diff --git a/queue-5.15/ext4-fix-lockdep-warning-when-enabling-mmp.patch b/queue-5.15/ext4-fix-lockdep-warning-when-enabling-mmp.patch new file mode 100644 index 00000000000..92a11693cb9 --- /dev/null +++ b/queue-5.15/ext4-fix-lockdep-warning-when-enabling-mmp.patch @@ -0,0 +1,94 @@ +From 788013b552503bfb6b05ee55dbbde0c8ad75cb1d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 11 Apr 2023 14:10:19 +0200 +Subject: ext4: fix lockdep warning when enabling MMP + +From: Jan Kara + +[ Upstream commit 949f95ff39bf188e594e7ecd8e29b82eb108f5bf ] + +When we enable MMP in ext4_multi_mount_protect() during mount or +remount, we end up calling sb_start_write() from write_mmp_block(). This +triggers lockdep warning because freeze protection ranks above s_umount +semaphore we are holding during mount / remount. The problem is harmless +because we are guaranteed the filesystem is not frozen during mount / +remount but still let's fix the warning by not grabbing freeze +protection from ext4_multi_mount_protect(). + +Cc: stable@kernel.org +Reported-by: syzbot+6b7df7d5506b32467149@syzkaller.appspotmail.com +Link: https://syzkaller.appspot.com/bug?id=ab7e5b6f400b7778d46f01841422e5718fb81843 +Signed-off-by: Jan Kara +Reviewed-by: Christian Brauner +Link: https://lore.kernel.org/r/20230411121019.21940-1-jack@suse.cz +Signed-off-by: Theodore Ts'o +Signed-off-by: Sasha Levin +--- + fs/ext4/mmp.c | 30 +++++++++++++++++++++--------- + 1 file changed, 21 insertions(+), 9 deletions(-) + +diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c +index 28129a8db713c..3e8bce19ad16d 100644 +--- a/fs/ext4/mmp.c ++++ b/fs/ext4/mmp.c +@@ -39,28 +39,36 @@ static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp) + * Write the MMP block using REQ_SYNC to try to get the block on-disk + * faster. 
+ */ +-static int write_mmp_block(struct super_block *sb, struct buffer_head *bh) ++static int write_mmp_block_thawed(struct super_block *sb, ++ struct buffer_head *bh) + { + struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data); + +- /* +- * We protect against freezing so that we don't create dirty buffers +- * on frozen filesystem. +- */ +- sb_start_write(sb); + ext4_mmp_csum_set(sb, mmp); + lock_buffer(bh); + bh->b_end_io = end_buffer_write_sync; + get_bh(bh); + submit_bh(REQ_OP_WRITE, REQ_SYNC | REQ_META | REQ_PRIO, bh); + wait_on_buffer(bh); +- sb_end_write(sb); + if (unlikely(!buffer_uptodate(bh))) + return -EIO; +- + return 0; + } + ++static int write_mmp_block(struct super_block *sb, struct buffer_head *bh) ++{ ++ int err; ++ ++ /* ++ * We protect against freezing so that we don't create dirty buffers ++ * on frozen filesystem. ++ */ ++ sb_start_write(sb); ++ err = write_mmp_block_thawed(sb, bh); ++ sb_end_write(sb); ++ return err; ++} ++ + /* + * Read the MMP block. It _must_ be read from disk and hence we clear the + * uptodate flag on the buffer. 
+@@ -352,7 +360,11 @@ int ext4_multi_mount_protect(struct super_block *sb, + seq = mmp_new_seq(); + mmp->mmp_seq = cpu_to_le32(seq); + +- retval = write_mmp_block(sb, bh); ++ /* ++ * On mount / remount we are protected against fs freezing (by s_umount ++ * semaphore) and grabbing freeze protection upsets lockdep ++ */ ++ retval = write_mmp_block_thawed(sb, bh); + if (retval) + goto failed; + +-- +2.39.2 + diff --git a/queue-5.15/ext4-reflect-error-codes-from-ext4_multi_mount_prote.patch b/queue-5.15/ext4-reflect-error-codes-from-ext4_multi_mount_prote.patch new file mode 100644 index 00000000000..497b0a2945a --- /dev/null +++ b/queue-5.15/ext4-reflect-error-codes-from-ext4_multi_mount_prote.patch @@ -0,0 +1,128 @@ +From 0585a642a9cb2b0e7349487ddc8310c653af8b0d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 27 Apr 2023 22:49:34 -0400 +Subject: ext4: reflect error codes from ext4_multi_mount_protect() to its + callers + +From: Theodore Ts'o + +[ Upstream commit 3b50d5018ed06a647bb26c44bb5ae74e59c903c7 ] + +This will allow more fine-grained errno codes to be returned by the +mount system call. 
+ +Cc: Andreas Dilger +Signed-off-by: Theodore Ts'o +Stable-dep-of: a44be64bbecb ("ext4: don't clear SB_RDONLY when remounting r/w until quota is re-enabled") +Signed-off-by: Sasha Levin +--- + fs/ext4/mmp.c | 9 ++++++++- + fs/ext4/super.c | 16 +++++++++------- + 2 files changed, 17 insertions(+), 8 deletions(-) + +diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c +index cebea4270817e..28129a8db713c 100644 +--- a/fs/ext4/mmp.c ++++ b/fs/ext4/mmp.c +@@ -290,6 +290,7 @@ int ext4_multi_mount_protect(struct super_block *sb, + if (mmp_block < le32_to_cpu(es->s_first_data_block) || + mmp_block >= ext4_blocks_count(es)) { + ext4_warning(sb, "Invalid MMP block in superblock"); ++ retval = -EINVAL; + goto failed; + } + +@@ -315,6 +316,7 @@ int ext4_multi_mount_protect(struct super_block *sb, + + if (seq == EXT4_MMP_SEQ_FSCK) { + dump_mmp_msg(sb, mmp, "fsck is running on the filesystem"); ++ retval = -EBUSY; + goto failed; + } + +@@ -328,6 +330,7 @@ int ext4_multi_mount_protect(struct super_block *sb, + + if (schedule_timeout_interruptible(HZ * wait_time) != 0) { + ext4_warning(sb, "MMP startup interrupted, failing mount\n"); ++ retval = -ETIMEDOUT; + goto failed; + } + +@@ -338,6 +341,7 @@ int ext4_multi_mount_protect(struct super_block *sb, + if (seq != le32_to_cpu(mmp->mmp_seq)) { + dump_mmp_msg(sb, mmp, + "Device is already active on another node."); ++ retval = -EBUSY; + goto failed; + } + +@@ -357,6 +361,7 @@ int ext4_multi_mount_protect(struct super_block *sb, + */ + if (schedule_timeout_interruptible(HZ * wait_time) != 0) { + ext4_warning(sb, "MMP startup interrupted, failing mount"); ++ retval = -ETIMEDOUT; + goto failed; + } + +@@ -367,6 +372,7 @@ int ext4_multi_mount_protect(struct super_block *sb, + if (seq != le32_to_cpu(mmp->mmp_seq)) { + dump_mmp_msg(sb, mmp, + "Device is already active on another node."); ++ retval = -EBUSY; + goto failed; + } + +@@ -383,6 +389,7 @@ int ext4_multi_mount_protect(struct super_block *sb, + EXT4_SB(sb)->s_mmp_tsk = NULL; + 
ext4_warning(sb, "Unable to create kmmpd thread for %s.", + sb->s_id); ++ retval = -ENOMEM; + goto failed; + } + +@@ -390,5 +397,5 @@ int ext4_multi_mount_protect(struct super_block *sb, + + failed: + brelse(bh); +- return 1; ++ return retval; + } +diff --git a/fs/ext4/super.c b/fs/ext4/super.c +index ca0997fcd1215..d062bad1384be 100644 +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -4646,9 +4646,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) + ext4_has_feature_orphan_present(sb) || + ext4_has_feature_journal_needs_recovery(sb)); + +- if (ext4_has_feature_mmp(sb) && !sb_rdonly(sb)) +- if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block))) ++ if (ext4_has_feature_mmp(sb) && !sb_rdonly(sb)) { ++ err = ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block)); ++ if (err) + goto failed_mount3a; ++ } + + /* + * The first inode we look at is the journal inode. Don't try +@@ -5945,12 +5947,12 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) + goto restore_opts; + + sb->s_flags &= ~SB_RDONLY; +- if (ext4_has_feature_mmp(sb)) +- if (ext4_multi_mount_protect(sb, +- le64_to_cpu(es->s_mmp_block))) { +- err = -EROFS; ++ if (ext4_has_feature_mmp(sb)) { ++ err = ext4_multi_mount_protect(sb, ++ le64_to_cpu(es->s_mmp_block)); ++ if (err) + goto restore_opts; +- } ++ } + #ifdef CONFIG_QUOTA + enable_quota = 1; + #endif +-- +2.39.2 + diff --git a/queue-5.15/ext4-remove-an-unused-variable-warning-with-config_q.patch b/queue-5.15/ext4-remove-an-unused-variable-warning-with-config_q.patch new file mode 100644 index 00000000000..16755b57490 --- /dev/null +++ b/queue-5.15/ext4-remove-an-unused-variable-warning-with-config_q.patch @@ -0,0 +1,63 @@ +From 518e14f3716293e9dcda16fd6533c4035a4ecc31 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 24 Aug 2021 04:49:29 +0100 +Subject: ext4: remove an unused variable warning with CONFIG_QUOTA=n +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 
+Content-Transfer-Encoding: 8bit + +From: Austin Kim + +[ Upstream commit 3bbef91bdd2180c67407285ba160b023eb4d5306 ] + +The 'enable_quota' variable is only used in a CONFIG_QUOTA block. +With CONFIG_QUOTA=n, the compiler emits a harmless warning: + +fs/ext4/super.c: In function ‘ext4_remount’: +fs/ext4/super.c:5840:6: warning: variable ‘enable_quota’ set but not used + [-Wunused-but-set-variable] + int enable_quota = 0; + ^~~~~ + +Move 'enable_quota' into the same #ifdef CONFIG_QUOTA block +to remove an unused variable warning. + +Signed-off-by: Austin Kim +Reviewed-by: Jan Kara +Link: https://lore.kernel.org/r/20210824034929.GA13415@raspberrypi +Signed-off-by: Theodore Ts'o +Stable-dep-of: a44be64bbecb ("ext4: don't clear SB_RDONLY when remounting r/w until quota is re-enabled") +Signed-off-by: Sasha Levin +--- + fs/ext4/super.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/fs/ext4/super.c b/fs/ext4/super.c +index e6cd2bf9508e4..ca0997fcd1215 100644 +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -5742,10 +5742,10 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) + struct ext4_sb_info *sbi = EXT4_SB(sb); + unsigned long old_sb_flags, vfs_flags; + struct ext4_mount_options old_opts; +- int enable_quota = 0; + ext4_group_t g; + int err = 0; + #ifdef CONFIG_QUOTA ++ int enable_quota = 0; + int i, j; + char *to_free[EXT4_MAXQUOTAS]; + #endif +@@ -5951,7 +5951,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) + err = -EROFS; + goto restore_opts; + } ++#ifdef CONFIG_QUOTA + enable_quota = 1; ++#endif + } + } + +-- +2.39.2 + diff --git a/queue-5.15/fbdev-arcfb-fix-error-handling-in-arcfb_probe.patch b/queue-5.15/fbdev-arcfb-fix-error-handling-in-arcfb_probe.patch new file mode 100644 index 00000000000..72c4d137ffd --- /dev/null +++ b/queue-5.15/fbdev-arcfb-fix-error-handling-in-arcfb_probe.patch @@ -0,0 +1,81 @@ +From 549a46f10fb927383e65542761423419e81789c2 Mon Sep 17 00:00:00 2001 +From: Sasha
Levin +Date: Tue, 9 May 2023 19:27:26 +0800 +Subject: fbdev: arcfb: Fix error handling in arcfb_probe() + +From: Zongjie Li + +[ Upstream commit 5a6bef734247c7a8c19511664ff77634ab86f45b ] + +Smatch complains that: +arcfb_probe() warn: 'irq' from request_irq() not released on lines: 587. + +Fix error handling in the arcfb_probe() function. If IO addresses are +not provided or framebuffer registration fails, the code will jump to +the err_addr or err_register_fb label to release resources. +If IRQ request fails, previously allocated resources will be freed. + +Fixes: 1154ea7dcd8e ("[PATCH] Framebuffer driver for Arc LCD board") +Signed-off-by: Zongjie Li +Reviewed-by: Dongliang Mu +Signed-off-by: Helge Deller +Signed-off-by: Sasha Levin +--- + drivers/video/fbdev/arcfb.c | 15 +++++++++------ + 1 file changed, 9 insertions(+), 6 deletions(-) + +diff --git a/drivers/video/fbdev/arcfb.c b/drivers/video/fbdev/arcfb.c +index 45e64016db328..024d0ee4f04f9 100644 +--- a/drivers/video/fbdev/arcfb.c ++++ b/drivers/video/fbdev/arcfb.c +@@ -523,7 +523,7 @@ static int arcfb_probe(struct platform_device *dev) + + info = framebuffer_alloc(sizeof(struct arcfb_par), &dev->dev); + if (!info) +- goto err; ++ goto err_fb_alloc; + + info->screen_base = (char __iomem *)videomemory; + info->fbops = &arcfb_ops; +@@ -535,7 +535,7 @@ static int arcfb_probe(struct platform_device *dev) + + if (!dio_addr || !cio_addr || !c2io_addr) { + printk(KERN_WARNING "no IO addresses supplied\n"); +- goto err1; ++ goto err_addr; + } + par->dio_addr = dio_addr; + par->cio_addr = cio_addr; +@@ -551,12 +551,12 @@ static int arcfb_probe(struct platform_device *dev) + printk(KERN_INFO + "arcfb: Failed req IRQ %d\n", par->irq); + retval = -EBUSY; +- goto err1; ++ goto err_addr; + } + } + retval = register_framebuffer(info); + if (retval < 0) +- goto err1; ++ goto err_register_fb; + platform_set_drvdata(dev, info); + fb_info(info, "Arc frame buffer device, using %dK of video memory\n", + videomemorysize >> 10); 
+@@ -580,9 +580,12 @@ static int arcfb_probe(struct platform_device *dev) + } + + return 0; +-err1: ++ ++err_register_fb: ++ free_irq(par->irq, info); ++err_addr: + framebuffer_release(info); +-err: ++err_fb_alloc: + vfree(videomemory); + return retval; + } +-- +2.39.2 + diff --git a/queue-5.15/gve-remove-the-code-of-clearing-pba-bit.patch b/queue-5.15/gve-remove-the-code-of-clearing-pba-bit.patch new file mode 100644 index 00000000000..8ed7d3228de --- /dev/null +++ b/queue-5.15/gve-remove-the-code-of-clearing-pba-bit.patch @@ -0,0 +1,50 @@ +From a476f8c432738b58c6c3a6a85bfbee8d0d141e3f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 9 May 2023 15:51:23 -0700 +Subject: gve: Remove the code of clearing PBA bit + +From: Ziwei Xiao + +[ Upstream commit f4c2e67c1773d2a2632381ee30e9139c1e744c16 ] + +Clearing the PBA bit from the driver is race prone and it may lead to +dropped interrupt events. This could potentially lead to the traffic +being completely halted. + +Fixes: 5e8c5adf95f8 ("gve: DQO: Add core netdev features") +Signed-off-by: Ziwei Xiao +Signed-off-by: Bailey Forrest +Reviewed-by: Simon Horman +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/google/gve/gve_main.c | 13 ------------- + 1 file changed, 13 deletions(-) + +diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c +index 49850cf7cfafd..c0ea1b185e1bd 100644 +--- a/drivers/net/ethernet/google/gve/gve_main.c ++++ b/drivers/net/ethernet/google/gve/gve_main.c +@@ -233,19 +233,6 @@ static int gve_napi_poll_dqo(struct napi_struct *napi, int budget) + bool reschedule = false; + int work_done = 0; + +- /* Clear PCI MSI-X Pending Bit Array (PBA) +- * +- * This bit is set if an interrupt event occurs while the vector is +- * masked. If this bit is set and we reenable the interrupt, it will +- * fire again. Since we're just about to poll the queue state, we don't +- * need it to fire again. 
+- * +- * Under high softirq load, it's possible that the interrupt condition +- * is triggered twice before we got the chance to process it. +- */ +- gve_write_irq_doorbell_dqo(priv, block, +- GVE_ITR_NO_UPDATE_DQO | GVE_ITR_CLEAR_PBA_BIT_DQO); +- + if (block->tx) + reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true); + +-- +2.39.2 + diff --git a/queue-5.15/ipvlan-fix-out-of-bounds-caused-by-unclear-skb-cb.patch b/queue-5.15/ipvlan-fix-out-of-bounds-caused-by-unclear-skb-cb.patch new file mode 100644 index 00000000000..ab53211be57 --- /dev/null +++ b/queue-5.15/ipvlan-fix-out-of-bounds-caused-by-unclear-skb-cb.patch @@ -0,0 +1,172 @@ +From 72a55011970a2ef4ca7cded9f979961eb635ac5c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 10 May 2023 11:50:44 +0800 +Subject: ipvlan:Fix out-of-bounds caused by unclear skb->cb + +From: t.feng + +[ Upstream commit 90cbed5247439a966b645b34eb0a2e037836ea8e ] + +If the skb is enqueued to a qdisc, fq_skb_cb(skb)->time_to_send (which +actually lives in skb->cb) is modified, and IPCB(skb_in)->opt is later used +in __ip_options_echo. The memcpy there can then go out of bounds and lead +to a stack overflow. +We should clear skb->cb before ip_local_out or ip6_local_out. + +v2: +1. clean the stack info +2. use IPCB/IP6CB instead of skb->cb + +Crash on stable-5.10 (reproduced on a KASAN kernel).
+Stack info: +[ 2203.651571] BUG: KASAN: stack-out-of-bounds in +__ip_options_echo+0x589/0x800 +[ 2203.653327] Write of size 4 at addr ffff88811a388f27 by task +swapper/3/0 +[ 2203.655460] CPU: 3 PID: 0 Comm: swapper/3 Kdump: loaded Not tainted +5.10.0-60.18.0.50.h856.kasan.eulerosv2r11.x86_64 #1 +[ 2203.655466] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), +BIOS rel-1.10.2-0-g5f4c7b1-20181220_000000-szxrtosci10000 04/01/2014 +[ 2203.655475] Call Trace: +[ 2203.655481] +[ 2203.655501] dump_stack+0x9c/0xd3 +[ 2203.655514] print_address_description.constprop.0+0x19/0x170 +[ 2203.655530] __kasan_report.cold+0x6c/0x84 +[ 2203.655586] kasan_report+0x3a/0x50 +[ 2203.655594] check_memory_region+0xfd/0x1f0 +[ 2203.655601] memcpy+0x39/0x60 +[ 2203.655608] __ip_options_echo+0x589/0x800 +[ 2203.655654] __icmp_send+0x59a/0x960 +[ 2203.655755] nf_send_unreach+0x129/0x3d0 [nf_reject_ipv4] +[ 2203.655763] reject_tg+0x77/0x1bf [ipt_REJECT] +[ 2203.655772] ipt_do_table+0x691/0xa40 [ip_tables] +[ 2203.655821] nf_hook_slow+0x69/0x100 +[ 2203.655828] __ip_local_out+0x21e/0x2b0 +[ 2203.655857] ip_local_out+0x28/0x90 +[ 2203.655868] ipvlan_process_v4_outbound+0x21e/0x260 [ipvlan] +[ 2203.655931] ipvlan_xmit_mode_l3+0x3bd/0x400 [ipvlan] +[ 2203.655967] ipvlan_queue_xmit+0xb3/0x190 [ipvlan] +[ 2203.655977] ipvlan_start_xmit+0x2e/0xb0 [ipvlan] +[ 2203.655984] xmit_one.constprop.0+0xe1/0x280 +[ 2203.655992] dev_hard_start_xmit+0x62/0x100 +[ 2203.656000] sch_direct_xmit+0x215/0x640 +[ 2203.656028] __qdisc_run+0x153/0x1f0 +[ 2203.656069] __dev_queue_xmit+0x77f/0x1030 +[ 2203.656173] ip_finish_output2+0x59b/0xc20 +[ 2203.656244] __ip_finish_output.part.0+0x318/0x3d0 +[ 2203.656312] ip_finish_output+0x168/0x190 +[ 2203.656320] ip_output+0x12d/0x220 +[ 2203.656357] __ip_queue_xmit+0x392/0x880 +[ 2203.656380] __tcp_transmit_skb+0x1088/0x11c0 +[ 2203.656436] __tcp_retransmit_skb+0x475/0xa30 +[ 2203.656505] tcp_retransmit_skb+0x2d/0x190 +[ 2203.656512] tcp_retransmit_timer+0x3af/0x9a0 +[ 
2203.656519] tcp_write_timer_handler+0x3ba/0x510 +[ 2203.656529] tcp_write_timer+0x55/0x180 +[ 2203.656542] call_timer_fn+0x3f/0x1d0 +[ 2203.656555] expire_timers+0x160/0x200 +[ 2203.656562] run_timer_softirq+0x1f4/0x480 +[ 2203.656606] __do_softirq+0xfd/0x402 +[ 2203.656613] asm_call_irq_on_stack+0x12/0x20 +[ 2203.656617] +[ 2203.656623] do_softirq_own_stack+0x37/0x50 +[ 2203.656631] irq_exit_rcu+0x134/0x1a0 +[ 2203.656639] sysvec_apic_timer_interrupt+0x36/0x80 +[ 2203.656646] asm_sysvec_apic_timer_interrupt+0x12/0x20 +[ 2203.656654] RIP: 0010:default_idle+0x13/0x20 +[ 2203.656663] Code: 89 f0 5d 41 5c 41 5d 41 5e c3 cc cc cc cc cc cc cc +cc cc cc cc cc cc 0f 1f 44 00 00 0f 1f 44 00 00 0f 00 2d 9f 32 57 00 fb +f4 cc cc cc cc 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 41 54 be 08 +[ 2203.656668] RSP: 0018:ffff88810036fe78 EFLAGS: 00000256 +[ 2203.656676] RAX: ffffffffaf2a87f0 RBX: ffff888100360000 RCX: +ffffffffaf290191 +[ 2203.656681] RDX: 0000000000098b5e RSI: 0000000000000004 RDI: +ffff88811a3c4f60 +[ 2203.656686] RBP: 0000000000000000 R08: 0000000000000001 R09: +ffff88811a3c4f63 +[ 2203.656690] R10: ffffed10234789ec R11: 0000000000000001 R12: +0000000000000003 +[ 2203.656695] R13: ffff888100360000 R14: 0000000000000000 R15: +0000000000000000 +[ 2203.656729] default_idle_call+0x5a/0x150 +[ 2203.656735] cpuidle_idle_call+0x1c6/0x220 +[ 2203.656780] do_idle+0xab/0x100 +[ 2203.656786] cpu_startup_entry+0x19/0x20 +[ 2203.656793] secondary_startup_64_no_verify+0xc2/0xcb + +[ 2203.657409] The buggy address belongs to the page: +[ 2203.658648] page:0000000027a9842f refcount:1 mapcount:0 +mapping:0000000000000000 index:0x0 pfn:0x11a388 +[ 2203.658665] flags: +0x17ffffc0001000(reserved|node=0|zone=2|lastcpupid=0x1fffff) +[ 2203.658675] raw: 0017ffffc0001000 ffffea000468e208 ffffea000468e208 +0000000000000000 +[ 2203.658682] raw: 0000000000000000 0000000000000000 00000001ffffffff +0000000000000000 +[ 2203.658686] page dumped because: kasan: bad access detected + +To 
reproduce(ipvlan with IPVLAN_MODE_L3): +Env setting: +======================================================= +modprobe ipvlan ipvlan_default_mode=1 +sysctl net.ipv4.conf.eth0.forwarding=1 +iptables -t nat -A POSTROUTING -s 20.0.0.0/255.255.255.0 -o eth0 -j +MASQUERADE +ip link add gw link eth0 type ipvlan +ip -4 addr add 20.0.0.254/24 dev gw +ip netns add net1 +ip link add ipv1 link eth0 type ipvlan +ip link set ipv1 netns net1 +ip netns exec net1 ip link set ipv1 up +ip netns exec net1 ip -4 addr add 20.0.0.4/24 dev ipv1 +ip netns exec net1 route add default gw 20.0.0.254 +ip netns exec net1 tc qdisc add dev ipv1 root netem loss 10% +ifconfig gw up +iptables -t filter -A OUTPUT -p tcp --dport 8888 -j REJECT --reject-with +icmp-port-unreachable +======================================================= +And then execute the shell(curl any address of eth0 can reach): + +for((i=1;i<=100000;i++)) +do + ip netns exec net1 curl x.x.x.x:8888 +done +======================================================= + +Fixes: 2ad7bf363841 ("ipvlan: Initial check-in of the IPVLAN driver.") +Signed-off-by: "t.feng" +Suggested-by: Florian Westphal +Reviewed-by: Paolo Abeni +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ipvlan/ipvlan_core.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c +index d7fb6302d699b..1f5125698e83a 100644 +--- a/drivers/net/ipvlan/ipvlan_core.c ++++ b/drivers/net/ipvlan/ipvlan_core.c +@@ -437,6 +437,9 @@ static int ipvlan_process_v4_outbound(struct sk_buff *skb) + goto err; + } + skb_dst_set(skb, &rt->dst); ++ ++ memset(IPCB(skb), 0, sizeof(*IPCB(skb))); ++ + err = ip_local_out(net, skb->sk, skb); + if (unlikely(net_xmit_eval(err))) + dev->stats.tx_errors++; +@@ -475,6 +478,9 @@ static int ipvlan_process_v6_outbound(struct sk_buff *skb) + goto err; + } + skb_dst_set(skb, dst); ++ ++ memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); ++ + err = ip6_local_out(net, skb->sk, skb); + if (unlikely(net_xmit_eval(err))) + dev->stats.tx_errors++; +-- +2.39.2 + diff --git a/queue-5.15/linux-dim-do-nothing-if-no-time-delta-between-sample.patch b/queue-5.15/linux-dim-do-nothing-if-no-time-delta-between-sample.patch new file mode 100644 index 00000000000..81ae950807a --- /dev/null +++ b/queue-5.15/linux-dim-do-nothing-if-no-time-delta-between-sample.patch @@ -0,0 +1,108 @@ +From 4985ecdc4e83f4844ed996e3363ed92d766226af Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 7 May 2023 16:57:43 +0300 +Subject: linux/dim: Do nothing if no time delta between samples + +From: Roy Novich + +[ Upstream commit 162bd18eb55adf464a0fa2b4144b8d61c75ff7c2 ] + +Add return value for dim_calc_stats. This is an indication for the +caller if curr_stats was assigned by the function. Avoid using +curr_stats uninitialized over {rdma/net}_dim, when no time delta between +samples. Coverity reported this potential use of an uninitialized +variable. 
+ +Fixes: 4c4dbb4a7363 ("net/mlx5e: Move dynamic interrupt coalescing code to include/linux") +Fixes: cb3c7fd4f839 ("net/mlx5e: Support adaptive RX coalescing") +Signed-off-by: Roy Novich +Reviewed-by: Aya Levin +Reviewed-by: Saeed Mahameed +Signed-off-by: Tariq Toukan +Reviewed-by: Leon Romanovsky +Reviewed-by: Michal Kubiak +Link: https://lore.kernel.org/r/20230507135743.138993-1-tariqt@nvidia.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + include/linux/dim.h | 3 ++- + lib/dim/dim.c | 5 +++-- + lib/dim/net_dim.c | 3 ++- + lib/dim/rdma_dim.c | 3 ++- + 4 files changed, 9 insertions(+), 5 deletions(-) + +diff --git a/include/linux/dim.h b/include/linux/dim.h +index 6c5733981563e..f343bc9aa2ec9 100644 +--- a/include/linux/dim.h ++++ b/include/linux/dim.h +@@ -236,8 +236,9 @@ void dim_park_tired(struct dim *dim); + * + * Calculate the delta between two samples (in data rates). + * Takes into consideration counter wrap-around. ++ * Returned boolean indicates whether curr_stats are reliable. 
+ */ +-void dim_calc_stats(struct dim_sample *start, struct dim_sample *end, ++bool dim_calc_stats(struct dim_sample *start, struct dim_sample *end, + struct dim_stats *curr_stats); + + /** +diff --git a/lib/dim/dim.c b/lib/dim/dim.c +index 38045d6d05381..e89aaf07bde50 100644 +--- a/lib/dim/dim.c ++++ b/lib/dim/dim.c +@@ -54,7 +54,7 @@ void dim_park_tired(struct dim *dim) + } + EXPORT_SYMBOL(dim_park_tired); + +-void dim_calc_stats(struct dim_sample *start, struct dim_sample *end, ++bool dim_calc_stats(struct dim_sample *start, struct dim_sample *end, + struct dim_stats *curr_stats) + { + /* u32 holds up to 71 minutes, should be enough */ +@@ -66,7 +66,7 @@ void dim_calc_stats(struct dim_sample *start, struct dim_sample *end, + start->comp_ctr); + + if (!delta_us) +- return; ++ return false; + + curr_stats->ppms = DIV_ROUND_UP(npkts * USEC_PER_MSEC, delta_us); + curr_stats->bpms = DIV_ROUND_UP(nbytes * USEC_PER_MSEC, delta_us); +@@ -79,5 +79,6 @@ void dim_calc_stats(struct dim_sample *start, struct dim_sample *end, + else + curr_stats->cpe_ratio = 0; + ++ return true; + } + EXPORT_SYMBOL(dim_calc_stats); +diff --git a/lib/dim/net_dim.c b/lib/dim/net_dim.c +index 53f6b9c6e9366..4e32f7aaac86c 100644 +--- a/lib/dim/net_dim.c ++++ b/lib/dim/net_dim.c +@@ -227,7 +227,8 @@ void net_dim(struct dim *dim, struct dim_sample end_sample) + dim->start_sample.event_ctr); + if (nevents < DIM_NEVENTS) + break; +- dim_calc_stats(&dim->start_sample, &end_sample, &curr_stats); ++ if (!dim_calc_stats(&dim->start_sample, &end_sample, &curr_stats)) ++ break; + if (net_dim_decision(&curr_stats, dim)) { + dim->state = DIM_APPLY_NEW_PROFILE; + schedule_work(&dim->work); +diff --git a/lib/dim/rdma_dim.c b/lib/dim/rdma_dim.c +index 15462d54758d3..88f7794867078 100644 +--- a/lib/dim/rdma_dim.c ++++ b/lib/dim/rdma_dim.c +@@ -88,7 +88,8 @@ void rdma_dim(struct dim *dim, u64 completions) + nevents = curr_sample->event_ctr - dim->start_sample.event_ctr; + if (nevents < DIM_NEVENTS) + break; +- 
dim_calc_stats(&dim->start_sample, curr_sample, &curr_stats); ++ if (!dim_calc_stats(&dim->start_sample, curr_sample, &curr_stats)) ++ break; + if (rdma_dim_decision(&curr_stats, dim)) { + dim->state = DIM_APPLY_NEW_PROFILE; + schedule_work(&dim->work); +-- +2.39.2 + diff --git a/queue-5.15/net-add-vlan_get_protocol_and_depth-helper.patch b/queue-5.15/net-add-vlan_get_protocol_and_depth-helper.patch new file mode 100644 index 00000000000..9305c082a42 --- /dev/null +++ b/queue-5.15/net-add-vlan_get_protocol_and_depth-helper.patch @@ -0,0 +1,174 @@ +From af04a9b0c6ccd49abf0038acb530403127484e3b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 9 May 2023 13:18:57 +0000 +Subject: net: add vlan_get_protocol_and_depth() helper +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Eric Dumazet + +[ Upstream commit 4063384ef762cc5946fc7a3f89879e76c6ec51e2 ] + +Before blamed commit, pskb_may_pull() was used instead +of skb_header_pointer() in __vlan_get_protocol() and friends. + +Few callers depended on skb->head being populated with MAC header, +syzbot caught one of them (skb_mac_gso_segment()) + +Add vlan_get_protocol_and_depth() to make the intent clearer +and use it where sensible. + +This is a more generic fix than commit e9d3f80935b6 +("net/af_packet: make sure to pull mac header") which was +dealing with a similar issue. + +kernel BUG at include/linux/skbuff.h:2655 ! 
+invalid opcode: 0000 [#1] SMP KASAN +CPU: 0 PID: 1441 Comm: syz-executor199 Not tainted 6.1.24-syzkaller #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 04/14/2023 +RIP: 0010:__skb_pull include/linux/skbuff.h:2655 [inline] +RIP: 0010:skb_mac_gso_segment+0x68f/0x6a0 net/core/gro.c:136 +Code: fd 48 8b 5c 24 10 44 89 6b 70 48 c7 c7 c0 ae 0d 86 44 89 e6 e8 a1 91 d0 00 48 c7 c7 00 af 0d 86 48 89 de 31 d2 e8 d1 4a e9 ff <0f> 0b 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 55 48 89 e5 41 +RSP: 0018:ffffc90001bd7520 EFLAGS: 00010286 +RAX: ffffffff8469736a RBX: ffff88810f31dac0 RCX: ffff888115a18b00 +RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 +RBP: ffffc90001bd75e8 R08: ffffffff84697183 R09: fffff5200037adf9 +R10: 0000000000000000 R11: dffffc0000000001 R12: 0000000000000012 +R13: 000000000000fee5 R14: 0000000000005865 R15: 000000000000fed7 +FS: 000055555633f300(0000) GS:ffff8881f6a00000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 0000000020000000 CR3: 0000000116fea000 CR4: 00000000003506f0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + +[] __skb_gso_segment+0x32d/0x4c0 net/core/dev.c:3419 +[] skb_gso_segment include/linux/netdevice.h:4819 [inline] +[] validate_xmit_skb+0x3aa/0xee0 net/core/dev.c:3725 +[] __dev_queue_xmit+0x1332/0x3300 net/core/dev.c:4313 +[] dev_queue_xmit+0x17/0x20 include/linux/netdevice.h:3029 +[] packet_snd net/packet/af_packet.c:3111 [inline] +[] packet_sendmsg+0x49d2/0x6470 net/packet/af_packet.c:3142 +[] sock_sendmsg_nosec net/socket.c:716 [inline] +[] sock_sendmsg net/socket.c:736 [inline] +[] __sys_sendto+0x472/0x5f0 net/socket.c:2139 +[] __do_sys_sendto net/socket.c:2151 [inline] +[] __se_sys_sendto net/socket.c:2147 [inline] +[] __x64_sys_sendto+0xe5/0x100 net/socket.c:2147 +[] do_syscall_x64 arch/x86/entry/common.c:50 [inline] +[] 
do_syscall_64+0x2f/0x50 arch/x86/entry/common.c:80 +[] entry_SYSCALL_64_after_hwframe+0x63/0xcd + +Fixes: 469aceddfa3e ("vlan: consolidate VLAN parsing code and limit max parsing depth") +Reported-by: syzbot +Signed-off-by: Eric Dumazet +Cc: Toke Høiland-Jørgensen +Cc: Willem de Bruijn +Reviewed-by: Simon Horman +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/tap.c | 4 ++-- + include/linux/if_vlan.h | 17 +++++++++++++++++ + net/bridge/br_forward.c | 2 +- + net/core/dev.c | 2 +- + net/packet/af_packet.c | 6 ++---- + 5 files changed, 23 insertions(+), 8 deletions(-) + +diff --git a/drivers/net/tap.c b/drivers/net/tap.c +index 854ed2f21d32c..663ce0e09c2de 100644 +--- a/drivers/net/tap.c ++++ b/drivers/net/tap.c +@@ -714,7 +714,7 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, + + /* Move network header to the right position for VLAN tagged packets */ + if (eth_type_vlan(skb->protocol) && +- __vlan_get_protocol(skb, skb->protocol, &depth) != 0) ++ vlan_get_protocol_and_depth(skb, skb->protocol, &depth) != 0) + skb_set_network_header(skb, depth); + + rcu_read_lock(); +@@ -1163,7 +1163,7 @@ static int tap_get_user_xdp(struct tap_queue *q, struct xdp_buff *xdp) + + /* Move network header to the right position for VLAN tagged packets */ + if (eth_type_vlan(skb->protocol) && +- __vlan_get_protocol(skb, skb->protocol, &depth) != 0) ++ vlan_get_protocol_and_depth(skb, skb->protocol, &depth) != 0) + skb_set_network_header(skb, depth); + + rcu_read_lock(); +diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h +index 41a518336673b..4e7e72f3da5bd 100644 +--- a/include/linux/if_vlan.h ++++ b/include/linux/if_vlan.h +@@ -626,6 +626,23 @@ static inline __be16 vlan_get_protocol(const struct sk_buff *skb) + return __vlan_get_protocol(skb, skb->protocol, NULL); + } + ++/* This version of __vlan_get_protocol() also pulls mac header in skb->head */ ++static inline __be16 vlan_get_protocol_and_depth(struct sk_buff *skb, ++ 
__be16 type, int *depth) ++{ ++ int maclen; ++ ++ type = __vlan_get_protocol(skb, type, &maclen); ++ ++ if (type) { ++ if (!pskb_may_pull(skb, maclen)) ++ type = 0; ++ else if (depth) ++ *depth = maclen; ++ } ++ return type; ++} ++ + /* A getter for the SKB protocol field which will handle VLAN tags consistently + * whether VLAN acceleration is enabled or not. + */ +diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c +index ec646656dbf14..3d69ad5463a9f 100644 +--- a/net/bridge/br_forward.c ++++ b/net/bridge/br_forward.c +@@ -42,7 +42,7 @@ int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb + eth_type_vlan(skb->protocol)) { + int depth; + +- if (!__vlan_get_protocol(skb, skb->protocol, &depth)) ++ if (!vlan_get_protocol_and_depth(skb, skb->protocol, &depth)) + goto drop; + + skb_set_network_header(skb, depth); +diff --git a/net/core/dev.c b/net/core/dev.c +index 7fc8ae7f3cd5b..30289cd1c29f4 100644 +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -3314,7 +3314,7 @@ __be16 skb_network_protocol(struct sk_buff *skb, int *depth) + type = eth->h_proto; + } + +- return __vlan_get_protocol(skb, type, depth); ++ return vlan_get_protocol_and_depth(skb, type, depth); + } + + /** +diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c +index f5d430bd372ce..ce484305be881 100644 +--- a/net/packet/af_packet.c ++++ b/net/packet/af_packet.c +@@ -1900,10 +1900,8 @@ static void packet_parse_headers(struct sk_buff *skb, struct socket *sock) + /* Move network header to the right position for VLAN tagged packets */ + if (likely(skb->dev->type == ARPHRD_ETHER) && + eth_type_vlan(skb->protocol) && +- __vlan_get_protocol(skb, skb->protocol, &depth) != 0) { +- if (pskb_may_pull(skb, depth)) +- skb_set_network_header(skb, depth); +- } ++ vlan_get_protocol_and_depth(skb, skb->protocol, &depth) != 0) ++ skb_set_network_header(skb, depth); + + skb_probe_transport_header(skb); + } +-- +2.39.2 + diff --git 
a/queue-5.15/net-annotate-sk-sk_err-write-from-do_recvmmsg.patch b/queue-5.15/net-annotate-sk-sk_err-write-from-do_recvmmsg.patch new file mode 100644 index 00000000000..e461a0e877f --- /dev/null +++ b/queue-5.15/net-annotate-sk-sk_err-write-from-do_recvmmsg.patch @@ -0,0 +1,40 @@ +From 4fc1806c90b5fdf6e209dc88fc2b6ad9d5a58061 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 9 May 2023 16:35:53 +0000 +Subject: net: annotate sk->sk_err write from do_recvmmsg() + +From: Eric Dumazet + +[ Upstream commit e05a5f510f26607616fecdd4ac136310c8bea56b ] + +do_recvmmsg() can write to sk->sk_err from multiple threads. + +As said before, many other points reading or writing sk_err +need annotations. + +Fixes: 34b88a68f26a ("net: Fix use after free in the recvmmsg exit path") +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Reviewed-by: Kuniyuki Iwashima +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/socket.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/socket.c b/net/socket.c +index 73666b878f2ce..5c49074ef7f2a 100644 +--- a/net/socket.c ++++ b/net/socket.c +@@ -2820,7 +2820,7 @@ static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg, + * error to return on the next call or if the + * app asks about it using getsockopt(SO_ERROR). 
+ */ +- sock->sk->sk_err = -err; ++ WRITE_ONCE(sock->sk->sk_err, -err); + } + out_put: + fput_light(sock->file, fput_needed); +-- +2.39.2 + diff --git a/queue-5.15/net-datagram-fix-data-races-in-datagram_poll.patch b/queue-5.15/net-datagram-fix-data-races-in-datagram_poll.patch new file mode 100644 index 00000000000..e9e20da19e2 --- /dev/null +++ b/queue-5.15/net-datagram-fix-data-races-in-datagram_poll.patch @@ -0,0 +1,69 @@ +From 1a25f1d5e3e8d5900e3ebc75ed593cb17798d20f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 9 May 2023 17:31:31 +0000 +Subject: net: datagram: fix data-races in datagram_poll() + +From: Eric Dumazet + +[ Upstream commit 5bca1d081f44c9443e61841842ce4e9179d327b6 ] + +datagram_poll() runs locklessly, we should add READ_ONCE() +annotations while reading sk->sk_err, sk->sk_shutdown and sk->sk_state. + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Eric Dumazet +Reviewed-by: Kuniyuki Iwashima +Link: https://lore.kernel.org/r/20230509173131.3263780-1-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/core/datagram.c | 15 ++++++++++----- + 1 file changed, 10 insertions(+), 5 deletions(-) + +diff --git a/net/core/datagram.c b/net/core/datagram.c +index 28e5f921dcaf4..1ff8241217a9c 100644 +--- a/net/core/datagram.c ++++ b/net/core/datagram.c +@@ -799,18 +799,21 @@ __poll_t datagram_poll(struct file *file, struct socket *sock, + { + struct sock *sk = sock->sk; + __poll_t mask; ++ u8 shutdown; + + sock_poll_wait(file, sock, wait); + mask = 0; + + /* exceptional events? */ +- if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) ++ if (READ_ONCE(sk->sk_err) || ++ !skb_queue_empty_lockless(&sk->sk_error_queue)) + mask |= EPOLLERR | + (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? 
EPOLLPRI : 0); + +- if (sk->sk_shutdown & RCV_SHUTDOWN) ++ shutdown = READ_ONCE(sk->sk_shutdown); ++ if (shutdown & RCV_SHUTDOWN) + mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; +- if (sk->sk_shutdown == SHUTDOWN_MASK) ++ if (shutdown == SHUTDOWN_MASK) + mask |= EPOLLHUP; + + /* readable? */ +@@ -819,10 +822,12 @@ __poll_t datagram_poll(struct file *file, struct socket *sock, + + /* Connection-based need to check for termination and startup */ + if (connection_based(sk)) { +- if (sk->sk_state == TCP_CLOSE) ++ int state = READ_ONCE(sk->sk_state); ++ ++ if (state == TCP_CLOSE) + mask |= EPOLLHUP; + /* connection hasn't started yet? */ +- if (sk->sk_state == TCP_SYN_SENT) ++ if (state == TCP_SYN_SENT) + return mask; + } + +-- +2.39.2 + diff --git a/queue-5.15/net-deal-with-most-data-races-in-sk_wait_event.patch b/queue-5.15/net-deal-with-most-data-races-in-sk_wait_event.patch new file mode 100644 index 00000000000..8fca07ffe97 --- /dev/null +++ b/queue-5.15/net-deal-with-most-data-races-in-sk_wait_event.patch @@ -0,0 +1,224 @@ +From 4a80b2402ec6b4c23491d96e85bf4f2a9aafa659 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 9 May 2023 18:29:48 +0000 +Subject: net: deal with most data-races in sk_wait_event() + +From: Eric Dumazet + +[ Upstream commit d0ac89f6f9879fae316c155de77b5173b3e2c9c9 ] + +__condition is evaluated twice in sk_wait_event() macro. + +First invocation is lockless, and reads can race with writes, +as spotted by syzbot. 
+ +BUG: KCSAN: data-race in sk_stream_wait_connect / tcp_disconnect + +write to 0xffff88812d83d6a0 of 4 bytes by task 9065 on cpu 1: +tcp_disconnect+0x2cd/0xdb0 +inet_shutdown+0x19e/0x1f0 net/ipv4/af_inet.c:911 +__sys_shutdown_sock net/socket.c:2343 [inline] +__sys_shutdown net/socket.c:2355 [inline] +__do_sys_shutdown net/socket.c:2363 [inline] +__se_sys_shutdown+0xf8/0x140 net/socket.c:2361 +__x64_sys_shutdown+0x31/0x40 net/socket.c:2361 +do_syscall_x64 arch/x86/entry/common.c:50 [inline] +do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 +entry_SYSCALL_64_after_hwframe+0x63/0xcd + +read to 0xffff88812d83d6a0 of 4 bytes by task 9040 on cpu 0: +sk_stream_wait_connect+0x1de/0x3a0 net/core/stream.c:75 +tcp_sendmsg_locked+0x2e4/0x2120 net/ipv4/tcp.c:1266 +tcp_sendmsg+0x30/0x50 net/ipv4/tcp.c:1484 +inet6_sendmsg+0x63/0x80 net/ipv6/af_inet6.c:651 +sock_sendmsg_nosec net/socket.c:724 [inline] +sock_sendmsg net/socket.c:747 [inline] +__sys_sendto+0x246/0x300 net/socket.c:2142 +__do_sys_sendto net/socket.c:2154 [inline] +__se_sys_sendto net/socket.c:2150 [inline] +__x64_sys_sendto+0x78/0x90 net/socket.c:2150 +do_syscall_x64 arch/x86/entry/common.c:50 [inline] +do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 +entry_SYSCALL_64_after_hwframe+0x63/0xcd + +value changed: 0x00000000 -> 0x00000068 + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Reported-by: syzbot +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/core/stream.c | 12 ++++++------ + net/ipv4/tcp_bpf.c | 2 +- + net/llc/af_llc.c | 8 +++++--- + net/smc/smc_close.c | 4 ++-- + net/smc/smc_rx.c | 4 ++-- + net/smc/smc_tx.c | 4 ++-- + net/tipc/socket.c | 4 ++-- + net/tls/tls_main.c | 3 ++- + 8 files changed, 22 insertions(+), 19 deletions(-) + +diff --git a/net/core/stream.c b/net/core/stream.c +index cd60746877b1e..422ee97e4f2be 100644 +--- a/net/core/stream.c ++++ b/net/core/stream.c +@@ -73,8 +73,8 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p) + add_wait_queue(sk_sleep(sk), &wait); + sk->sk_write_pending++; + done = sk_wait_event(sk, timeo_p, +- !sk->sk_err && +- !((1 << sk->sk_state) & ++ !READ_ONCE(sk->sk_err) && ++ !((1 << READ_ONCE(sk->sk_state)) & + ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)), &wait); + remove_wait_queue(sk_sleep(sk), &wait); + sk->sk_write_pending--; +@@ -87,9 +87,9 @@ EXPORT_SYMBOL(sk_stream_wait_connect); + * sk_stream_closing - Return 1 if we still have things to send in our buffers. 
+ * @sk: socket to verify + */ +-static inline int sk_stream_closing(struct sock *sk) ++static int sk_stream_closing(const struct sock *sk) + { +- return (1 << sk->sk_state) & ++ return (1 << READ_ONCE(sk->sk_state)) & + (TCPF_FIN_WAIT1 | TCPF_CLOSING | TCPF_LAST_ACK); + } + +@@ -142,8 +142,8 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) + + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + sk->sk_write_pending++; +- sk_wait_event(sk, &current_timeo, sk->sk_err || +- (sk->sk_shutdown & SEND_SHUTDOWN) || ++ sk_wait_event(sk, &current_timeo, READ_ONCE(sk->sk_err) || ++ (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) || + (sk_stream_memory_free(sk) && + !vm_wait), &wait); + sk->sk_write_pending--; +diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c +index 20ad554af3693..e3a9477293ce4 100644 +--- a/net/ipv4/tcp_bpf.c ++++ b/net/ipv4/tcp_bpf.c +@@ -168,7 +168,7 @@ static int tcp_msg_wait_data(struct sock *sk, struct sk_psock *psock, + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); + ret = sk_wait_event(sk, &timeo, + !list_empty(&psock->ingress_msg) || +- !skb_queue_empty(&sk->sk_receive_queue), &wait); ++ !skb_queue_empty_lockless(&sk->sk_receive_queue), &wait); + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); + remove_wait_queue(sk_sleep(sk), &wait); + return ret; +diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c +index 99305aadaa087..8b9a10d10036f 100644 +--- a/net/llc/af_llc.c ++++ b/net/llc/af_llc.c +@@ -581,7 +581,8 @@ static int llc_ui_wait_for_disc(struct sock *sk, long timeout) + + add_wait_queue(sk_sleep(sk), &wait); + while (1) { +- if (sk_wait_event(sk, &timeout, sk->sk_state == TCP_CLOSE, &wait)) ++ if (sk_wait_event(sk, &timeout, ++ READ_ONCE(sk->sk_state) == TCP_CLOSE, &wait)) + break; + rc = -ERESTARTSYS; + if (signal_pending(current)) +@@ -601,7 +602,8 @@ static bool llc_ui_wait_for_conn(struct sock *sk, long timeout) + + add_wait_queue(sk_sleep(sk), &wait); + while (1) { +- if (sk_wait_event(sk, &timeout, sk->sk_state != TCP_SYN_SENT, &wait)) ++ if (sk_wait_event(sk, 
&timeout, ++ READ_ONCE(sk->sk_state) != TCP_SYN_SENT, &wait)) + break; + if (signal_pending(current) || !timeout) + break; +@@ -620,7 +622,7 @@ static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout) + while (1) { + rc = 0; + if (sk_wait_event(sk, &timeout, +- (sk->sk_shutdown & RCV_SHUTDOWN) || ++ (READ_ONCE(sk->sk_shutdown) & RCV_SHUTDOWN) || + (!llc_data_accept_state(llc->state) && + !llc->remote_busy_flag && + !llc->p_flag), &wait)) +diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c +index 84102db5bb314..149a59ecd299f 100644 +--- a/net/smc/smc_close.c ++++ b/net/smc/smc_close.c +@@ -64,8 +64,8 @@ static void smc_close_stream_wait(struct smc_sock *smc, long timeout) + + rc = sk_wait_event(sk, &timeout, + !smc_tx_prepared_sends(&smc->conn) || +- sk->sk_err == ECONNABORTED || +- sk->sk_err == ECONNRESET || ++ READ_ONCE(sk->sk_err) == ECONNABORTED || ++ READ_ONCE(sk->sk_err) == ECONNRESET || + smc->conn.killed, + &wait); + if (rc) +diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c +index 45b0575520da4..5b63c250ba604 100644 +--- a/net/smc/smc_rx.c ++++ b/net/smc/smc_rx.c +@@ -204,9 +204,9 @@ int smc_rx_wait(struct smc_sock *smc, long *timeo, + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); + add_wait_queue(sk_sleep(sk), &wait); + rc = sk_wait_event(sk, timeo, +- sk->sk_err || ++ READ_ONCE(sk->sk_err) || + cflags->peer_conn_abort || +- sk->sk_shutdown & RCV_SHUTDOWN || ++ READ_ONCE(sk->sk_shutdown) & RCV_SHUTDOWN || + conn->killed || + fcrit(conn), + &wait); +diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c +index 31ee76131a79e..a878ea084dbd6 100644 +--- a/net/smc/smc_tx.c ++++ b/net/smc/smc_tx.c +@@ -113,8 +113,8 @@ static int smc_tx_wait(struct smc_sock *smc, int flags) + break; /* at least 1 byte of free & no urgent data */ + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + sk_wait_event(sk, &timeo, +- sk->sk_err || +- (sk->sk_shutdown & SEND_SHUTDOWN) || ++ READ_ONCE(sk->sk_err) || ++ (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) || + 
smc_cdc_rxed_any_close(conn) || + (atomic_read(&conn->sndbuf_space) && + !conn->urg_tx_pend), +diff --git a/net/tipc/socket.c b/net/tipc/socket.c +index f1c3b8eb4b3d3..b34857217fde4 100644 +--- a/net/tipc/socket.c ++++ b/net/tipc/socket.c +@@ -313,9 +313,9 @@ static void tsk_rej_rx_queue(struct sock *sk, int error) + tipc_sk_respond(sk, skb, error); + } + +-static bool tipc_sk_connected(struct sock *sk) ++static bool tipc_sk_connected(const struct sock *sk) + { +- return sk->sk_state == TIPC_ESTABLISHED; ++ return READ_ONCE(sk->sk_state) == TIPC_ESTABLISHED; + } + + /* tipc_sk_type_connectionless - check if the socket is datagram socket +diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c +index abd0c4557cb93..20b8ba4d1dfc4 100644 +--- a/net/tls/tls_main.c ++++ b/net/tls/tls_main.c +@@ -92,7 +92,8 @@ int wait_on_pending_writer(struct sock *sk, long *timeo) + break; + } + +- if (sk_wait_event(sk, timeo, !sk->sk_write_pending, &wait)) ++ if (sk_wait_event(sk, timeo, ++ !READ_ONCE(sk->sk_write_pending), &wait)) + break; + } + remove_wait_queue(sk_sleep(sk), &wait); +-- +2.39.2 + diff --git a/queue-5.15/net-fix-load-tearing-on-sk-sk_stamp-in-sock_recv_cms.patch b/queue-5.15/net-fix-load-tearing-on-sk-sk_stamp-in-sock_recv_cms.patch new file mode 100644 index 00000000000..5c8205d6072 --- /dev/null +++ b/queue-5.15/net-fix-load-tearing-on-sk-sk_stamp-in-sock_recv_cms.patch @@ -0,0 +1,82 @@ +From 7e471b8f88c98ea86909997ed4652099a9174ca9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 8 May 2023 10:55:43 -0700 +Subject: net: Fix load-tearing on sk->sk_stamp in sock_recv_cmsgs(). + +From: Kuniyuki Iwashima + +[ Upstream commit dfd9248c071a3710c24365897459538551cb7167 ] + +KCSAN found a data race in sock_recv_cmsgs() where the read access +to sk->sk_stamp needs READ_ONCE(). 
+ +BUG: KCSAN: data-race in packet_recvmsg / packet_recvmsg + +write (marked) to 0xffff88803c81f258 of 8 bytes by task 19171 on cpu 0: + sock_write_timestamp include/net/sock.h:2670 [inline] + sock_recv_cmsgs include/net/sock.h:2722 [inline] + packet_recvmsg+0xb97/0xd00 net/packet/af_packet.c:3489 + sock_recvmsg_nosec net/socket.c:1019 [inline] + sock_recvmsg+0x11a/0x130 net/socket.c:1040 + sock_read_iter+0x176/0x220 net/socket.c:1118 + call_read_iter include/linux/fs.h:1845 [inline] + new_sync_read fs/read_write.c:389 [inline] + vfs_read+0x5e0/0x630 fs/read_write.c:470 + ksys_read+0x163/0x1a0 fs/read_write.c:613 + __do_sys_read fs/read_write.c:623 [inline] + __se_sys_read fs/read_write.c:621 [inline] + __x64_sys_read+0x41/0x50 fs/read_write.c:621 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x3b/0x90 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x72/0xdc + +read to 0xffff88803c81f258 of 8 bytes by task 19183 on cpu 1: + sock_recv_cmsgs include/net/sock.h:2721 [inline] + packet_recvmsg+0xb64/0xd00 net/packet/af_packet.c:3489 + sock_recvmsg_nosec net/socket.c:1019 [inline] + sock_recvmsg+0x11a/0x130 net/socket.c:1040 + sock_read_iter+0x176/0x220 net/socket.c:1118 + call_read_iter include/linux/fs.h:1845 [inline] + new_sync_read fs/read_write.c:389 [inline] + vfs_read+0x5e0/0x630 fs/read_write.c:470 + ksys_read+0x163/0x1a0 fs/read_write.c:613 + __do_sys_read fs/read_write.c:623 [inline] + __se_sys_read fs/read_write.c:621 [inline] + __x64_sys_read+0x41/0x50 fs/read_write.c:621 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x3b/0x90 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x72/0xdc + +value changed: 0xffffffffc4653600 -> 0x0000000000000000 + +Reported by Kernel Concurrency Sanitizer on: +CPU: 1 PID: 19183 Comm: syz-executor.5 Not tainted 6.3.0-rc7-02330-gca6270c12e20 #2 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 + 
+Fixes: 6c7c98bad488 ("sock: avoid dirtying sk_stamp, if possible") +Reported-by: syzbot +Signed-off-by: Kuniyuki Iwashima +Reviewed-by: Eric Dumazet +Link: https://lore.kernel.org/r/20230508175543.55756-1-kuniyu@amazon.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + include/net/sock.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/net/sock.h b/include/net/sock.h +index 3a4e81399edc6..0309d2311487d 100644 +--- a/include/net/sock.h ++++ b/include/net/sock.h +@@ -2623,7 +2623,7 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, + __sock_recv_ts_and_drops(msg, sk, skb); + else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP))) + sock_write_timestamp(sk, skb->tstamp); +- else if (unlikely(sk->sk_stamp == SK_DEFAULT_STAMP)) ++ else if (unlikely(sock_read_timestamp(sk) == SK_DEFAULT_STAMP)) + sock_write_timestamp(sk, 0); + } + +-- +2.39.2 + diff --git a/queue-5.15/net-mdio-mvusb-fix-an-error-handling-path-in-mvusb_m.patch b/queue-5.15/net-mdio-mvusb-fix-an-error-handling-path-in-mvusb_m.patch new file mode 100644 index 00000000000..42f1630656f --- /dev/null +++ b/queue-5.15/net-mdio-mvusb-fix-an-error-handling-path-in-mvusb_m.patch @@ -0,0 +1,54 @@ +From 1a90695797d9c62f46752ce2188f515426525f9a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 5 May 2023 20:39:33 +0200 +Subject: net: mdio: mvusb: Fix an error handling path in mvusb_mdio_probe() + +From: Christophe JAILLET + +[ Upstream commit 27c1eaa07283b0c94becf8241f95368267cf558b ] + +Should of_mdiobus_register() fail, a previous usb_get_dev() call should be +undone as in the .disconnect function. + +Fixes: 04e37d92fbed ("net: phy: add marvell usb to mdio controller") +Signed-off-by: Christophe JAILLET +Reviewed-by: Simon Horman +Reviewed-by: Andrew Lunn +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/mdio/mdio-mvusb.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/mdio/mdio-mvusb.c b/drivers/net/mdio/mdio-mvusb.c +index d5eabddfdf51b..11e048136ac23 100644 +--- a/drivers/net/mdio/mdio-mvusb.c ++++ b/drivers/net/mdio/mdio-mvusb.c +@@ -73,6 +73,7 @@ static int mvusb_mdio_probe(struct usb_interface *interface, + struct device *dev = &interface->dev; + struct mvusb_mdio *mvusb; + struct mii_bus *mdio; ++ int ret; + + mdio = devm_mdiobus_alloc_size(dev, sizeof(*mvusb)); + if (!mdio) +@@ -93,7 +94,15 @@ static int mvusb_mdio_probe(struct usb_interface *interface, + mdio->write = mvusb_mdio_write; + + usb_set_intfdata(interface, mvusb); +- return of_mdiobus_register(mdio, dev->of_node); ++ ret = of_mdiobus_register(mdio, dev->of_node); ++ if (ret) ++ goto put_dev; ++ ++ return 0; ++ ++put_dev: ++ usb_put_dev(mvusb->udev); ++ return ret; + } + + static void mvusb_mdio_disconnect(struct usb_interface *interface) +-- +2.39.2 + diff --git a/queue-5.15/net-stmmac-initialize-mac_oneus_tic_counter-register.patch b/queue-5.15/net-stmmac-initialize-mac_oneus_tic_counter-register.patch new file mode 100644 index 00000000000..d9244dd084c --- /dev/null +++ b/queue-5.15/net-stmmac-initialize-mac_oneus_tic_counter-register.patch @@ -0,0 +1,96 @@ +From 9943d176649ba74f9740dd58f07f4d75548d1775 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 7 May 2023 01:58:45 +0200 +Subject: net: stmmac: Initialize MAC_ONEUS_TIC_COUNTER register + +From: Marek Vasut + +[ Upstream commit 8efbdbfa99381a017dd2c0f6375a7d80a8118b74 ] + +Initialize MAC_ONEUS_TIC_COUNTER register with correct value derived +from CSR clock, otherwise EEE is unstable on at least NXP i.MX8M Plus +and Micrel KSZ9131RNX PHY, to the point where not even ARP request can +be sent out. + +i.MX 8M Plus Applications Processor Reference Manual, Rev. 
1, 06/2021 +11.7.6.1.34 One-microsecond Reference Timer (MAC_ONEUS_TIC_COUNTER) +defines this register as: +" +This register controls the generation of the Reference time (1 microsecond +tic) for all the LPI timers. This timer has to be programmed by the software +initially. +... +The application must program this counter so that the number of clock cycles +of CSR clock is 1us. (Subtract 1 from the value before programming). +For example if the CSR clock is 100MHz then this field needs to be programmed +to value 100 - 1 = 99 (which is 0x63). +This is required to generate the 1US events that are used to update some of +the EEE related counters. +" + +The reset value is 0x63 on i.MX8M Plus, which means expected CSR clock are +100 MHz. However, the i.MX8M Plus "enet_qos_root_clk" are 266 MHz instead, +which means the LPI timers reach their count much sooner on this platform. + +This is visible using a scope by monitoring e.g. exit from LPI mode on TX_CTL +line from MAC to PHY. This should take 30us per STMMAC_DEFAULT_TWT_LS setting, +during which the TX_CTL line transitions from tristate to low, and 30 us later +from low to high. On i.MX8M Plus, this transition takes 11 us, which matches +the 30us * 100/266 formula for misconfigured MAC_ONEUS_TIC_COUNTER register. + +Configure MAC_ONEUS_TIC_COUNTER based on CSR clock, so that the LPI timers +have correct 1us reference. This then fixes EEE on i.MX8M Plus with Micrel +KSZ9131RNX PHY. 
+ +Fixes: 477286b53f55 ("stmmac: add GMAC4 core support") +Signed-off-by: Marek Vasut +Tested-by: Harald Seiler +Reviewed-by: Francesco Dolcini +Tested-by: Francesco Dolcini # Toradex Verdin iMX8MP +Reviewed-by: Jesse Brandeburg +Link: https://lore.kernel.org/r/20230506235845.246105-1-marex@denx.de +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/stmicro/stmmac/dwmac4.h | 1 + + drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c | 5 +++++ + 2 files changed, 6 insertions(+) + +diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h +index 71dad409f78b0..12c0e60809f47 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h ++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h +@@ -181,6 +181,7 @@ enum power_event { + #define GMAC4_LPI_CTRL_STATUS 0xd0 + #define GMAC4_LPI_TIMER_CTRL 0xd4 + #define GMAC4_LPI_ENTRY_TIMER 0xd8 ++#define GMAC4_MAC_ONEUS_TIC_COUNTER 0xdc + + /* LPI control and status defines */ + #define GMAC4_LPI_CTRL_STATUS_LPITCSE BIT(21) /* LPI Tx Clock Stop Enable */ +diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +index 0cb4d2d35786a..29480314a4867 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c ++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +@@ -26,6 +26,7 @@ static void dwmac4_core_init(struct mac_device_info *hw, + struct stmmac_priv *priv = netdev_priv(dev); + void __iomem *ioaddr = hw->pcsr; + u32 value = readl(ioaddr + GMAC_CONFIG); ++ u32 clk_rate; + + value |= GMAC_CORE_INIT; + +@@ -48,6 +49,10 @@ static void dwmac4_core_init(struct mac_device_info *hw, + + writel(value, ioaddr + GMAC_CONFIG); + ++ /* Configure LPI 1us counter to number of CSR clock ticks in 1us - 1 */ ++ clk_rate = clk_get_rate(priv->plat->stmmac_clk); ++ writel((clk_rate / 1000000) - 1, ioaddr + GMAC4_MAC_ONEUS_TIC_COUNTER); ++ + /* Enable GMAC interrupts */ + value = GMAC_INT_DEFAULT_ENABLE; + 
+-- +2.39.2 + diff --git a/queue-5.15/net-stmmac-switch-to-use-interrupt-for-hw-crosstimes.patch b/queue-5.15/net-stmmac-switch-to-use-interrupt-for-hw-crosstimes.patch new file mode 100644 index 00000000000..87cd173c324 --- /dev/null +++ b/queue-5.15/net-stmmac-switch-to-use-interrupt-for-hw-crosstimes.patch @@ -0,0 +1,224 @@ +From 1ae9114699d7206b6c2a1719d6c0b0b33adb01d5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 14 Jul 2022 15:54:27 +0800 +Subject: net: stmmac: switch to use interrupt for hw crosstimestamping + +From: Wong Vee Khee + +[ Upstream commit 76c16d3e19446deea98b7883f261758b96b8781a ] + +Using current implementation of polling mode, there is high chances we +will hit into timeout error when running phc2sys. Hence, update the +implementation of hardware crosstimestamping to use the MAC interrupt +service routine instead of polling for TSIS bit in the MAC Timestamp +Interrupt Status register to be set. + +Cc: Richard Cochran +Signed-off-by: Wong Vee Khee +Signed-off-by: David S. 
Miller +Stable-dep-of: 8efbdbfa9938 ("net: stmmac: Initialize MAC_ONEUS_TIC_COUNTER register") +Signed-off-by: Sasha Levin +--- + .../net/ethernet/stmicro/stmmac/dwmac-intel.c | 25 ++++++++++++------- + drivers/net/ethernet/stmicro/stmmac/dwmac4.h | 3 ++- + .../net/ethernet/stmicro/stmmac/dwmac4_core.c | 4 +++ + drivers/net/ethernet/stmicro/stmmac/stmmac.h | 1 + + .../ethernet/stmicro/stmmac/stmmac_hwtstamp.c | 5 ++++ + .../net/ethernet/stmicro/stmmac/stmmac_ptp.c | 12 +-------- + include/linux/stmmac.h | 1 + + 7 files changed, 30 insertions(+), 21 deletions(-) + +diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +index fb9ff4ce94530..c9e88df9e8665 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c ++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +@@ -298,6 +298,11 @@ static void get_arttime(struct mii_bus *mii, int intel_adhoc_addr, + *art_time = ns; + } + ++static int stmmac_cross_ts_isr(struct stmmac_priv *priv) ++{ ++ return (readl(priv->ioaddr + GMAC_INT_STATUS) & GMAC_INT_TSIE); ++} ++ + static int intel_crosststamp(ktime_t *device, + struct system_counterval_t *system, + void *ctx) +@@ -313,8 +318,6 @@ static int intel_crosststamp(ktime_t *device, + u32 num_snapshot; + u32 gpio_value; + u32 acr_value; +- int ret; +- u32 v; + int i; + + if (!boot_cpu_has(X86_FEATURE_ART)) +@@ -328,6 +331,8 @@ static int intel_crosststamp(ktime_t *device, + if (priv->plat->ext_snapshot_en) + return -EBUSY; + ++ priv->plat->int_snapshot_en = 1; ++ + mutex_lock(&priv->aux_ts_lock); + /* Enable Internal snapshot trigger */ + acr_value = readl(ptpaddr + PTP_ACR); +@@ -347,6 +352,7 @@ static int intel_crosststamp(ktime_t *device, + break; + default: + mutex_unlock(&priv->aux_ts_lock); ++ priv->plat->int_snapshot_en = 0; + return -EINVAL; + } + writel(acr_value, ptpaddr + PTP_ACR); +@@ -368,13 +374,12 @@ static int intel_crosststamp(ktime_t *device, + gpio_value |= GMAC_GPO1; + writel(gpio_value, 
ioaddr + GMAC_GPIO_STATUS); + +- /* Poll for time sync operation done */ +- ret = readl_poll_timeout(priv->ioaddr + GMAC_INT_STATUS, v, +- (v & GMAC_INT_TSIE), 100, 10000); +- +- if (ret == -ETIMEDOUT) { +- pr_err("%s: Wait for time sync operation timeout\n", __func__); +- return ret; ++ /* Time sync done Indication - Interrupt method */ ++ if (!wait_event_interruptible_timeout(priv->tstamp_busy_wait, ++ stmmac_cross_ts_isr(priv), ++ HZ / 100)) { ++ priv->plat->int_snapshot_en = 0; ++ return -ETIMEDOUT; + } + + num_snapshot = (readl(ioaddr + GMAC_TIMESTAMP_STATUS) & +@@ -392,6 +397,7 @@ static int intel_crosststamp(ktime_t *device, + } + + system->cycles *= intel_priv->crossts_adj; ++ priv->plat->int_snapshot_en = 0; + + return 0; + } +@@ -576,6 +582,7 @@ static int intel_mgbe_common_data(struct pci_dev *pdev, + + plat->has_crossts = true; + plat->crosststamp = intel_crosststamp; ++ plat->int_snapshot_en = 0; + + /* Setup MSI vector offset specific to Intel mGbE controller */ + plat->msi_mac_vec = 29; +diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h +index 462ca7ed095a2..71dad409f78b0 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h ++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h +@@ -150,7 +150,8 @@ + #define GMAC_PCS_IRQ_DEFAULT (GMAC_INT_RGSMIIS | GMAC_INT_PCS_LINK | \ + GMAC_INT_PCS_ANE) + +-#define GMAC_INT_DEFAULT_ENABLE (GMAC_INT_PMT_EN | GMAC_INT_LPI_EN) ++#define GMAC_INT_DEFAULT_ENABLE (GMAC_INT_PMT_EN | GMAC_INT_LPI_EN | \ ++ GMAC_INT_TSIE) + + enum dwmac4_irq_status { + time_stamp_irq = 0x00001000, +diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +index cd85a2d076c99..0cb4d2d35786a 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c ++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +@@ -23,6 +23,7 @@ + static void dwmac4_core_init(struct mac_device_info *hw, + struct net_device *dev) + { ++ struct 
stmmac_priv *priv = netdev_priv(dev); + void __iomem *ioaddr = hw->pcsr; + u32 value = readl(ioaddr + GMAC_CONFIG); + +@@ -58,6 +59,9 @@ static void dwmac4_core_init(struct mac_device_info *hw, + value |= GMAC_INT_FPE_EN; + + writel(value, ioaddr + GMAC_INT_EN); ++ ++ if (GMAC_INT_DEFAULT_ENABLE & GMAC_INT_TSIE) ++ init_waitqueue_head(&priv->tstamp_busy_wait); + } + + static void dwmac4_rx_queue_enable(struct mac_device_info *hw, +diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h +index 05b5371ca036b..f03779205ade4 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h ++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h +@@ -265,6 +265,7 @@ struct stmmac_priv { + spinlock_t ptp_lock; + /* Protects auxiliary snapshot registers from concurrent access. */ + struct mutex aux_ts_lock; ++ wait_queue_head_t tstamp_busy_wait; + + void __iomem *mmcaddr; + void __iomem *ptpaddr; +diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c +index 4538e4fd81898..2c6245b2281ca 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c ++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c +@@ -180,6 +180,11 @@ static void timestamp_interrupt(struct stmmac_priv *priv) + u64 ptp_time; + int i; + ++ if (priv->plat->int_snapshot_en) { ++ wake_up(&priv->tstamp_busy_wait); ++ return; ++ } ++ + tsync_int = readl(priv->ioaddr + GMAC_INT_STATUS) & GMAC_INT_TSIE; + + if (!tsync_int) +diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +index 487418ef9b4f8..e6221c33572d4 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c ++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +@@ -175,11 +175,10 @@ static int stmmac_enable(struct ptp_clock_info *ptp, + struct stmmac_priv *priv = + container_of(ptp, struct stmmac_priv, ptp_clock_ops); + void __iomem *ptpaddr = priv->ptpaddr; +- void 
__iomem *ioaddr = priv->hw->pcsr; + struct stmmac_pps_cfg *cfg; +- u32 intr_value, acr_value; + int ret = -EOPNOTSUPP; + unsigned long flags; ++ u32 acr_value; + + switch (rq->type) { + case PTP_CLK_REQ_PEROUT: +@@ -213,19 +212,10 @@ static int stmmac_enable(struct ptp_clock_info *ptp, + netdev_dbg(priv->dev, "Auxiliary Snapshot %d enabled.\n", + priv->plat->ext_snapshot_num >> + PTP_ACR_ATSEN_SHIFT); +- /* Enable Timestamp Interrupt */ +- intr_value = readl(ioaddr + GMAC_INT_EN); +- intr_value |= GMAC_INT_TSIE; +- writel(intr_value, ioaddr + GMAC_INT_EN); +- + } else { + netdev_dbg(priv->dev, "Auxiliary Snapshot %d disabled.\n", + priv->plat->ext_snapshot_num >> + PTP_ACR_ATSEN_SHIFT); +- /* Disable Timestamp Interrupt */ +- intr_value = readl(ioaddr + GMAC_INT_EN); +- intr_value &= ~GMAC_INT_TSIE; +- writel(intr_value, ioaddr + GMAC_INT_EN); + } + writel(acr_value, ptpaddr + PTP_ACR); + mutex_unlock(&priv->aux_ts_lock); +diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h +index cc338c6c74954..24bc3f7967c3b 100644 +--- a/include/linux/stmmac.h ++++ b/include/linux/stmmac.h +@@ -260,6 +260,7 @@ struct plat_stmmacenet_data { + bool has_crossts; + int int_snapshot_num; + int ext_snapshot_num; ++ bool int_snapshot_en; + bool ext_snapshot_en; + bool multi_msi_en; + int msi_mac_vec; +-- +2.39.2 + diff --git a/queue-5.15/netfilter-conntrack-fix-possible-bug_on-with-enable_.patch b/queue-5.15/netfilter-conntrack-fix-possible-bug_on-with-enable_.patch new file mode 100644 index 00000000000..483b581f7fd --- /dev/null +++ b/queue-5.15/netfilter-conntrack-fix-possible-bug_on-with-enable_.patch @@ -0,0 +1,77 @@ +From aec7b44bcf21098a008292e286ca67dd529668d7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 4 May 2023 14:55:02 +0200 +Subject: netfilter: conntrack: fix possible bug_on with enable_hooks=1 + +From: Florian Westphal + +[ Upstream commit e72eeab542dbf4f544e389e64fa13b82a1b6d003 ] + +I received a bug report (no reproducer so far) where we trip over + 
+712 rcu_read_lock(); +713 ct_hook = rcu_dereference(nf_ct_hook); +714 BUG_ON(ct_hook == NULL); // here + +In nf_conntrack_destroy(). + +First turn this BUG_ON into a WARN. I think it was triggered +via enable_hooks=1 flag. + +When this flag is turned on, the conntrack hooks are registered +before nf_ct_hook pointer gets assigned. +This opens a short window where packets enter the conntrack machinery, +can have skb->_nfct set up and a subsequent kfree_skb might occur +before nf_ct_hook is set. + +Call nf_conntrack_init_end() to set nf_ct_hook before we register the +pernet ops. + +Fixes: ba3fbe663635 ("netfilter: nf_conntrack: provide modparam to always register conntrack hooks") +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/core.c | 6 ++++-- + net/netfilter/nf_conntrack_standalone.c | 3 ++- + 2 files changed, 6 insertions(+), 3 deletions(-) + +diff --git a/net/netfilter/core.c b/net/netfilter/core.c +index cca0762a90102..8eed6536e7e7b 100644 +--- a/net/netfilter/core.c ++++ b/net/netfilter/core.c +@@ -675,9 +675,11 @@ void nf_conntrack_destroy(struct nf_conntrack *nfct) + + rcu_read_lock(); + ct_hook = rcu_dereference(nf_ct_hook); +- BUG_ON(ct_hook == NULL); +- ct_hook->destroy(nfct); ++ if (ct_hook) ++ ct_hook->destroy(nfct); + rcu_read_unlock(); ++ ++ WARN_ON(!ct_hook); + } + EXPORT_SYMBOL(nf_conntrack_destroy); + +diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c +index 338f02a12076b..7515705583bcf 100644 +--- a/net/netfilter/nf_conntrack_standalone.c ++++ b/net/netfilter/nf_conntrack_standalone.c +@@ -1229,11 +1229,12 @@ static int __init nf_conntrack_standalone_init(void) + nf_conntrack_htable_size_user = nf_conntrack_htable_size; + #endif + ++ nf_conntrack_init_end(); ++ + ret = register_pernet_subsys(&nf_conntrack_net_ops); + if (ret < 0) + goto out_pernet; + +- nf_conntrack_init_end(); + return 0; + + out_pernet: +-- +2.39.2 + diff --git 
a/queue-5.15/netfilter-nf_tables-always-release-netdev-hooks-from.patch b/queue-5.15/netfilter-nf_tables-always-release-netdev-hooks-from.patch new file mode 100644 index 00000000000..fe264f47049 --- /dev/null +++ b/queue-5.15/netfilter-nf_tables-always-release-netdev-hooks-from.patch @@ -0,0 +1,76 @@ +From 356b8a45ef4b28367c76c3fb44c53d953ae13622 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 4 May 2023 14:20:21 +0200 +Subject: netfilter: nf_tables: always release netdev hooks from notifier + +From: Florian Westphal + +[ Upstream commit dc1c9fd4a8bbe1e06add9053010b652449bfe411 ] + +This reverts "netfilter: nf_tables: skip netdev events generated on netns removal". + +The problem is that when a veth device is released, the veth release +callback will also queue the peer netns device for removal. + +It's possible that the peer netns is also slated for removal. In this +case, the device memory is already released before the pre_exit hook of +the peer netns runs: + +BUG: KASAN: slab-use-after-free in nf_hook_entry_head+0x1b8/0x1d0 +Read of size 8 at addr ffff88812c0124f0 by task kworker/u8:1/45 +Workqueue: netns cleanup_net +Call Trace: + nf_hook_entry_head+0x1b8/0x1d0 + __nf_unregister_net_hook+0x76/0x510 + nft_netdev_unregister_hooks+0xa0/0x220 + __nft_release_hook+0x184/0x490 + nf_tables_pre_exit_net+0x12f/0x1b0 + .. + +Order is: +1. First netns is released, veth_dellink() queues peer netns device + for removal +2. peer netns is queued for removal +3. peer netns device is released, unreg event is triggered +4. unreg event is ignored because netns is going down +5. pre_exit hook calls nft_netdev_unregister_hooks but device memory + might be free'd already. 
+ +Fixes: 68a3765c659f ("netfilter: nf_tables: skip netdev events generated on netns removal") +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/nft_chain_filter.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c +index 3ced0eb6b7c3b..5b02408a920bf 100644 +--- a/net/netfilter/nft_chain_filter.c ++++ b/net/netfilter/nft_chain_filter.c +@@ -342,6 +342,12 @@ static void nft_netdev_event(unsigned long event, struct net_device *dev, + return; + } + ++ /* UNREGISTER events are also happening on netns exit. ++ * ++ * Although nf_tables core releases all tables/chains, only this event ++ * handler provides guarantee that hook->ops.dev is still accessible, ++ * so we cannot skip exiting net namespaces. ++ */ + __nft_release_basechain(ctx); + } + +@@ -360,9 +366,6 @@ static int nf_tables_netdev_event(struct notifier_block *this, + event != NETDEV_CHANGENAME) + return NOTIFY_DONE; + +- if (!check_net(ctx.net)) +- return NOTIFY_DONE; +- + nft_net = nft_pernet(ctx.net); + mutex_lock(&nft_net->commit_mutex); + list_for_each_entry(table, &nft_net->tables, list) { +-- +2.39.2 + diff --git a/queue-5.15/netlink-annotate-accesses-to-nlk-cb_running.patch b/queue-5.15/netlink-annotate-accesses-to-nlk-cb_running.patch new file mode 100644 index 00000000000..cf889094f50 --- /dev/null +++ b/queue-5.15/netlink-annotate-accesses-to-nlk-cb_running.patch @@ -0,0 +1,109 @@ +From dc454f777e4cf009bc6c4cdc56bbbd77f4e0917b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 9 May 2023 16:56:34 +0000 +Subject: netlink: annotate accesses to nlk->cb_running + +From: Eric Dumazet + +[ Upstream commit a939d14919b799e6fff8a9c80296ca229ba2f8a4 ] + +Both netlink_recvmsg() and netlink_native_seq_show() read +nlk->cb_running locklessly. Use READ_ONCE() there. 
+ +Add corresponding WRITE_ONCE() to netlink_dump() and +__netlink_dump_start() + +syzbot reported: +BUG: KCSAN: data-race in __netlink_dump_start / netlink_recvmsg + +write to 0xffff88813ea4db59 of 1 bytes by task 28219 on cpu 0: +__netlink_dump_start+0x3af/0x4d0 net/netlink/af_netlink.c:2399 +netlink_dump_start include/linux/netlink.h:308 [inline] +rtnetlink_rcv_msg+0x70f/0x8c0 net/core/rtnetlink.c:6130 +netlink_rcv_skb+0x126/0x220 net/netlink/af_netlink.c:2577 +rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:6192 +netlink_unicast_kernel net/netlink/af_netlink.c:1339 [inline] +netlink_unicast+0x56f/0x640 net/netlink/af_netlink.c:1365 +netlink_sendmsg+0x665/0x770 net/netlink/af_netlink.c:1942 +sock_sendmsg_nosec net/socket.c:724 [inline] +sock_sendmsg net/socket.c:747 [inline] +sock_write_iter+0x1aa/0x230 net/socket.c:1138 +call_write_iter include/linux/fs.h:1851 [inline] +new_sync_write fs/read_write.c:491 [inline] +vfs_write+0x463/0x760 fs/read_write.c:584 +ksys_write+0xeb/0x1a0 fs/read_write.c:637 +__do_sys_write fs/read_write.c:649 [inline] +__se_sys_write fs/read_write.c:646 [inline] +__x64_sys_write+0x42/0x50 fs/read_write.c:646 +do_syscall_x64 arch/x86/entry/common.c:50 [inline] +do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 +entry_SYSCALL_64_after_hwframe+0x63/0xcd + +read to 0xffff88813ea4db59 of 1 bytes by task 28222 on cpu 1: +netlink_recvmsg+0x3b4/0x730 net/netlink/af_netlink.c:2022 +sock_recvmsg_nosec+0x4c/0x80 net/socket.c:1017 +____sys_recvmsg+0x2db/0x310 net/socket.c:2718 +___sys_recvmsg net/socket.c:2762 [inline] +do_recvmmsg+0x2e5/0x710 net/socket.c:2856 +__sys_recvmmsg net/socket.c:2935 [inline] +__do_sys_recvmmsg net/socket.c:2958 [inline] +__se_sys_recvmmsg net/socket.c:2951 [inline] +__x64_sys_recvmmsg+0xe2/0x160 net/socket.c:2951 +do_syscall_x64 arch/x86/entry/common.c:50 [inline] +do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 +entry_SYSCALL_64_after_hwframe+0x63/0xcd + +value changed: 0x00 -> 0x01 + +Fixes: 16b304f3404f ("netlink: 
Eliminate kmalloc in netlink dump operation.") +Reported-by: syzbot +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/netlink/af_netlink.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c +index 84a7a29be49d8..998c736d3ae8b 100644 +--- a/net/netlink/af_netlink.c ++++ b/net/netlink/af_netlink.c +@@ -2000,7 +2000,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + + skb_free_datagram(sk, skb); + +- if (nlk->cb_running && ++ if (READ_ONCE(nlk->cb_running) && + atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) { + ret = netlink_dump(sk); + if (ret) { +@@ -2312,7 +2312,7 @@ static int netlink_dump(struct sock *sk) + if (cb->done) + cb->done(cb); + +- nlk->cb_running = false; ++ WRITE_ONCE(nlk->cb_running, false); + module = cb->module; + skb = cb->skb; + mutex_unlock(nlk->cb_mutex); +@@ -2375,7 +2375,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, + goto error_put; + } + +- nlk->cb_running = true; ++ WRITE_ONCE(nlk->cb_running, true); + nlk->dump_done_errno = INT_MAX; + + mutex_unlock(nlk->cb_mutex); +@@ -2661,7 +2661,7 @@ static int netlink_native_seq_show(struct seq_file *seq, void *v) + nlk->groups ? 
(u32)nlk->groups[0] : 0, + sk_rmem_alloc_get(s), + sk_wmem_alloc_get(s), +- nlk->cb_running, ++ READ_ONCE(nlk->cb_running), + refcount_read(&s->sk_refcnt), + atomic_read(&s->sk_drops), + sock_i_ino(s) +-- +2.39.2 + diff --git a/queue-5.15/scsi-ufs-core-fix-i-o-hang-that-occurs-when-bkops-fa.patch b/queue-5.15/scsi-ufs-core-fix-i-o-hang-that-occurs-when-bkops-fa.patch new file mode 100644 index 00000000000..1080252344d --- /dev/null +++ b/queue-5.15/scsi-ufs-core-fix-i-o-hang-that-occurs-when-bkops-fa.patch @@ -0,0 +1,50 @@ +From b628f6e34b9e4279e41d399974779e70275411cf Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 25 Apr 2023 12:17:21 +0900 +Subject: scsi: ufs: core: Fix I/O hang that occurs when BKOPS fails in W-LUN + suspend + +From: Keoseong Park + +[ Upstream commit 1a7edd041f2d252f251523ba3f2eaead076a8f8d ] + +Even when urgent BKOPS fails, the consumer will get stuck in runtime +suspend status. Like commit 1a5665fc8d7a ("scsi: ufs: core: WLUN suspend +SSU/enter hibern8 fail recovery"), trigger the error handler and return +-EBUSY to break the suspend. + +Fixes: b294ff3e3449 ("scsi: ufs: core: Enable power management for wlun") +Signed-off-by: Keoseong Park +Link: https://lore.kernel.org/r/20230425031721epcms2p5d4de65616478c967d466626e20c42a3a@epcms2p5 +Reviewed-by: Avri Altman +Signed-off-by: Martin K. Petersen +Signed-off-by: Sasha Levin +--- + drivers/scsi/ufs/ufshcd.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c +index bc2b16701a116..d00d263705e15 100644 +--- a/drivers/scsi/ufs/ufshcd.c ++++ b/drivers/scsi/ufs/ufshcd.c +@@ -8756,8 +8756,16 @@ static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) + * that performance might be impacted. + */ + ret = ufshcd_urgent_bkops(hba); +- if (ret) ++ if (ret) { ++ /* ++ * If return err in suspend flow, IO will hang. ++ * Trigger error handler and break suspend for ++ * error recovery. 
++ */ ++ ufshcd_force_error_recovery(hba); ++ ret = -EBUSY; + goto enable_scaling; ++ } + } else { + /* make sure that auto bkops is disabled */ + ufshcd_disable_auto_bkops(hba); +-- +2.39.2 + diff --git a/queue-5.15/series b/queue-5.15/series index e69de29bb2d..8221b43135f 100644 --- a/queue-5.15/series +++ b/queue-5.15/series @@ -0,0 +1,29 @@ +drm-mipi-dsi-set-the-fwnode-for-mipi_dsi_device.patch +arm-9296-1-hp-jornada-7xx-fix-kernel-doc-warnings.patch +net-mdio-mvusb-fix-an-error-handling-path-in-mvusb_m.patch +scsi-ufs-core-fix-i-o-hang-that-occurs-when-bkops-fa.patch +tick-broadcast-make-broadcast-device-replacement-wor.patch +linux-dim-do-nothing-if-no-time-delta-between-sample.patch +net-stmmac-switch-to-use-interrupt-for-hw-crosstimes.patch +net-stmmac-initialize-mac_oneus_tic_counter-register.patch +net-fix-load-tearing-on-sk-sk_stamp-in-sock_recv_cms.patch +netfilter-nf_tables-always-release-netdev-hooks-from.patch +netfilter-conntrack-fix-possible-bug_on-with-enable_.patch +netlink-annotate-accesses-to-nlk-cb_running.patch +net-annotate-sk-sk_err-write-from-do_recvmmsg.patch +net-deal-with-most-data-races-in-sk_wait_event.patch +net-add-vlan_get_protocol_and_depth-helper.patch +tcp-add-annotations-around-sk-sk_shutdown-accesses.patch +gve-remove-the-code-of-clearing-pba-bit.patch +ipvlan-fix-out-of-bounds-caused-by-unclear-skb-cb.patch +net-datagram-fix-data-races-in-datagram_poll.patch +af_unix-fix-a-data-race-of-sk-sk_receive_queue-qlen.patch +af_unix-fix-data-races-around-sk-sk_shutdown.patch +drm-i915-dp-prevent-potential-div-by-zero.patch +fbdev-arcfb-fix-error-handling-in-arcfb_probe.patch +ext4-remove-an-unused-variable-warning-with-config_q.patch +ext4-reflect-error-codes-from-ext4_multi_mount_prote.patch +ext4-don-t-clear-sb_rdonly-when-remounting-r-w-until.patch +ext4-fix-lockdep-warning-when-enabling-mmp.patch +ext4-allow-to-find-by-goal-if-ext4_mb_hint_goal_only.patch +ext4-allow-ext4_get_group_info-to-fail.patch diff --git 
a/queue-5.15/tcp-add-annotations-around-sk-sk_shutdown-accesses.patch b/queue-5.15/tcp-add-annotations-around-sk-sk_shutdown-accesses.patch new file mode 100644 index 00000000000..6db0089b8aa --- /dev/null +++ b/queue-5.15/tcp-add-annotations-around-sk-sk_shutdown-accesses.patch @@ -0,0 +1,158 @@ +From 082c61925863eb354b1a8a8506ab333a723cafc1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 9 May 2023 20:36:56 +0000 +Subject: tcp: add annotations around sk->sk_shutdown accesses + +From: Eric Dumazet + +[ Upstream commit e14cadfd80d76f01bfaa1a8d745b1db19b57d6be ] + +Now sk->sk_shutdown is no longer a bitfield, we can add +standard READ_ONCE()/WRITE_ONCE() annotations to silence +KCSAN reports like the following: + +BUG: KCSAN: data-race in tcp_disconnect / tcp_poll + +write to 0xffff88814588582c of 1 bytes by task 3404 on cpu 1: +tcp_disconnect+0x4d6/0xdb0 net/ipv4/tcp.c:3121 +__inet_stream_connect+0x5dd/0x6e0 net/ipv4/af_inet.c:715 +inet_stream_connect+0x48/0x70 net/ipv4/af_inet.c:727 +__sys_connect_file net/socket.c:2001 [inline] +__sys_connect+0x19b/0x1b0 net/socket.c:2018 +__do_sys_connect net/socket.c:2028 [inline] +__se_sys_connect net/socket.c:2025 [inline] +__x64_sys_connect+0x41/0x50 net/socket.c:2025 +do_syscall_x64 arch/x86/entry/common.c:50 [inline] +do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 +entry_SYSCALL_64_after_hwframe+0x63/0xcd + +read to 0xffff88814588582c of 1 bytes by task 3374 on cpu 0: +tcp_poll+0x2e6/0x7d0 net/ipv4/tcp.c:562 +sock_poll+0x253/0x270 net/socket.c:1383 +vfs_poll include/linux/poll.h:88 [inline] +io_poll_check_events io_uring/poll.c:281 [inline] +io_poll_task_func+0x15a/0x820 io_uring/poll.c:333 +handle_tw_list io_uring/io_uring.c:1184 [inline] +tctx_task_work+0x1fe/0x4d0 io_uring/io_uring.c:1246 +task_work_run+0x123/0x160 kernel/task_work.c:179 +get_signal+0xe64/0xff0 kernel/signal.c:2635 +arch_do_signal_or_restart+0x89/0x2a0 arch/x86/kernel/signal.c:306 +exit_to_user_mode_loop+0x6f/0xe0 kernel/entry/common.c:168 
+exit_to_user_mode_prepare+0x6c/0xb0 kernel/entry/common.c:204 +__syscall_exit_to_user_mode_work kernel/entry/common.c:286 [inline] +syscall_exit_to_user_mode+0x26/0x140 kernel/entry/common.c:297 +do_syscall_64+0x4d/0xc0 arch/x86/entry/common.c:86 +entry_SYSCALL_64_after_hwframe+0x63/0xcd + +value changed: 0x03 -> 0x00 + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Reported-by: syzbot +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/ipv4/af_inet.c | 2 +- + net/ipv4/tcp.c | 14 ++++++++------ + net/ipv4/tcp_input.c | 4 ++-- + 3 files changed, 11 insertions(+), 9 deletions(-) + +diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c +index 91710e5eedff0..7fa1b0a45176f 100644 +--- a/net/ipv4/af_inet.c ++++ b/net/ipv4/af_inet.c +@@ -890,7 +890,7 @@ int inet_shutdown(struct socket *sock, int how) + EPOLLHUP, even on eg. unconnected UDP sockets -- RR */ + fallthrough; + default: +- sk->sk_shutdown |= how; ++ WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | how); + if (sk->sk_prot->shutdown) + sk->sk_prot->shutdown(sk, how); + break; +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index 51f34560a9d63..b91ddd2a2f96d 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -502,6 +502,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) + __poll_t mask; + struct sock *sk = sock->sk; + const struct tcp_sock *tp = tcp_sk(sk); ++ u8 shutdown; + int state; + + sock_poll_wait(file, sock, wait); +@@ -544,9 +545,10 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) + * NOTE. Check for TCP_CLOSE is added. The goal is to prevent + * blocking on fresh not-connected or disconnected socket. 
--ANK + */ +- if (sk->sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE) ++ shutdown = READ_ONCE(sk->sk_shutdown); ++ if (shutdown == SHUTDOWN_MASK || state == TCP_CLOSE) + mask |= EPOLLHUP; +- if (sk->sk_shutdown & RCV_SHUTDOWN) ++ if (shutdown & RCV_SHUTDOWN) + mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; + + /* Connected or passive Fast Open socket? */ +@@ -562,7 +564,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) + if (tcp_stream_is_readable(sk, target)) + mask |= EPOLLIN | EPOLLRDNORM; + +- if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { ++ if (!(shutdown & SEND_SHUTDOWN)) { + if (__sk_stream_is_writeable(sk, 1)) { + mask |= EPOLLOUT | EPOLLWRNORM; + } else { /* send SIGIO later */ +@@ -2740,7 +2742,7 @@ void __tcp_close(struct sock *sk, long timeout) + int data_was_unread = 0; + int state; + +- sk->sk_shutdown = SHUTDOWN_MASK; ++ WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK); + + if (sk->sk_state == TCP_LISTEN) { + tcp_set_state(sk, TCP_CLOSE); +@@ -3006,7 +3008,7 @@ int tcp_disconnect(struct sock *sk, int flags) + if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) + inet_reset_saddr(sk); + +- sk->sk_shutdown = 0; ++ WRITE_ONCE(sk->sk_shutdown, 0); + sock_reset_flag(sk, SOCK_DONE); + tp->srtt_us = 0; + tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); +@@ -4453,7 +4455,7 @@ void tcp_done(struct sock *sk) + if (req) + reqsk_fastopen_remove(sk, req, false); + +- sk->sk_shutdown = SHUTDOWN_MASK; ++ WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK); + + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_state_change(sk); +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c +index 102a0436eb291..65f5d07cd83d9 100644 +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -4353,7 +4353,7 @@ void tcp_fin(struct sock *sk) + + inet_csk_schedule_ack(sk); + +- sk->sk_shutdown |= RCV_SHUTDOWN; ++ WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | RCV_SHUTDOWN); + sock_set_flag(sk, SOCK_DONE); + + switch (sk->sk_state) { +@@ -6531,7 +6531,7 @@ int 
tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) + break; + + tcp_set_state(sk, TCP_FIN_WAIT2); +- sk->sk_shutdown |= SEND_SHUTDOWN; ++ WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | SEND_SHUTDOWN); + + sk_dst_confirm(sk); + +-- +2.39.2 + diff --git a/queue-5.15/tick-broadcast-make-broadcast-device-replacement-wor.patch b/queue-5.15/tick-broadcast-make-broadcast-device-replacement-wor.patch new file mode 100644 index 00000000000..ec4237fac33 --- /dev/null +++ b/queue-5.15/tick-broadcast-make-broadcast-device-replacement-wor.patch @@ -0,0 +1,274 @@ +From 254cee1742df6e1d3bdf43aff584f36dbd9603c5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 6 May 2023 18:40:57 +0200 +Subject: tick/broadcast: Make broadcast device replacement work correctly + +From: Thomas Gleixner + +[ Upstream commit f9d36cf445ffff0b913ba187a3eff78028f9b1fb ] + +When a tick broadcast clockevent device is initialized for one shot mode +then tick_broadcast_setup_oneshot() OR's the periodic broadcast mode +cpumask into the oneshot broadcast cpumask. + +This is required when switching from periodic broadcast mode to oneshot +broadcast mode to ensure that CPUs which are waiting for periodic +broadcast are woken up on the next tick. + +But it is subtly broken, when an active broadcast device is replaced and +the system is already in oneshot (NOHZ/HIGHRES) mode. Victor observed +this and debugged the issue. + +Then the OR of the periodic broadcast CPU mask is wrong as the periodic +cpumask bits are sticky after tick_broadcast_enable() set it for a CPU +unless explicitly cleared via tick_broadcast_disable(). + +That means that this sets all other CPUs which have tick broadcasting +enabled at that point unconditionally in the oneshot broadcast mask. + +If the affected CPUs were already idle and had their bits set in the +oneshot broadcast mask then this does no harm. But for non idle CPUs +which were not set this corrupts their state. 
+ +On their next invocation of tick_broadcast_enable() they observe the bit +set, which indicates that the broadcast for the CPU is already set up. +As a consequence they fail to update the broadcast event even if their +earliest expiring timer is before the actually programmed broadcast +event. + +If the programmed broadcast event is far in the future, then this can +cause stalls or trigger the hung task detector. + +Avoid this by telling tick_broadcast_setup_oneshot() explicitly whether +this is the initial switch over from periodic to oneshot broadcast which +must take the periodic broadcast mask into account. In the case of +initialization of a replacement device this prevents the broadcast +oneshot mask from being modified. + +There is a second problem with broadcast device replacement in this +function. The broadcast device is only armed when the previous state of +the device was periodic. + +That is correct for the switch from periodic broadcast mode to oneshot +broadcast mode as the underlying broadcast device could operate in +oneshot state already due to lack of periodic state in hardware. In that +case it is already armed to expire at the next tick. + +For the replacement case this is wrong as the device is in shutdown +state. That means that any already pending broadcast event will not be +armed. + +This went unnoticed because any CPU which goes idle will observe that +the broadcast device has an expiry time of KTIME_MAX and therefore any +CPU's next timer event will be earlier and cause a reprogramming of the +broadcast device. But that does not guarantee that the events of the +CPUs which were already in idle are delivered on time. + +Fix this by arming the newly installed device for an immediate event +which will reevaluate the per CPU expiry times and reprogram the +broadcast device accordingly. This is simpler than caching the last +expiry time in yet another place or saving it before the device exchange +and handing it down to the setup function. 
Replacement of broadcast +devices is not a frequent operation and usually happens once somewhere +late in the boot process. + +Fixes: 9c336c9935cf ("tick/broadcast: Allow late registered device to enter oneshot mode") +Reported-by: Victor Hassan +Signed-off-by: Thomas Gleixner +Reviewed-by: Frederic Weisbecker +Link: https://lore.kernel.org/r/87pm7d2z1i.ffs@tglx +Signed-off-by: Sasha Levin +--- + kernel/time/tick-broadcast.c | 120 +++++++++++++++++++++++++---------- + 1 file changed, 88 insertions(+), 32 deletions(-) + +diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c +index f7fe6fe361731..0916cc9adb828 100644 +--- a/kernel/time/tick-broadcast.c ++++ b/kernel/time/tick-broadcast.c +@@ -35,14 +35,15 @@ static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(tick_broadcast_lock); + #ifdef CONFIG_TICK_ONESHOT + static DEFINE_PER_CPU(struct clock_event_device *, tick_oneshot_wakeup_device); + +-static void tick_broadcast_setup_oneshot(struct clock_event_device *bc); ++static void tick_broadcast_setup_oneshot(struct clock_event_device *bc, bool from_periodic); + static void tick_broadcast_clear_oneshot(int cpu); + static void tick_resume_broadcast_oneshot(struct clock_event_device *bc); + # ifdef CONFIG_HOTPLUG_CPU + static void tick_broadcast_oneshot_offline(unsigned int cpu); + # endif + #else +-static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); } ++static inline void ++tick_broadcast_setup_oneshot(struct clock_event_device *bc, bool from_periodic) { BUG(); } + static inline void tick_broadcast_clear_oneshot(int cpu) { } + static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { } + # ifdef CONFIG_HOTPLUG_CPU +@@ -264,7 +265,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) + if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) + tick_broadcast_start_periodic(bc); + else +- tick_broadcast_setup_oneshot(bc); ++ tick_broadcast_setup_oneshot(bc, false); + 
ret = 1; + } else { + /* +@@ -500,7 +501,7 @@ void tick_broadcast_control(enum tick_broadcast_mode mode) + if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) + tick_broadcast_start_periodic(bc); + else +- tick_broadcast_setup_oneshot(bc); ++ tick_broadcast_setup_oneshot(bc, false); + } + } + out: +@@ -1016,48 +1017,101 @@ static inline ktime_t tick_get_next_period(void) + /** + * tick_broadcast_setup_oneshot - setup the broadcast device + */ +-static void tick_broadcast_setup_oneshot(struct clock_event_device *bc) ++static void tick_broadcast_setup_oneshot(struct clock_event_device *bc, ++ bool from_periodic) + { + int cpu = smp_processor_id(); ++ ktime_t nexttick = 0; + + if (!bc) + return; + +- /* Set it up only once ! */ +- if (bc->event_handler != tick_handle_oneshot_broadcast) { +- int was_periodic = clockevent_state_periodic(bc); +- +- bc->event_handler = tick_handle_oneshot_broadcast; +- ++ /* ++ * When the broadcast device was switched to oneshot by the first ++ * CPU handling the NOHZ change, the other CPUs will reach this ++ * code via hrtimer_run_queues() -> tick_check_oneshot_change() ++ * too. Set up the broadcast device only once! ++ */ ++ if (bc->event_handler == tick_handle_oneshot_broadcast) { + /* +- * We must be careful here. There might be other CPUs +- * waiting for periodic broadcast. We need to set the +- * oneshot_mask bits for those and program the +- * broadcast device to fire. ++ * The CPU which switched from periodic to oneshot mode ++ * set the broadcast oneshot bit for all other CPUs which ++ * are in the general (periodic) broadcast mask to ensure ++ * that CPUs which wait for the periodic broadcast are ++ * woken up. ++ * ++ * Clear the bit for the local CPU as the set bit would ++ * prevent the first tick_broadcast_enter() after this CPU ++ * switched to oneshot state to program the broadcast ++ * device. 
++ * ++ * This code can also be reached via tick_broadcast_control(), ++ * but this cannot avoid the tick_broadcast_clear_oneshot() ++ * as that would break the periodic to oneshot transition of ++ * secondary CPUs. But that's harmless as the below only ++ * clears already cleared bits. + */ ++ tick_broadcast_clear_oneshot(cpu); ++ return; ++ } ++ ++ ++ bc->event_handler = tick_handle_oneshot_broadcast; ++ bc->next_event = KTIME_MAX; ++ ++ /* ++ * When the tick mode is switched from periodic to oneshot it must ++ * be ensured that CPUs which are waiting for periodic broadcast ++ * get their wake-up at the next tick. This is achieved by ORing ++ * tick_broadcast_mask into tick_broadcast_oneshot_mask. ++ * ++ * For other callers, e.g. broadcast device replacement, ++ * tick_broadcast_oneshot_mask must not be touched as this would ++ * set bits for CPUs which are already NOHZ, but not idle. Their ++ * next tick_broadcast_enter() would observe the bit set and fail ++ * to update the expiry time and the broadcast event device. ++ */ ++ if (from_periodic) { + cpumask_copy(tmpmask, tick_broadcast_mask); ++ /* Remove the local CPU as it is obviously not idle */ + cpumask_clear_cpu(cpu, tmpmask); +- cpumask_or(tick_broadcast_oneshot_mask, +- tick_broadcast_oneshot_mask, tmpmask); ++ cpumask_or(tick_broadcast_oneshot_mask, tick_broadcast_oneshot_mask, tmpmask); + +- if (was_periodic && !cpumask_empty(tmpmask)) { +- ktime_t nextevt = tick_get_next_period(); ++ /* ++ * Ensure that the oneshot broadcast handler will wake the ++ * CPUs which are still waiting for periodic broadcast. 
++ */ ++ nexttick = tick_get_next_period(); ++ tick_broadcast_init_next_event(tmpmask, nexttick); + +- clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT); +- tick_broadcast_init_next_event(tmpmask, nextevt); +- tick_broadcast_set_event(bc, cpu, nextevt); +- } else +- bc->next_event = KTIME_MAX; +- } else { + /* +- * The first cpu which switches to oneshot mode sets +- * the bit for all other cpus which are in the general +- * (periodic) broadcast mask. So the bit is set and +- * would prevent the first broadcast enter after this +- * to program the bc device. ++ * If the underlying broadcast clock event device is ++ * already in oneshot state, then there is nothing to do. ++ * The device was already armed for the next tick ++ * in tick_handle_broadcast_periodic() + */ +- tick_broadcast_clear_oneshot(cpu); ++ if (clockevent_state_oneshot(bc)) ++ return; + } ++ ++ /* ++ * When switching from periodic to oneshot mode arm the broadcast ++ * device for the next tick. ++ * ++ * If the broadcast device has been replaced in oneshot mode and ++ * the oneshot broadcast mask is not empty, then arm it to expire ++ * immediately in order to reevaluate the next expiring timer. ++ * @nexttick is 0 and therefore in the past which will cause the ++ * clockevent code to force an event. ++ * ++ * For both cases the programming can be avoided when the oneshot ++ * broadcast mask is empty. ++ * ++ * tick_broadcast_set_event() implicitly switches the broadcast ++ * device to oneshot state. 
++ */ ++ if (!cpumask_empty(tick_broadcast_oneshot_mask)) ++ tick_broadcast_set_event(bc, cpu, nexttick); + } + + /* +@@ -1066,14 +1120,16 @@ static void tick_broadcast_setup_oneshot(struct clock_event_device *bc) + void tick_broadcast_switch_to_oneshot(void) + { + struct clock_event_device *bc; ++ enum tick_device_mode oldmode; + unsigned long flags; + + raw_spin_lock_irqsave(&tick_broadcast_lock, flags); + ++ oldmode = tick_broadcast_device.mode; + tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT; + bc = tick_broadcast_device.evtdev; + if (bc) +- tick_broadcast_setup_oneshot(bc); ++ tick_broadcast_setup_oneshot(bc, oldmode == TICKDEV_MODE_PERIODIC); + + raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); + } +-- +2.39.2 + -- 2.47.3