git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 5.4
author    Sasha Levin <sashal@kernel.org>
          Sun, 13 Oct 2024 02:48:15 +0000 (22:48 -0400)
committer Sasha Levin <sashal@kernel.org>
          Sun, 13 Oct 2024 02:48:15 +0000 (22:48 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
17 files changed:
queue-5.4/bluetooth-rfcomm-fix-possible-deadlock-in-rfcomm_sk_.patch [new file with mode: 0644]
queue-5.4/gpio-aspeed-add-the-flush-write-to-ensure-the-write-.patch [new file with mode: 0644]
queue-5.4/gpio-aspeed-use-devm_clk-api-to-manage-clock-source.patch [new file with mode: 0644]
queue-5.4/ice-fix-vlan-replay-after-reset.patch [new file with mode: 0644]
queue-5.4/igb-do-not-bring-the-device-up-after-non-fatal-error.patch [new file with mode: 0644]
queue-5.4/net-annotate-lockless-accesses-to-sk-sk_ack_backlog.patch [new file with mode: 0644]
queue-5.4/net-annotate-lockless-accesses-to-sk-sk_max_ack_back.patch [new file with mode: 0644]
queue-5.4/net-ibm-emac-mal-fix-wrong-goto.patch [new file with mode: 0644]
queue-5.4/net-sched-accept-tca_stab-only-for-root-qdisc.patch [new file with mode: 0644]
queue-5.4/netfilter-br_netfilter-fix-panic-with-metadata_dst-s.patch [new file with mode: 0644]
queue-5.4/ppp-fix-ppp_async_encode-illegal-access.patch [new file with mode: 0644]
queue-5.4/sctp-ensure-sk_state-is-set-to-closed-if-hashing-fai.patch [new file with mode: 0644]
queue-5.4/series
queue-5.4/slip-make-slhc_remember-more-robust-against-maliciou.patch [new file with mode: 0644]
queue-5.4/sunrpc-fix-integer-overflow-in-decode_rc_list.patch [new file with mode: 0644]
queue-5.4/tcp-fix-tcp_enter_recovery-to-zero-retrans_stamp-whe.patch [new file with mode: 0644]
queue-5.4/tcp-fix-to-allow-timestamp-undo-if-no-retransmits-we.patch [new file with mode: 0644]

diff --git a/queue-5.4/bluetooth-rfcomm-fix-possible-deadlock-in-rfcomm_sk_.patch b/queue-5.4/bluetooth-rfcomm-fix-possible-deadlock-in-rfcomm_sk_.patch
new file mode 100644 (file)
index 0000000..7a57dcf
--- /dev/null
@@ -0,0 +1,51 @@
+From 9d39eeb25d9c0569392a6fb3e88296707fe7612c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 30 Sep 2024 13:26:21 -0400
+Subject: Bluetooth: RFCOMM: FIX possible deadlock in rfcomm_sk_state_change
+
+From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+
+[ Upstream commit 08d1914293dae38350b8088980e59fbc699a72fe ]
+
+rfcomm_sk_state_change attempts to use sock_lock so it must never be
+called with it locked but rfcomm_sock_ioctl always attempt to lock it
+causing the following trace:
+
+======================================================
+WARNING: possible circular locking dependency detected
+6.8.0-syzkaller-08951-gfe46a7dd189e #0 Not tainted
+------------------------------------------------------
+syz-executor386/5093 is trying to acquire lock:
+ffff88807c396258 (sk_lock-AF_BLUETOOTH-BTPROTO_RFCOMM){+.+.}-{0:0}, at: lock_sock include/net/sock.h:1671 [inline]
+ffff88807c396258 (sk_lock-AF_BLUETOOTH-BTPROTO_RFCOMM){+.+.}-{0:0}, at: rfcomm_sk_state_change+0x5b/0x310 net/bluetooth/rfcomm/sock.c:73
+
+but task is already holding lock:
+ffff88807badfd28 (&d->lock){+.+.}-{3:3}, at: __rfcomm_dlc_close+0x226/0x6a0 net/bluetooth/rfcomm/core.c:491
+
+Reported-by: syzbot+d7ce59b06b3eb14fd218@syzkaller.appspotmail.com
+Tested-by: syzbot+d7ce59b06b3eb14fd218@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=d7ce59b06b3eb14fd218
+Fixes: 3241ad820dbb ("[Bluetooth] Add timestamp support to L2CAP, RFCOMM and SCO")
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/bluetooth/rfcomm/sock.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
+index e67310a749d27..c52e2f7ff84bd 100644
+--- a/net/bluetooth/rfcomm/sock.c
++++ b/net/bluetooth/rfcomm/sock.c
+@@ -872,9 +872,7 @@ static int rfcomm_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned lon
+       if (err == -ENOIOCTLCMD) {
+ #ifdef CONFIG_BT_RFCOMM_TTY
+-              lock_sock(sk);
+               err = rfcomm_dev_ioctl(sk, cmd, (void __user *) arg);
+-              release_sock(sk);
+ #else
+               err = -EOPNOTSUPP;
+ #endif
+-- 
+2.43.0
+
diff --git a/queue-5.4/gpio-aspeed-add-the-flush-write-to-ensure-the-write-.patch b/queue-5.4/gpio-aspeed-add-the-flush-write-to-ensure-the-write-.patch
new file mode 100644 (file)
index 0000000..331dbef
--- /dev/null
@@ -0,0 +1,44 @@
+From 94a173cccaf47da1e8329e4799c8b1f20c47d0b1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Oct 2024 16:14:44 +0800
+Subject: gpio: aspeed: Add the flush write to ensure the write complete.
+
+From: Billy Tsai <billy_tsai@aspeedtech.com>
+
+[ Upstream commit 1bb5a99e1f3fd27accb804aa0443a789161f843c ]
+
+Performing a dummy read ensures that the register write operation is fully
+completed, mitigating any potential bus delays that could otherwise impact
+the frequency of bitbang usage. E.g., if the JTAG application uses GPIO to
+control the JTAG pins (TCK, TMS, TDI, TDO, and TRST), and the application
+sets the TCK clock to 1 MHz, the GPIO's high/low transitions will rely on
+a delay function to ensure the clock frequency does not exceed 1 MHz.
+However, this can lead to rapid toggling of the GPIO because the write
+operation is POSTed and does not wait for a bus acknowledgment.
+
+Fixes: 361b79119a4b ("gpio: Add Aspeed driver")
+Reviewed-by: Andrew Jeffery <andrew@codeconstruct.com.au>
+Signed-off-by: Billy Tsai <billy_tsai@aspeedtech.com>
+Link: https://lore.kernel.org/r/20241008081450.1490955-2-billy_tsai@aspeedtech.com
+Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpio/gpio-aspeed.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/gpio/gpio-aspeed.c b/drivers/gpio/gpio-aspeed.c
+index b7e93d10a6ab6..24bd00a6ae423 100644
+--- a/drivers/gpio/gpio-aspeed.c
++++ b/drivers/gpio/gpio-aspeed.c
+@@ -404,6 +404,8 @@ static void __aspeed_gpio_set(struct gpio_chip *gc, unsigned int offset,
+       gpio->dcache[GPIO_BANK(offset)] = reg;
+       iowrite32(reg, addr);
++      /* Flush write */
++      ioread32(addr);
+ }
+ static void aspeed_gpio_set(struct gpio_chip *gc, unsigned int offset,
+-- 
+2.43.0
+
diff --git a/queue-5.4/gpio-aspeed-use-devm_clk-api-to-manage-clock-source.patch b/queue-5.4/gpio-aspeed-use-devm_clk-api-to-manage-clock-source.patch
new file mode 100644 (file)
index 0000000..343e96c
--- /dev/null
@@ -0,0 +1,37 @@
+From d1f6b22b214a75232c6b3b43d5ba804c946d2fd1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Oct 2024 16:14:45 +0800
+Subject: gpio: aspeed: Use devm_clk api to manage clock source
+
+From: Billy Tsai <billy_tsai@aspeedtech.com>
+
+[ Upstream commit a6191a3d18119184237f4ee600039081ad992320 ]
+
+Replace of_clk_get with devm_clk_get_enabled to manage the clock source.
+
+Fixes: 5ae4cb94b313 ("gpio: aspeed: Add debounce support")
+Reviewed-by: Andrew Jeffery <andrew@codeconstruct.com.au>
+Signed-off-by: Billy Tsai <billy_tsai@aspeedtech.com>
+Link: https://lore.kernel.org/r/20241008081450.1490955-3-billy_tsai@aspeedtech.com
+Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpio/gpio-aspeed.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/gpio/gpio-aspeed.c b/drivers/gpio/gpio-aspeed.c
+index 24bd00a6ae423..9df73ae84841a 100644
+--- a/drivers/gpio/gpio-aspeed.c
++++ b/drivers/gpio/gpio-aspeed.c
+@@ -1159,7 +1159,7 @@ static int __init aspeed_gpio_probe(struct platform_device *pdev)
+       if (!gpio_id)
+               return -EINVAL;
+-      gpio->clk = of_clk_get(pdev->dev.of_node, 0);
++      gpio->clk = devm_clk_get_enabled(&pdev->dev, NULL);
+       if (IS_ERR(gpio->clk)) {
+               dev_warn(&pdev->dev,
+                               "Failed to get clock from devicetree, debouncing disabled\n");
+-- 
+2.43.0
+
diff --git a/queue-5.4/ice-fix-vlan-replay-after-reset.patch b/queue-5.4/ice-fix-vlan-replay-after-reset.patch
new file mode 100644 (file)
index 0000000..0076f8b
--- /dev/null
@@ -0,0 +1,56 @@
+From 2fe4e208085d9d41bb1441a9caf2bce6216d853a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 18 Sep 2024 14:02:56 -0400
+Subject: ice: fix VLAN replay after reset
+
+From: Dave Ertman <david.m.ertman@intel.com>
+
+[ Upstream commit 0eae2c136cb624e4050092feb59f18159b4f2512 ]
+
+There is a bug currently when there are more than one VLAN defined
+and any reset that affects the PF is initiated, after the reset rebuild
+no traffic will pass on any VLAN but the last one created.
+
+This is caused by the iteration though the VLANs during replay each
+clearing the vsi_map bitmap of the VSI that is being replayed.  The
+problem is that during the replay, the pointer to the vsi_map bitmap
+is used by each successive vlan to determine if it should be replayed
+on this VSI.
+
+The logic was that the replay of the VLAN would replace the bit in the map
+before the next VLAN would iterate through.  But, since the replay copies
+the old bitmap pointer to filt_replay_rules and creates a new one for the
+recreated VLANS, it does not do this, and leaves the old bitmap broken
+to be used to replay the remaining VLANs.
+
+Since the old bitmap will be cleaned up in post replay cleanup, there is
+no need to alter it and break following VLAN replay, so don't clear the
+bit.
+
+Fixes: 334cb0626de1 ("ice: Implement VSI replay framework")
+Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
+Signed-off-by: Dave Ertman <david.m.ertman@intel.com>
+Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
+Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha@intel.com> (A Contingent worker at Intel)
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/ice_switch.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
+index de520466f23a3..0d8fa189d2d44 100644
+--- a/drivers/net/ethernet/intel/ice/ice_switch.c
++++ b/drivers/net/ethernet/intel/ice/ice_switch.c
+@@ -2751,8 +2751,6 @@ ice_replay_vsi_fltr(struct ice_hw *hw, u16 vsi_handle, u8 recp_id,
+               if (!itr->vsi_list_info ||
+                   !test_bit(vsi_handle, itr->vsi_list_info->vsi_map))
+                       continue;
+-              /* Clearing it so that the logic can add it back */
+-              clear_bit(vsi_handle, itr->vsi_list_info->vsi_map);
+               f_entry.fltr_info.vsi_handle = vsi_handle;
+               f_entry.fltr_info.fltr_act = ICE_FWD_TO_VSI;
+               /* update the src in case it is VSI num */
+-- 
+2.43.0
+
diff --git a/queue-5.4/igb-do-not-bring-the-device-up-after-non-fatal-error.patch b/queue-5.4/igb-do-not-bring-the-device-up-after-non-fatal-error.patch
new file mode 100644 (file)
index 0000000..95e9d95
--- /dev/null
@@ -0,0 +1,96 @@
+From e21101d6aed7cd7d41aadcd87a93f662a721659c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 24 Sep 2024 15:06:01 -0600
+Subject: igb: Do not bring the device up after non-fatal error
+
+From: Mohamed Khalfella <mkhalfella@purestorage.com>
+
+[ Upstream commit 330a699ecbfc9c26ec92c6310686da1230b4e7eb ]
+
+Commit 004d25060c78 ("igb: Fix igb_down hung on surprise removal")
+changed igb_io_error_detected() to ignore non-fatal pcie errors in order
+to avoid hung task that can happen when igb_down() is called multiple
+times. This caused an issue when processing transient non-fatal errors.
+igb_io_resume(), which is called after igb_io_error_detected(), assumes
+that device is brought down by igb_io_error_detected() if the interface
+is up. This resulted in panic with stacktrace below.
+
+[ T3256] igb 0000:09:00.0 haeth0: igb: haeth0 NIC Link is Down
+[  T292] pcieport 0000:00:1c.5: AER: Uncorrected (Non-Fatal) error received: 0000:09:00.0
+[  T292] igb 0000:09:00.0: PCIe Bus Error: severity=Uncorrected (Non-Fatal), type=Transaction Layer, (Requester ID)
+[  T292] igb 0000:09:00.0:   device [8086:1537] error status/mask=00004000/00000000
+[  T292] igb 0000:09:00.0:    [14] CmpltTO [  200.105524,009][  T292] igb 0000:09:00.0: AER:   TLP Header: 00000000 00000000 00000000 00000000
+[  T292] pcieport 0000:00:1c.5: AER: broadcast error_detected message
+[  T292] igb 0000:09:00.0: Non-correctable non-fatal error reported.
+[  T292] pcieport 0000:00:1c.5: AER: broadcast mmio_enabled message
+[  T292] pcieport 0000:00:1c.5: AER: broadcast resume message
+[  T292] ------------[ cut here ]------------
+[  T292] kernel BUG at net/core/dev.c:6539!
+[  T292] invalid opcode: 0000 [#1] PREEMPT SMP
+[  T292] RIP: 0010:napi_enable+0x37/0x40
+[  T292] Call Trace:
+[  T292]  <TASK>
+[  T292]  ? die+0x33/0x90
+[  T292]  ? do_trap+0xdc/0x110
+[  T292]  ? napi_enable+0x37/0x40
+[  T292]  ? do_error_trap+0x70/0xb0
+[  T292]  ? napi_enable+0x37/0x40
+[  T292]  ? napi_enable+0x37/0x40
+[  T292]  ? exc_invalid_op+0x4e/0x70
+[  T292]  ? napi_enable+0x37/0x40
+[  T292]  ? asm_exc_invalid_op+0x16/0x20
+[  T292]  ? napi_enable+0x37/0x40
+[  T292]  igb_up+0x41/0x150
+[  T292]  igb_io_resume+0x25/0x70
+[  T292]  report_resume+0x54/0x70
+[  T292]  ? report_frozen_detected+0x20/0x20
+[  T292]  pci_walk_bus+0x6c/0x90
+[  T292]  ? aer_print_port_info+0xa0/0xa0
+[  T292]  pcie_do_recovery+0x22f/0x380
+[  T292]  aer_process_err_devices+0x110/0x160
+[  T292]  aer_isr+0x1c1/0x1e0
+[  T292]  ? disable_irq_nosync+0x10/0x10
+[  T292]  irq_thread_fn+0x1a/0x60
+[  T292]  irq_thread+0xe3/0x1a0
+[  T292]  ? irq_set_affinity_notifier+0x120/0x120
+[  T292]  ? irq_affinity_notify+0x100/0x100
+[  T292]  kthread+0xe2/0x110
+[  T292]  ? kthread_complete_and_exit+0x20/0x20
+[  T292]  ret_from_fork+0x2d/0x50
+[  T292]  ? kthread_complete_and_exit+0x20/0x20
+[  T292]  ret_from_fork_asm+0x11/0x20
+[  T292]  </TASK>
+
+To fix this issue igb_io_resume() checks if the interface is running and
+the device is not down this means igb_io_error_detected() did not bring
+the device down and there is no need to bring it up.
+
+Signed-off-by: Mohamed Khalfella <mkhalfella@purestorage.com>
+Reviewed-by: Yuanyuan Zhong <yzhong@purestorage.com>
+Fixes: 004d25060c78 ("igb: Fix igb_down hung on surprise removal")
+Reviewed-by: Simon Horman <horms@kernel.org>
+Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha@intel.com> (A Contingent worker at Intel)
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igb/igb_main.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
+index 884beeb67a1fb..1e9967657248a 100644
+--- a/drivers/net/ethernet/intel/igb/igb_main.c
++++ b/drivers/net/ethernet/intel/igb/igb_main.c
+@@ -9111,6 +9111,10 @@ static void igb_io_resume(struct pci_dev *pdev)
+       struct igb_adapter *adapter = netdev_priv(netdev);
+       if (netif_running(netdev)) {
++              if (!test_bit(__IGB_DOWN, &adapter->state)) {
++                      dev_dbg(&pdev->dev, "Resuming from non-fatal error, do nothing.\n");
++                      return;
++              }
+               if (igb_up(adapter)) {
+                       dev_err(&pdev->dev, "igb_up failed after reset\n");
+                       return;
+-- 
+2.43.0
+
diff --git a/queue-5.4/net-annotate-lockless-accesses-to-sk-sk_ack_backlog.patch b/queue-5.4/net-annotate-lockless-accesses-to-sk-sk_ack_backlog.patch
new file mode 100644 (file)
index 0000000..dd456b2
--- /dev/null
@@ -0,0 +1,133 @@
+From 88e8ad29aece33e46f2063d0b39479922cce2434 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 5 Nov 2019 14:11:53 -0800
+Subject: net: annotate lockless accesses to sk->sk_ack_backlog
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 288efe8606b62d0753ba6722b36ef241877251fd ]
+
+sk->sk_ack_backlog can be read without any lock being held.
+We need to use READ_ONCE()/WRITE_ONCE() to avoid load/store tearing
+and/or potential KCSAN warnings.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: 4d5c70e6155d ("sctp: ensure sk_state is set to CLOSED if hashing fails in sctp_listen_start")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/sock.h  | 6 +++---
+ net/ipv4/tcp.c      | 2 +-
+ net/ipv4/tcp_diag.c | 2 +-
+ net/ipv4/tcp_ipv4.c | 2 +-
+ net/ipv6/tcp_ipv6.c | 2 +-
+ net/sched/em_meta.c | 2 +-
+ net/sctp/diag.c     | 2 +-
+ 7 files changed, 9 insertions(+), 9 deletions(-)
+
+diff --git a/include/net/sock.h b/include/net/sock.h
+index 250d5a6c508cb..c0d5775bc62c0 100644
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -873,17 +873,17 @@ static inline gfp_t sk_gfp_mask(const struct sock *sk, gfp_t gfp_mask)
+ static inline void sk_acceptq_removed(struct sock *sk)
+ {
+-      sk->sk_ack_backlog--;
++      WRITE_ONCE(sk->sk_ack_backlog, sk->sk_ack_backlog - 1);
+ }
+ static inline void sk_acceptq_added(struct sock *sk)
+ {
+-      sk->sk_ack_backlog++;
++      WRITE_ONCE(sk->sk_ack_backlog, sk->sk_ack_backlog + 1);
+ }
+ static inline bool sk_acceptq_is_full(const struct sock *sk)
+ {
+-      return sk->sk_ack_backlog > sk->sk_max_ack_backlog;
++      return READ_ONCE(sk->sk_ack_backlog) > sk->sk_max_ack_backlog;
+ }
+ /*
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index 54399256a4380..4800422169f95 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -3280,7 +3280,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
+                * tcpi_unacked -> Number of children ready for accept()
+                * tcpi_sacked  -> max backlog
+                */
+-              info->tcpi_unacked = sk->sk_ack_backlog;
++              info->tcpi_unacked = READ_ONCE(sk->sk_ack_backlog);
+               info->tcpi_sacked = sk->sk_max_ack_backlog;
+               return;
+       }
+diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
+index 549506162ddec..edfbab54c46f4 100644
+--- a/net/ipv4/tcp_diag.c
++++ b/net/ipv4/tcp_diag.c
+@@ -21,7 +21,7 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
+       struct tcp_info *info = _info;
+       if (inet_sk_state_load(sk) == TCP_LISTEN) {
+-              r->idiag_rqueue = sk->sk_ack_backlog;
++              r->idiag_rqueue = READ_ONCE(sk->sk_ack_backlog);
+               r->idiag_wqueue = sk->sk_max_ack_backlog;
+       } else if (sk->sk_type == SOCK_STREAM) {
+               const struct tcp_sock *tp = tcp_sk(sk);
+diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
+index a0a4dbcf8c12f..1f8a9b323a0dd 100644
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -2500,7 +2500,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
+       state = inet_sk_state_load(sk);
+       if (state == TCP_LISTEN)
+-              rx_queue = sk->sk_ack_backlog;
++              rx_queue = READ_ONCE(sk->sk_ack_backlog);
+       else
+               /* Because we don't lock the socket,
+                * we might find a transient negative value.
+diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
+index 81b7ef21180bf..8be41d6c4278b 100644
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -1921,7 +1921,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
+       state = inet_sk_state_load(sp);
+       if (state == TCP_LISTEN)
+-              rx_queue = sp->sk_ack_backlog;
++              rx_queue = READ_ONCE(sp->sk_ack_backlog);
+       else
+               /* Because we don't lock the socket,
+                * we might find a transient negative value.
+diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
+index ad007cdcec978..c4c297627feac 100644
+--- a/net/sched/em_meta.c
++++ b/net/sched/em_meta.c
+@@ -521,7 +521,7 @@ META_COLLECTOR(int_sk_ack_bl)
+               *err = -1;
+               return;
+       }
+-      dst->value = sk->sk_ack_backlog;
++      dst->value = READ_ONCE(sk->sk_ack_backlog);
+ }
+ META_COLLECTOR(int_sk_max_ack_bl)
+diff --git a/net/sctp/diag.c b/net/sctp/diag.c
+index 2d0318a7352c2..e0785592fdd63 100644
+--- a/net/sctp/diag.c
++++ b/net/sctp/diag.c
+@@ -416,7 +416,7 @@ static void sctp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
+               r->idiag_rqueue = atomic_read(&infox->asoc->rmem_alloc);
+               r->idiag_wqueue = infox->asoc->sndbuf_used;
+       } else {
+-              r->idiag_rqueue = sk->sk_ack_backlog;
++              r->idiag_rqueue = READ_ONCE(sk->sk_ack_backlog);
+               r->idiag_wqueue = sk->sk_max_ack_backlog;
+       }
+       if (infox->sctpinfo)
+-- 
+2.43.0
+
diff --git a/queue-5.4/net-annotate-lockless-accesses-to-sk-sk_max_ack_back.patch b/queue-5.4/net-annotate-lockless-accesses-to-sk-sk_max_ack_back.patch
new file mode 100644 (file)
index 0000000..9d4692d
--- /dev/null
@@ -0,0 +1,160 @@
+From f6a5c5c3fa4e9b36370215304982576fca6c61ba Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 5 Nov 2019 14:11:54 -0800
+Subject: net: annotate lockless accesses to sk->sk_max_ack_backlog
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 099ecf59f05b5f30f42ebac0ab8cb94f9b18c90c ]
+
+sk->sk_max_ack_backlog can be read without any lock being held
+at least in TCP/DCCP cases.
+
+We need to use READ_ONCE()/WRITE_ONCE() to avoid load/store tearing
+and/or potential KCSAN warnings.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: 4d5c70e6155d ("sctp: ensure sk_state is set to CLOSED if hashing fails in sctp_listen_start")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/sock.h              | 2 +-
+ net/dccp/proto.c                | 2 +-
+ net/ipv4/af_inet.c              | 2 +-
+ net/ipv4/inet_connection_sock.c | 2 +-
+ net/ipv4/tcp.c                  | 2 +-
+ net/ipv4/tcp_diag.c             | 2 +-
+ net/sched/em_meta.c             | 2 +-
+ net/sctp/diag.c                 | 2 +-
+ net/sctp/socket.c               | 4 ++--
+ 9 files changed, 10 insertions(+), 10 deletions(-)
+
+diff --git a/include/net/sock.h b/include/net/sock.h
+index c0d5775bc62c0..986f9724da8d0 100644
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -883,7 +883,7 @@ static inline void sk_acceptq_added(struct sock *sk)
+ static inline bool sk_acceptq_is_full(const struct sock *sk)
+ {
+-      return READ_ONCE(sk->sk_ack_backlog) > sk->sk_max_ack_backlog;
++      return READ_ONCE(sk->sk_ack_backlog) > READ_ONCE(sk->sk_max_ack_backlog);
+ }
+ /*
+diff --git a/net/dccp/proto.c b/net/dccp/proto.c
+index 491b148afa8f0..add742af1c8b9 100644
+--- a/net/dccp/proto.c
++++ b/net/dccp/proto.c
+@@ -960,7 +960,7 @@ int inet_dccp_listen(struct socket *sock, int backlog)
+       if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
+               goto out;
+-      sk->sk_max_ack_backlog = backlog;
++      WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
+       /* Really, if the socket is already in listen state
+        * we can only allow the backlog to be adjusted.
+        */
+diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
+index e0d0aae343ac8..be2b786cee2bd 100644
+--- a/net/ipv4/af_inet.c
++++ b/net/ipv4/af_inet.c
+@@ -208,7 +208,7 @@ int inet_listen(struct socket *sock, int backlog)
+       if (!((1 << old_state) & (TCPF_CLOSE | TCPF_LISTEN)))
+               goto out;
+-      sk->sk_max_ack_backlog = backlog;
++      WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
+       /* Really, if the socket is already in listen state
+        * we can only allow the backlog to be adjusted.
+        */
+diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
+index 091999dbef335..6766a154ff854 100644
+--- a/net/ipv4/inet_connection_sock.c
++++ b/net/ipv4/inet_connection_sock.c
+@@ -767,7 +767,7 @@ static void reqsk_timer_handler(struct timer_list *t)
+        * ones are about to clog our table.
+        */
+       qlen = reqsk_queue_len(queue);
+-      if ((qlen << 1) > max(8U, sk_listener->sk_max_ack_backlog)) {
++      if ((qlen << 1) > max(8U, READ_ONCE(sk_listener->sk_max_ack_backlog))) {
+               int young = reqsk_queue_len_young(queue) << 1;
+               while (thresh > 2) {
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index 4800422169f95..55754bf176d99 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -3281,7 +3281,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
+                * tcpi_sacked  -> max backlog
+                */
+               info->tcpi_unacked = READ_ONCE(sk->sk_ack_backlog);
+-              info->tcpi_sacked = sk->sk_max_ack_backlog;
++              info->tcpi_sacked = READ_ONCE(sk->sk_max_ack_backlog);
+               return;
+       }
+diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
+index edfbab54c46f4..0d08f9e2d8d03 100644
+--- a/net/ipv4/tcp_diag.c
++++ b/net/ipv4/tcp_diag.c
+@@ -22,7 +22,7 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
+       if (inet_sk_state_load(sk) == TCP_LISTEN) {
+               r->idiag_rqueue = READ_ONCE(sk->sk_ack_backlog);
+-              r->idiag_wqueue = sk->sk_max_ack_backlog;
++              r->idiag_wqueue = READ_ONCE(sk->sk_max_ack_backlog);
+       } else if (sk->sk_type == SOCK_STREAM) {
+               const struct tcp_sock *tp = tcp_sk(sk);
+diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
+index c4c297627feac..46254968d390f 100644
+--- a/net/sched/em_meta.c
++++ b/net/sched/em_meta.c
+@@ -532,7 +532,7 @@ META_COLLECTOR(int_sk_max_ack_bl)
+               *err = -1;
+               return;
+       }
+-      dst->value = sk->sk_max_ack_backlog;
++      dst->value = READ_ONCE(sk->sk_max_ack_backlog);
+ }
+ META_COLLECTOR(int_sk_prio)
+diff --git a/net/sctp/diag.c b/net/sctp/diag.c
+index e0785592fdd63..2fcfb8cc8bd12 100644
+--- a/net/sctp/diag.c
++++ b/net/sctp/diag.c
+@@ -417,7 +417,7 @@ static void sctp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
+               r->idiag_wqueue = infox->asoc->sndbuf_used;
+       } else {
+               r->idiag_rqueue = READ_ONCE(sk->sk_ack_backlog);
+-              r->idiag_wqueue = sk->sk_max_ack_backlog;
++              r->idiag_wqueue = READ_ONCE(sk->sk_max_ack_backlog);
+       }
+       if (infox->sctpinfo)
+               sctp_get_sctp_info(sk, infox->asoc, infox->sctpinfo);
+diff --git a/net/sctp/socket.c b/net/sctp/socket.c
+index eef807edd61da..efc9981481c5f 100644
+--- a/net/sctp/socket.c
++++ b/net/sctp/socket.c
+@@ -8407,7 +8407,7 @@ static int sctp_listen_start(struct sock *sk, int backlog)
+               }
+       }
+-      sk->sk_max_ack_backlog = backlog;
++      WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
+       return sctp_hash_endpoint(ep);
+ }
+@@ -8461,7 +8461,7 @@ int sctp_inet_listen(struct socket *sock, int backlog)
+       /* If we are already listening, just update the backlog */
+       if (sctp_sstate(sk, LISTENING))
+-              sk->sk_max_ack_backlog = backlog;
++              WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
+       else {
+               err = sctp_listen_start(sk, backlog);
+               if (err)
+-- 
+2.43.0
+
diff --git a/queue-5.4/net-ibm-emac-mal-fix-wrong-goto.patch b/queue-5.4/net-ibm-emac-mal-fix-wrong-goto.patch
new file mode 100644 (file)
index 0000000..b09d32b
--- /dev/null
@@ -0,0 +1,36 @@
+From b79973a4b357274558bd5227728f1d1088be1591 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 7 Oct 2024 16:57:11 -0700
+Subject: net: ibm: emac: mal: fix wrong goto
+
+From: Rosen Penev <rosenp@gmail.com>
+
+[ Upstream commit 08c8acc9d8f3f70d62dd928571368d5018206490 ]
+
+dcr_map is called in the previous if and therefore needs to be unmapped.
+
+Fixes: 1ff0fcfcb1a6 ("ibm_newemac: Fix new MAL feature handling")
+Signed-off-by: Rosen Penev <rosenp@gmail.com>
+Link: https://patch.msgid.link/20241007235711.5714-1-rosenp@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/ibm/emac/mal.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/ibm/emac/mal.c b/drivers/net/ethernet/ibm/emac/mal.c
+index 075c07303f165..b095d5057b5eb 100644
+--- a/drivers/net/ethernet/ibm/emac/mal.c
++++ b/drivers/net/ethernet/ibm/emac/mal.c
+@@ -576,7 +576,7 @@ static int mal_probe(struct platform_device *ofdev)
+               printk(KERN_ERR "%pOF: Support for 405EZ not enabled!\n",
+                               ofdev->dev.of_node);
+               err = -ENODEV;
+-              goto fail;
++              goto fail_unmap;
+ #endif
+       }
+-- 
+2.43.0
+
diff --git a/queue-5.4/net-sched-accept-tca_stab-only-for-root-qdisc.patch b/queue-5.4/net-sched-accept-tca_stab-only-for-root-qdisc.patch
new file mode 100644 (file)
index 0000000..0e2b910
--- /dev/null
@@ -0,0 +1,150 @@
+From 184c6b1d99d2dbc4bc10e704655f2d93df690c37 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 7 Oct 2024 18:41:30 +0000
+Subject: net/sched: accept TCA_STAB only for root qdisc
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 3cb7cf1540ddff5473d6baeb530228d19bc97b8a ]
+
+Most qdiscs maintain their backlog using qdisc_pkt_len(skb)
+on the assumption it is invariant between the enqueue()
+and dequeue() handlers.
+
+Unfortunately syzbot can crash a host rather easily using
+a TBF + SFQ combination, with an STAB on SFQ [1]
+
+We can't support TCA_STAB on arbitrary level, this would
+require to maintain per-qdisc storage.
+
+[1]
+[   88.796496] BUG: kernel NULL pointer dereference, address: 0000000000000000
+[   88.798611] #PF: supervisor read access in kernel mode
+[   88.799014] #PF: error_code(0x0000) - not-present page
+[   88.799506] PGD 0 P4D 0
+[   88.799829] Oops: Oops: 0000 [#1] SMP NOPTI
+[   88.800569] CPU: 14 UID: 0 PID: 2053 Comm: b371744477 Not tainted 6.12.0-rc1-virtme #1117
+[   88.801107] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
+[   88.801779] RIP: 0010:sfq_dequeue (net/sched/sch_sfq.c:272 net/sched/sch_sfq.c:499) sch_sfq
+[ 88.802544] Code: 0f b7 50 12 48 8d 04 d5 00 00 00 00 48 89 d6 48 29 d0 48 8b 91 c0 01 00 00 48 c1 e0 03 48 01 c2 66 83 7a 1a 00 7e c0 48 8b 3a <4c> 8b 07 4c 89 02 49 89 50 08 48 c7 47 08 00 00 00 00 48 c7 07 00
+All code
+========
+   0:  0f b7 50 12             movzwl 0x12(%rax),%edx
+   4:  48 8d 04 d5 00 00 00    lea    0x0(,%rdx,8),%rax
+   b:  00
+   c:  48 89 d6                mov    %rdx,%rsi
+   f:  48 29 d0                sub    %rdx,%rax
+  12:  48 8b 91 c0 01 00 00    mov    0x1c0(%rcx),%rdx
+  19:  48 c1 e0 03             shl    $0x3,%rax
+  1d:  48 01 c2                add    %rax,%rdx
+  20:  66 83 7a 1a 00          cmpw   $0x0,0x1a(%rdx)
+  25:  7e c0                   jle    0xffffffffffffffe7
+  27:  48 8b 3a                mov    (%rdx),%rdi
+  2a:* 4c 8b 07                mov    (%rdi),%r8               <-- trapping instruction
+  2d:  4c 89 02                mov    %r8,(%rdx)
+  30:  49 89 50 08             mov    %rdx,0x8(%r8)
+  34:  48 c7 47 08 00 00 00    movq   $0x0,0x8(%rdi)
+  3b:  00
+  3c:  48                      rex.W
+  3d:  c7                      .byte 0xc7
+  3e:  07                      (bad)
+       ...
+
+Code starting with the faulting instruction
+===========================================
+   0:  4c 8b 07                mov    (%rdi),%r8
+   3:  4c 89 02                mov    %r8,(%rdx)
+   6:  49 89 50 08             mov    %rdx,0x8(%r8)
+   a:  48 c7 47 08 00 00 00    movq   $0x0,0x8(%rdi)
+  11:  00
+  12:  48                      rex.W
+  13:  c7                      .byte 0xc7
+  14:  07                      (bad)
+       ...
+[   88.803721] RSP: 0018:ffff9a1f892b7d58 EFLAGS: 00000206
+[   88.804032] RAX: 0000000000000000 RBX: ffff9a1f8420c800 RCX: ffff9a1f8420c800
+[   88.804560] RDX: ffff9a1f81bc1440 RSI: 0000000000000000 RDI: 0000000000000000
+[   88.805056] RBP: ffffffffc04bb0e0 R08: 0000000000000001 R09: 00000000ff7f9a1f
+[   88.805473] R10: 000000000001001b R11: 0000000000009a1f R12: 0000000000000140
+[   88.806194] R13: 0000000000000001 R14: ffff9a1f886df400 R15: ffff9a1f886df4ac
+[   88.806734] FS:  00007f445601a740(0000) GS:ffff9a2e7fd80000(0000) knlGS:0000000000000000
+[   88.807225] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[   88.807672] CR2: 0000000000000000 CR3: 000000050cc46000 CR4: 00000000000006f0
+[   88.808165] Call Trace:
+[   88.808459]  <TASK>
+[   88.808710] ? __die (arch/x86/kernel/dumpstack.c:421 arch/x86/kernel/dumpstack.c:434)
+[   88.809261] ? page_fault_oops (arch/x86/mm/fault.c:715)
+[   88.809561] ? exc_page_fault (./arch/x86/include/asm/irqflags.h:26 ./arch/x86/include/asm/irqflags.h:87 ./arch/x86/include/asm/irqflags.h:147 arch/x86/mm/fault.c:1489 arch/x86/mm/fault.c:1539)
+[   88.809806] ? asm_exc_page_fault (./arch/x86/include/asm/idtentry.h:623)
+[   88.810074] ? sfq_dequeue (net/sched/sch_sfq.c:272 net/sched/sch_sfq.c:499) sch_sfq
+[   88.810411] sfq_reset (net/sched/sch_sfq.c:525) sch_sfq
+[   88.810671] qdisc_reset (./include/linux/skbuff.h:2135 ./include/linux/skbuff.h:2441 ./include/linux/skbuff.h:3304 ./include/linux/skbuff.h:3310 net/sched/sch_generic.c:1036)
+[   88.810950] tbf_reset (./include/linux/timekeeping.h:169 net/sched/sch_tbf.c:334) sch_tbf
+[   88.811208] qdisc_reset (./include/linux/skbuff.h:2135 ./include/linux/skbuff.h:2441 ./include/linux/skbuff.h:3304 ./include/linux/skbuff.h:3310 net/sched/sch_generic.c:1036)
+[   88.811484] netif_set_real_num_tx_queues (./include/linux/spinlock.h:396 ./include/net/sch_generic.h:768 net/core/dev.c:2958)
+[   88.811870] __tun_detach (drivers/net/tun.c:590 drivers/net/tun.c:673)
+[   88.812271] tun_chr_close (drivers/net/tun.c:702 drivers/net/tun.c:3517)
+[   88.812505] __fput (fs/file_table.c:432 (discriminator 1))
+[   88.812735] task_work_run (kernel/task_work.c:230)
+[   88.813016] do_exit (kernel/exit.c:940)
+[   88.813372] ? trace_hardirqs_on (kernel/trace/trace_preemptirq.c:58 (discriminator 4))
+[   88.813639] ? handle_mm_fault (./arch/x86/include/asm/irqflags.h:42 ./arch/x86/include/asm/irqflags.h:97 ./arch/x86/include/asm/irqflags.h:155 ./include/linux/memcontrol.h:1022 ./include/linux/memcontrol.h:1045 ./include/linux/memcontrol.h:1052 mm/memory.c:5928 mm/memory.c:6088)
+[   88.813867] do_group_exit (kernel/exit.c:1070)
+[   88.814138] __x64_sys_exit_group (kernel/exit.c:1099)
+[   88.814490] x64_sys_call (??:?)
+[   88.814791] do_syscall_64 (arch/x86/entry/common.c:52 (discriminator 1) arch/x86/entry/common.c:83 (discriminator 1))
+[   88.815012] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)
+[   88.815495] RIP: 0033:0x7f44560f1975
+
+Fixes: 175f9c1bba9b ("net_sched: Add size table for qdiscs")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Daniel Borkmann <daniel@iogearbox.net>
+Link: https://patch.msgid.link/20241007184130.3960565-1-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/sch_generic.h | 1 -
+ net/sched/sch_api.c       | 7 ++++++-
+ 2 files changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
+index e8034756cbf8e..6d934ce54c8dd 100644
+--- a/include/net/sch_generic.h
++++ b/include/net/sch_generic.h
+@@ -827,7 +827,6 @@ static inline void qdisc_calculate_pkt_len(struct sk_buff *skb,
+ static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+                               struct sk_buff **to_free)
+ {
+-      qdisc_calculate_pkt_len(skb, sch);
+       return sch->enqueue(skb, sch, to_free);
+ }
+diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
+index d07146a2d0bba..069d0d8a89397 100644
+--- a/net/sched/sch_api.c
++++ b/net/sched/sch_api.c
+@@ -586,7 +586,6 @@ void __qdisc_calculate_pkt_len(struct sk_buff *skb,
+               pkt_len = 1;
+       qdisc_skb_cb(skb)->pkt_len = pkt_len;
+ }
+-EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
+ void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
+ {
+@@ -1110,6 +1109,12 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
+                       return -EINVAL;
+               }
++              if (new &&
++                  !(parent->flags & TCQ_F_MQROOT) &&
++                  rcu_access_pointer(new->stab)) {
++                      NL_SET_ERR_MSG(extack, "STAB not supported on a non root");
++                      return -EINVAL;
++              }
+               err = cops->graft(parent, cl, new, &old, extack);
+               if (err)
+                       return err;
+-- 
+2.43.0
+
diff --git a/queue-5.4/netfilter-br_netfilter-fix-panic-with-metadata_dst-s.patch b/queue-5.4/netfilter-br_netfilter-fix-panic-with-metadata_dst-s.patch
new file mode 100644 (file)
index 0000000..3215aa0
--- /dev/null
@@ -0,0 +1,179 @@
+From ef27f9253de34ade463e8312128de846b98fbf92 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 1 Oct 2024 08:43:59 -0700
+Subject: netfilter: br_netfilter: fix panic with metadata_dst skb
+
+From: Andy Roulin <aroulin@nvidia.com>
+
+[ Upstream commit f9ff7665cd128012868098bbd07e28993e314fdb ]
+
+Fix a kernel panic in the br_netfilter module when sending untagged
+traffic via a VxLAN device.
+This happens during the check for fragmentation in br_nf_dev_queue_xmit.
+
+It is dependent on:
+1) the br_netfilter module being loaded;
+2) net.bridge.bridge-nf-call-iptables set to 1;
+3) a bridge with a VxLAN (single-vxlan-device) netdevice as a bridge port;
+4) untagged frames with size higher than the VxLAN MTU forwarded/flooded
+
+When forwarding the untagged packet to the VxLAN bridge port, before
+the netfilter hooks are called, br_handle_egress_vlan_tunnel is called and
+changes the skb_dst to the tunnel dst. The tunnel_dst is a metadata type
+of dst, i.e., skb_valid_dst(skb) is false, and metadata->dst.dev is NULL.
+
+Then in the br_netfilter hooks, in br_nf_dev_queue_xmit, there's a check
+for frames that needs to be fragmented: frames with higher MTU than the
+VxLAN device end up calling br_nf_ip_fragment, which in turns call
+ip_skb_dst_mtu.
+
+The ip_dst_mtu tries to use the skb_dst(skb) as if it was a valid dst
+with valid dst->dev, thus the crash.
+
+This case was never supported in the first place, so drop the packet
+instead.
+
+PING 10.0.0.2 (10.0.0.2) from 0.0.0.0 h1-eth0: 2000(2028) bytes of data.
+[  176.291791] Unable to handle kernel NULL pointer dereference at
+virtual address 0000000000000110
+[  176.292101] Mem abort info:
+[  176.292184]   ESR = 0x0000000096000004
+[  176.292322]   EC = 0x25: DABT (current EL), IL = 32 bits
+[  176.292530]   SET = 0, FnV = 0
+[  176.292709]   EA = 0, S1PTW = 0
+[  176.292862]   FSC = 0x04: level 0 translation fault
+[  176.293013] Data abort info:
+[  176.293104]   ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000
+[  176.293488]   CM = 0, WnR = 0, TnD = 0, TagAccess = 0
+[  176.293787]   GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
+[  176.293995] user pgtable: 4k pages, 48-bit VAs, pgdp=0000000043ef5000
+[  176.294166] [0000000000000110] pgd=0000000000000000,
+p4d=0000000000000000
+[  176.294827] Internal error: Oops: 0000000096000004 [#1] PREEMPT SMP
+[  176.295252] Modules linked in: vxlan ip6_udp_tunnel udp_tunnel veth
+br_netfilter bridge stp llc ipv6 crct10dif_ce
+[  176.295923] CPU: 0 PID: 188 Comm: ping Not tainted
+6.8.0-rc3-g5b3fbd61b9d1 #2
+[  176.296314] Hardware name: linux,dummy-virt (DT)
+[  176.296535] pstate: 80000005 (Nzcv daif -PAN -UAO -TCO -DIT -SSBS
+BTYPE=--)
+[  176.296808] pc : br_nf_dev_queue_xmit+0x390/0x4ec [br_netfilter]
+[  176.297382] lr : br_nf_dev_queue_xmit+0x2ac/0x4ec [br_netfilter]
+[  176.297636] sp : ffff800080003630
+[  176.297743] x29: ffff800080003630 x28: 0000000000000008 x27:
+ffff6828c49ad9f8
+[  176.298093] x26: ffff6828c49ad000 x25: 0000000000000000 x24:
+00000000000003e8
+[  176.298430] x23: 0000000000000000 x22: ffff6828c4960b40 x21:
+ffff6828c3b16d28
+[  176.298652] x20: ffff6828c3167048 x19: ffff6828c3b16d00 x18:
+0000000000000014
+[  176.298926] x17: ffffb0476322f000 x16: ffffb7e164023730 x15:
+0000000095744632
+[  176.299296] x14: ffff6828c3f1c880 x13: 0000000000000002 x12:
+ffffb7e137926a70
+[  176.299574] x11: 0000000000000001 x10: ffff6828c3f1c898 x9 :
+0000000000000000
+[  176.300049] x8 : ffff6828c49bf070 x7 : 0008460f18d5f20e x6 :
+f20e0100bebafeca
+[  176.300302] x5 : ffff6828c7f918fe x4 : ffff6828c49bf070 x3 :
+0000000000000000
+[  176.300586] x2 : 0000000000000000 x1 : ffff6828c3c7ad00 x0 :
+ffff6828c7f918f0
+[  176.300889] Call trace:
+[  176.301123]  br_nf_dev_queue_xmit+0x390/0x4ec [br_netfilter]
+[  176.301411]  br_nf_post_routing+0x2a8/0x3e4 [br_netfilter]
+[  176.301703]  nf_hook_slow+0x48/0x124
+[  176.302060]  br_forward_finish+0xc8/0xe8 [bridge]
+[  176.302371]  br_nf_hook_thresh+0x124/0x134 [br_netfilter]
+[  176.302605]  br_nf_forward_finish+0x118/0x22c [br_netfilter]
+[  176.302824]  br_nf_forward_ip.part.0+0x264/0x290 [br_netfilter]
+[  176.303136]  br_nf_forward+0x2b8/0x4e0 [br_netfilter]
+[  176.303359]  nf_hook_slow+0x48/0x124
+[  176.303803]  __br_forward+0xc4/0x194 [bridge]
+[  176.304013]  br_flood+0xd4/0x168 [bridge]
+[  176.304300]  br_handle_frame_finish+0x1d4/0x5c4 [bridge]
+[  176.304536]  br_nf_hook_thresh+0x124/0x134 [br_netfilter]
+[  176.304978]  br_nf_pre_routing_finish+0x29c/0x494 [br_netfilter]
+[  176.305188]  br_nf_pre_routing+0x250/0x524 [br_netfilter]
+[  176.305428]  br_handle_frame+0x244/0x3cc [bridge]
+[  176.305695]  __netif_receive_skb_core.constprop.0+0x33c/0xecc
+[  176.306080]  __netif_receive_skb_one_core+0x40/0x8c
+[  176.306197]  __netif_receive_skb+0x18/0x64
+[  176.306369]  process_backlog+0x80/0x124
+[  176.306540]  __napi_poll+0x38/0x17c
+[  176.306636]  net_rx_action+0x124/0x26c
+[  176.306758]  __do_softirq+0x100/0x26c
+[  176.307051]  ____do_softirq+0x10/0x1c
+[  176.307162]  call_on_irq_stack+0x24/0x4c
+[  176.307289]  do_softirq_own_stack+0x1c/0x2c
+[  176.307396]  do_softirq+0x54/0x6c
+[  176.307485]  __local_bh_enable_ip+0x8c/0x98
+[  176.307637]  __dev_queue_xmit+0x22c/0xd28
+[  176.307775]  neigh_resolve_output+0xf4/0x1a0
+[  176.308018]  ip_finish_output2+0x1c8/0x628
+[  176.308137]  ip_do_fragment+0x5b4/0x658
+[  176.308279]  ip_fragment.constprop.0+0x48/0xec
+[  176.308420]  __ip_finish_output+0xa4/0x254
+[  176.308593]  ip_finish_output+0x34/0x130
+[  176.308814]  ip_output+0x6c/0x108
+[  176.308929]  ip_send_skb+0x50/0xf0
+[  176.309095]  ip_push_pending_frames+0x30/0x54
+[  176.309254]  raw_sendmsg+0x758/0xaec
+[  176.309568]  inet_sendmsg+0x44/0x70
+[  176.309667]  __sys_sendto+0x110/0x178
+[  176.309758]  __arm64_sys_sendto+0x28/0x38
+[  176.309918]  invoke_syscall+0x48/0x110
+[  176.310211]  el0_svc_common.constprop.0+0x40/0xe0
+[  176.310353]  do_el0_svc+0x1c/0x28
+[  176.310434]  el0_svc+0x34/0xb4
+[  176.310551]  el0t_64_sync_handler+0x120/0x12c
+[  176.310690]  el0t_64_sync+0x190/0x194
+[  176.311066] Code: f9402e61 79402aa2 927ff821 f9400023 (f9408860)
+[  176.315743] ---[ end trace 0000000000000000 ]---
+[  176.316060] Kernel panic - not syncing: Oops: Fatal exception in
+interrupt
+[  176.316371] Kernel Offset: 0x37e0e3000000 from 0xffff800080000000
+[  176.316564] PHYS_OFFSET: 0xffff97d780000000
+[  176.316782] CPU features: 0x0,88000203,3c020000,0100421b
+[  176.317210] Memory Limit: none
+[  176.317527] ---[ end Kernel panic - not syncing: Oops: Fatal
+Exception in interrupt ]---\
+
+Fixes: 11538d039ac6 ("bridge: vlan dst_metadata hooks in ingress and egress paths")
+Reviewed-by: Ido Schimmel <idosch@nvidia.com>
+Signed-off-by: Andy Roulin <aroulin@nvidia.com>
+Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
+Link: https://patch.msgid.link/20241001154400.22787-2-aroulin@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/bridge/br_netfilter_hooks.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
+index 277b6fb92ac5f..a16559140541c 100644
+--- a/net/bridge/br_netfilter_hooks.c
++++ b/net/bridge/br_netfilter_hooks.c
+@@ -33,6 +33,7 @@
+ #include <net/ip.h>
+ #include <net/ipv6.h>
+ #include <net/addrconf.h>
++#include <net/dst_metadata.h>
+ #include <net/route.h>
+ #include <net/netfilter/br_netfilter.h>
+ #include <net/netns/generic.h>
+@@ -753,6 +754,10 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff
+               return br_dev_queue_push_xmit(net, sk, skb);
+       }
++      /* Fragmentation on metadata/template dst is not supported */
++      if (unlikely(!skb_valid_dst(skb)))
++              goto drop;
++
+       /* This is wrong! We should preserve the original fragment
+        * boundaries by preserving frag_list rather than refragmenting.
+        */
+-- 
+2.43.0
+
diff --git a/queue-5.4/ppp-fix-ppp_async_encode-illegal-access.patch b/queue-5.4/ppp-fix-ppp_async_encode-illegal-access.patch
new file mode 100644 (file)
index 0000000..1c844fe
--- /dev/null
@@ -0,0 +1,91 @@
+From 74de69a96327d7fc28322578367bf44ec53e50b2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 9 Oct 2024 18:58:02 +0000
+Subject: ppp: fix ppp_async_encode() illegal access
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 40dddd4b8bd08a69471efd96107a4e1c73fabefc ]
+
+syzbot reported an issue in ppp_async_encode() [1]
+
+In this case, pppoe_sendmsg() is called with a zero size.
+Then ppp_async_encode() is called with an empty skb.
+
+BUG: KMSAN: uninit-value in ppp_async_encode drivers/net/ppp/ppp_async.c:545 [inline]
+ BUG: KMSAN: uninit-value in ppp_async_push+0xb4f/0x2660 drivers/net/ppp/ppp_async.c:675
+  ppp_async_encode drivers/net/ppp/ppp_async.c:545 [inline]
+  ppp_async_push+0xb4f/0x2660 drivers/net/ppp/ppp_async.c:675
+  ppp_async_send+0x130/0x1b0 drivers/net/ppp/ppp_async.c:634
+  ppp_channel_bridge_input drivers/net/ppp/ppp_generic.c:2280 [inline]
+  ppp_input+0x1f1/0xe60 drivers/net/ppp/ppp_generic.c:2304
+  pppoe_rcv_core+0x1d3/0x720 drivers/net/ppp/pppoe.c:379
+  sk_backlog_rcv+0x13b/0x420 include/net/sock.h:1113
+  __release_sock+0x1da/0x330 net/core/sock.c:3072
+  release_sock+0x6b/0x250 net/core/sock.c:3626
+  pppoe_sendmsg+0x2b8/0xb90 drivers/net/ppp/pppoe.c:903
+  sock_sendmsg_nosec net/socket.c:729 [inline]
+  __sock_sendmsg+0x30f/0x380 net/socket.c:744
+  ____sys_sendmsg+0x903/0xb60 net/socket.c:2602
+  ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2656
+  __sys_sendmmsg+0x3c1/0x960 net/socket.c:2742
+  __do_sys_sendmmsg net/socket.c:2771 [inline]
+  __se_sys_sendmmsg net/socket.c:2768 [inline]
+  __x64_sys_sendmmsg+0xbc/0x120 net/socket.c:2768
+  x64_sys_call+0xb6e/0x3ba0 arch/x86/include/generated/asm/syscalls_64.h:308
+  do_syscall_x64 arch/x86/entry/common.c:52 [inline]
+  do_syscall_64+0xcd/0x1e0 arch/x86/entry/common.c:83
+ entry_SYSCALL_64_after_hwframe+0x77/0x7f
+
+Uninit was created at:
+  slab_post_alloc_hook mm/slub.c:4092 [inline]
+  slab_alloc_node mm/slub.c:4135 [inline]
+  kmem_cache_alloc_node_noprof+0x6bf/0xb80 mm/slub.c:4187
+  kmalloc_reserve+0x13d/0x4a0 net/core/skbuff.c:587
+  __alloc_skb+0x363/0x7b0 net/core/skbuff.c:678
+  alloc_skb include/linux/skbuff.h:1322 [inline]
+  sock_wmalloc+0xfe/0x1a0 net/core/sock.c:2732
+  pppoe_sendmsg+0x3a7/0xb90 drivers/net/ppp/pppoe.c:867
+  sock_sendmsg_nosec net/socket.c:729 [inline]
+  __sock_sendmsg+0x30f/0x380 net/socket.c:744
+  ____sys_sendmsg+0x903/0xb60 net/socket.c:2602
+  ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2656
+  __sys_sendmmsg+0x3c1/0x960 net/socket.c:2742
+  __do_sys_sendmmsg net/socket.c:2771 [inline]
+  __se_sys_sendmmsg net/socket.c:2768 [inline]
+  __x64_sys_sendmmsg+0xbc/0x120 net/socket.c:2768
+  x64_sys_call+0xb6e/0x3ba0 arch/x86/include/generated/asm/syscalls_64.h:308
+  do_syscall_x64 arch/x86/entry/common.c:52 [inline]
+  do_syscall_64+0xcd/0x1e0 arch/x86/entry/common.c:83
+ entry_SYSCALL_64_after_hwframe+0x77/0x7f
+
+CPU: 1 UID: 0 PID: 5411 Comm: syz.1.14 Not tainted 6.12.0-rc1-syzkaller-00165-g360c1f1f24c6 #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Reported-by: syzbot+1d121645899e7692f92a@syzkaller.appspotmail.com
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://patch.msgid.link/20241009185802.3763282-1-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ppp/ppp_async.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ppp/ppp_async.c b/drivers/net/ppp/ppp_async.c
+index 34c31d6da734c..c41eeecb5a106 100644
+--- a/drivers/net/ppp/ppp_async.c
++++ b/drivers/net/ppp/ppp_async.c
+@@ -551,7 +551,7 @@ ppp_async_encode(struct asyncppp *ap)
+        * and 7 (code-reject) must be sent as though no options
+        * had been negotiated.
+        */
+-      islcp = proto == PPP_LCP && 1 <= data[2] && data[2] <= 7;
++      islcp = proto == PPP_LCP && count >= 3 && 1 <= data[2] && data[2] <= 7;
+       if (i == 0) {
+               if (islcp)
+-- 
+2.43.0
+
diff --git a/queue-5.4/sctp-ensure-sk_state-is-set-to-closed-if-hashing-fai.patch b/queue-5.4/sctp-ensure-sk_state-is-set-to-closed-if-hashing-fai.patch
new file mode 100644 (file)
index 0000000..3ad9411
--- /dev/null
@@ -0,0 +1,78 @@
+From a50a4338703035aae311faf884cfe4c172eaadd0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 7 Oct 2024 12:25:11 -0400
+Subject: sctp: ensure sk_state is set to CLOSED if hashing fails in
+ sctp_listen_start
+
+From: Xin Long <lucien.xin@gmail.com>
+
+[ Upstream commit 4d5c70e6155d5eae198bade4afeab3c1b15073b6 ]
+
+If hashing fails in sctp_listen_start(), the socket remains in the
+LISTENING state, even though it was not added to the hash table.
+This can lead to a scenario where a socket appears to be listening
+without actually being accessible.
+
+This patch ensures that if the hashing operation fails, the sk_state
+is set back to CLOSED before returning an error.
+
+Note that there is no need to undo the autobind operation if hashing
+fails, as the bind port can still be used for next listen() call on
+the same socket.
+
+Fixes: 76c6d988aeb3 ("sctp: add sock_reuseport for the sock in __sctp_hash_endpoint")
+Reported-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sctp/socket.c | 18 +++++++++++++-----
+ 1 file changed, 13 insertions(+), 5 deletions(-)
+
+diff --git a/net/sctp/socket.c b/net/sctp/socket.c
+index efc9981481c5f..4aee4fc05ba7c 100644
+--- a/net/sctp/socket.c
++++ b/net/sctp/socket.c
+@@ -8370,6 +8370,7 @@ static int sctp_listen_start(struct sock *sk, int backlog)
+       struct sctp_endpoint *ep = sp->ep;
+       struct crypto_shash *tfm = NULL;
+       char alg[32];
++      int err;
+       /* Allocate HMAC for generating cookie. */
+       if (!sp->hmac && sp->sctp_hmac_alg) {
+@@ -8397,18 +8398,25 @@ static int sctp_listen_start(struct sock *sk, int backlog)
+       inet_sk_set_state(sk, SCTP_SS_LISTENING);
+       if (!ep->base.bind_addr.port) {
+               if (sctp_autobind(sk)) {
+-                      inet_sk_set_state(sk, SCTP_SS_CLOSED);
+-                      return -EAGAIN;
++                      err = -EAGAIN;
++                      goto err;
+               }
+       } else {
+               if (sctp_get_port(sk, inet_sk(sk)->inet_num)) {
+-                      inet_sk_set_state(sk, SCTP_SS_CLOSED);
+-                      return -EADDRINUSE;
++                      err = -EADDRINUSE;
++                      goto err;
+               }
+       }
+       WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
+-      return sctp_hash_endpoint(ep);
++      err = sctp_hash_endpoint(ep);
++      if (err)
++              goto err;
++
++      return 0;
++err:
++      inet_sk_set_state(sk, SCTP_SS_CLOSED);
++      return err;
+ }
+ /*
+-- 
+2.43.0
+
index bbf01c1c84b03fef19bd9280ff2ff8d252b4db1c..e41ad0394ef4392e6948321d419e660caa70e9e8 100644 (file)
@@ -330,3 +330,19 @@ tools-iio-add-memory-allocation-failure-check-for-tr.patch
 driver-core-bus-return-eio-instead-of-0-when-show-st.patch
 fbdev-sisfb-fix-strbuf-array-overflow.patch
 rdma-rxe-fix-seg-fault-in-rxe_comp_queue_pkt.patch
+ice-fix-vlan-replay-after-reset.patch
+sunrpc-fix-integer-overflow-in-decode_rc_list.patch
+tcp-fix-to-allow-timestamp-undo-if-no-retransmits-we.patch
+tcp-fix-tcp_enter_recovery-to-zero-retrans_stamp-whe.patch
+netfilter-br_netfilter-fix-panic-with-metadata_dst-s.patch
+bluetooth-rfcomm-fix-possible-deadlock-in-rfcomm_sk_.patch
+gpio-aspeed-add-the-flush-write-to-ensure-the-write-.patch
+gpio-aspeed-use-devm_clk-api-to-manage-clock-source.patch
+igb-do-not-bring-the-device-up-after-non-fatal-error.patch
+net-sched-accept-tca_stab-only-for-root-qdisc.patch
+net-ibm-emac-mal-fix-wrong-goto.patch
+net-annotate-lockless-accesses-to-sk-sk_ack_backlog.patch
+net-annotate-lockless-accesses-to-sk-sk_max_ack_back.patch
+sctp-ensure-sk_state-is-set-to-closed-if-hashing-fai.patch
+ppp-fix-ppp_async_encode-illegal-access.patch
+slip-make-slhc_remember-more-robust-against-maliciou.patch
diff --git a/queue-5.4/slip-make-slhc_remember-more-robust-against-maliciou.patch b/queue-5.4/slip-make-slhc_remember-more-robust-against-maliciou.patch
new file mode 100644 (file)
index 0000000..556d08a
--- /dev/null
@@ -0,0 +1,170 @@
+From 93f10b7cbd8d81af54867d4b7744e3a025744f83 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 9 Oct 2024 09:11:32 +0000
+Subject: slip: make slhc_remember() more robust against malicious packets
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 7d3fce8cbe3a70a1c7c06c9b53696be5d5d8dd5c ]
+
+syzbot found that slhc_remember() was missing checks against
+malicious packets [1].
+
+slhc_remember() only checked the size of the packet was at least 20,
+which is not good enough.
+
+We need to make sure the packet includes the IPv4 and TCP header
+that are supposed to be carried.
+
+Add iph and th pointers to make the code more readable.
+
+[1]
+
+BUG: KMSAN: uninit-value in slhc_remember+0x2e8/0x7b0 drivers/net/slip/slhc.c:666
+  slhc_remember+0x2e8/0x7b0 drivers/net/slip/slhc.c:666
+  ppp_receive_nonmp_frame+0xe45/0x35e0 drivers/net/ppp/ppp_generic.c:2455
+  ppp_receive_frame drivers/net/ppp/ppp_generic.c:2372 [inline]
+  ppp_do_recv+0x65f/0x40d0 drivers/net/ppp/ppp_generic.c:2212
+  ppp_input+0x7dc/0xe60 drivers/net/ppp/ppp_generic.c:2327
+  pppoe_rcv_core+0x1d3/0x720 drivers/net/ppp/pppoe.c:379
+  sk_backlog_rcv+0x13b/0x420 include/net/sock.h:1113
+  __release_sock+0x1da/0x330 net/core/sock.c:3072
+  release_sock+0x6b/0x250 net/core/sock.c:3626
+  pppoe_sendmsg+0x2b8/0xb90 drivers/net/ppp/pppoe.c:903
+  sock_sendmsg_nosec net/socket.c:729 [inline]
+  __sock_sendmsg+0x30f/0x380 net/socket.c:744
+  ____sys_sendmsg+0x903/0xb60 net/socket.c:2602
+  ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2656
+  __sys_sendmmsg+0x3c1/0x960 net/socket.c:2742
+  __do_sys_sendmmsg net/socket.c:2771 [inline]
+  __se_sys_sendmmsg net/socket.c:2768 [inline]
+  __x64_sys_sendmmsg+0xbc/0x120 net/socket.c:2768
+  x64_sys_call+0xb6e/0x3ba0 arch/x86/include/generated/asm/syscalls_64.h:308
+  do_syscall_x64 arch/x86/entry/common.c:52 [inline]
+  do_syscall_64+0xcd/0x1e0 arch/x86/entry/common.c:83
+ entry_SYSCALL_64_after_hwframe+0x77/0x7f
+
+Uninit was created at:
+  slab_post_alloc_hook mm/slub.c:4091 [inline]
+  slab_alloc_node mm/slub.c:4134 [inline]
+  kmem_cache_alloc_node_noprof+0x6bf/0xb80 mm/slub.c:4186
+  kmalloc_reserve+0x13d/0x4a0 net/core/skbuff.c:587
+  __alloc_skb+0x363/0x7b0 net/core/skbuff.c:678
+  alloc_skb include/linux/skbuff.h:1322 [inline]
+  sock_wmalloc+0xfe/0x1a0 net/core/sock.c:2732
+  pppoe_sendmsg+0x3a7/0xb90 drivers/net/ppp/pppoe.c:867
+  sock_sendmsg_nosec net/socket.c:729 [inline]
+  __sock_sendmsg+0x30f/0x380 net/socket.c:744
+  ____sys_sendmsg+0x903/0xb60 net/socket.c:2602
+  ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2656
+  __sys_sendmmsg+0x3c1/0x960 net/socket.c:2742
+  __do_sys_sendmmsg net/socket.c:2771 [inline]
+  __se_sys_sendmmsg net/socket.c:2768 [inline]
+  __x64_sys_sendmmsg+0xbc/0x120 net/socket.c:2768
+  x64_sys_call+0xb6e/0x3ba0 arch/x86/include/generated/asm/syscalls_64.h:308
+  do_syscall_x64 arch/x86/entry/common.c:52 [inline]
+  do_syscall_64+0xcd/0x1e0 arch/x86/entry/common.c:83
+ entry_SYSCALL_64_after_hwframe+0x77/0x7f
+
+CPU: 0 UID: 0 PID: 5460 Comm: syz.2.33 Not tainted 6.12.0-rc2-syzkaller-00006-g87d6aab2389e #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024
+
+Fixes: b5451d783ade ("slip: Move the SLIP drivers")
+Reported-by: syzbot+2ada1bc857496353be5a@syzkaller.appspotmail.com
+Closes: https://lore.kernel.org/netdev/670646db.050a0220.3f80e.0027.GAE@google.com/T/#u
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Link: https://patch.msgid.link/20241009091132.2136321-1-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/slip/slhc.c | 57 ++++++++++++++++++++++++-----------------
+ 1 file changed, 34 insertions(+), 23 deletions(-)
+
+diff --git a/drivers/net/slip/slhc.c b/drivers/net/slip/slhc.c
+index f78ceba42e57e..603a29f3905ba 100644
+--- a/drivers/net/slip/slhc.c
++++ b/drivers/net/slip/slhc.c
+@@ -643,46 +643,57 @@ slhc_uncompress(struct slcompress *comp, unsigned char *icp, int isize)
+ int
+ slhc_remember(struct slcompress *comp, unsigned char *icp, int isize)
+ {
+-      struct cstate *cs;
+-      unsigned ihl;
+-
++      const struct tcphdr *th;
+       unsigned char index;
++      struct iphdr *iph;
++      struct cstate *cs;
++      unsigned int ihl;
+-      if(isize < 20) {
+-              /* The packet is shorter than a legal IP header */
++      /* The packet is shorter than a legal IP header.
++       * Also make sure isize is positive.
++       */
++      if (isize < (int)sizeof(struct iphdr)) {
++runt:
+               comp->sls_i_runt++;
+-              return slhc_toss( comp );
++              return slhc_toss(comp);
+       }
++      iph = (struct iphdr *)icp;
+       /* Peek at the IP header's IHL field to find its length */
+-      ihl = icp[0] & 0xf;
+-      if(ihl < 20 / 4){
+-              /* The IP header length field is too small */
+-              comp->sls_i_runt++;
+-              return slhc_toss( comp );
+-      }
+-      index = icp[9];
+-      icp[9] = IPPROTO_TCP;
++      ihl = iph->ihl;
++      /* The IP header length field is too small,
++       * or packet is shorter than the IP header followed
++       * by minimal tcp header.
++       */
++      if (ihl < 5 || isize < ihl * 4 + sizeof(struct tcphdr))
++              goto runt;
++
++      index = iph->protocol;
++      iph->protocol = IPPROTO_TCP;
+       if (ip_fast_csum(icp, ihl)) {
+               /* Bad IP header checksum; discard */
+               comp->sls_i_badcheck++;
+-              return slhc_toss( comp );
++              return slhc_toss(comp);
+       }
+-      if(index > comp->rslot_limit) {
++      if (index > comp->rslot_limit) {
+               comp->sls_i_error++;
+               return slhc_toss(comp);
+       }
+-
++      th = (struct tcphdr *)(icp + ihl * 4);
++      if (th->doff < sizeof(struct tcphdr) / 4)
++              goto runt;
++      if (isize < ihl * 4 + th->doff * 4)
++              goto runt;
+       /* Update local state */
+       cs = &comp->rstate[comp->recv_current = index];
+       comp->flags &=~ SLF_TOSS;
+-      memcpy(&cs->cs_ip,icp,20);
+-      memcpy(&cs->cs_tcp,icp + ihl*4,20);
++      memcpy(&cs->cs_ip, iph, sizeof(*iph));
++      memcpy(&cs->cs_tcp, th, sizeof(*th));
+       if (ihl > 5)
+-        memcpy(cs->cs_ipopt, icp + sizeof(struct iphdr), (ihl - 5) * 4);
+-      if (cs->cs_tcp.doff > 5)
+-        memcpy(cs->cs_tcpopt, icp + ihl*4 + sizeof(struct tcphdr), (cs->cs_tcp.doff - 5) * 4);
+-      cs->cs_hsize = ihl*2 + cs->cs_tcp.doff*2;
++        memcpy(cs->cs_ipopt, &iph[1], (ihl - 5) * 4);
++      if (th->doff > 5)
++        memcpy(cs->cs_tcpopt, &th[1], (th->doff - 5) * 4);
++      cs->cs_hsize = ihl*2 + th->doff*2;
+       cs->initialized = true;
+       /* Put headers back on packet
+        * Neither header checksum is recalculated
+-- 
+2.43.0
+
diff --git a/queue-5.4/sunrpc-fix-integer-overflow-in-decode_rc_list.patch b/queue-5.4/sunrpc-fix-integer-overflow-in-decode_rc_list.patch
new file mode 100644 (file)
index 0000000..031c7ea
--- /dev/null
@@ -0,0 +1,37 @@
+From 5e8b28c01c57ad46aa850b7f3a7988729a4b1c6e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 19 Sep 2024 11:50:33 +0300
+Subject: SUNRPC: Fix integer overflow in decode_rc_list()
+
+From: Dan Carpenter <dan.carpenter@linaro.org>
+
+[ Upstream commit 6dbf1f341b6b35bcc20ff95b6b315e509f6c5369 ]
+
+The math in "rc_list->rcl_nrefcalls * 2 * sizeof(uint32_t)" could have an
+integer overflow.  Add bounds checking on rc_list->rcl_nrefcalls to fix
+that.
+
+Fixes: 4aece6a19cf7 ("nfs41: cb_sequence xdr implementation")
+Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
+Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfs/callback_xdr.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
+index 04d27f0ed39ac..1b860995e6bcf 100644
+--- a/fs/nfs/callback_xdr.c
++++ b/fs/nfs/callback_xdr.c
+@@ -372,6 +372,8 @@ static __be32 decode_rc_list(struct xdr_stream *xdr,
+       rc_list->rcl_nrefcalls = ntohl(*p++);
+       if (rc_list->rcl_nrefcalls) {
++              if (unlikely(rc_list->rcl_nrefcalls > xdr->buf->len))
++                      goto out;
+               p = xdr_inline_decode(xdr,
+                            rc_list->rcl_nrefcalls * 2 * sizeof(uint32_t));
+               if (unlikely(p == NULL))
+-- 
+2.43.0
+
diff --git a/queue-5.4/tcp-fix-tcp_enter_recovery-to-zero-retrans_stamp-whe.patch b/queue-5.4/tcp-fix-tcp_enter_recovery-to-zero-retrans_stamp-whe.patch
new file mode 100644 (file)
index 0000000..0b92b9b
--- /dev/null
@@ -0,0 +1,153 @@
+From 95f703c4b535433eb0856ef82084a816f865d4fe Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 1 Oct 2024 20:05:16 +0000
+Subject: tcp: fix tcp_enter_recovery() to zero retrans_stamp when it's safe
+
+From: Neal Cardwell <ncardwell@google.com>
+
+[ Upstream commit b41b4cbd9655bcebcce941bef3601db8110335be ]
+
+Fix tcp_enter_recovery() so that if there are no retransmits out then
+we zero retrans_stamp when entering fast recovery. This is necessary
+to fix two buggy behaviors.
+
+Currently a non-zero retrans_stamp value can persist across multiple
+back-to-back loss recovery episodes. This is because we generally only
+clears retrans_stamp if we are completely done with loss recoveries,
+and get to tcp_try_to_open() and find !tcp_any_retrans_done(sk). This
+behavior causes two bugs:
+
+(1) When a loss recovery episode (CA_Loss or CA_Recovery) is followed
+immediately by a new CA_Recovery, the retrans_stamp value can persist
+and can be a time before this new CA_Recovery episode starts. That
+means that timestamp-based undo will be using the wrong retrans_stamp
+(a value that is too old) when comparing incoming TS ecr values to
+retrans_stamp to see if the current fast recovery episode can be
+undone.
+
+(2) If there is a roughly minutes-long sequence of back-to-back fast
+recovery episodes, one after another (e.g. in a shallow-buffered or
+policed bottleneck), where each fast recovery successfully makes
+forward progress and recovers one window of sequence space (but leaves
+at least one retransmit in flight at the end of the recovery),
+followed by several RTOs, then the ETIMEDOUT check may be using the
+wrong retrans_stamp (a value set at the start of the first fast
+recovery in the sequence). This can cause a very premature ETIMEDOUT,
+killing the connection prematurely.
+
+This commit changes the code to zero retrans_stamp when entering fast
+recovery, when this is known to be safe (no retransmits are out in the
+network). That ensures that when starting a fast recovery episode, and
+it is safe to do so, retrans_stamp is set when we send the fast
+retransmit packet. That addresses both bug (1) and bug (2) by ensuring
+that (if no retransmits are out when we start a fast recovery) we use
+the initial fast retransmit of this fast recovery as the time value
+for undo and ETIMEDOUT calculations.
+
+This makes intuitive sense, since the start of a new fast recovery
+episode (in a scenario where no lost packets are out in the network)
+means that the connection has made forward progress since the last RTO
+or fast recovery, and we should thus "restart the clock" used for both
+undo and ETIMEDOUT logic.
+
+Note that if when we start fast recovery there *are* retransmits out
+in the network, there can still be undesirable (1)/(2) issues. For
+example, after this patch we can still have the (1) and (2) problems
+in cases like this:
+
++ round 1: sender sends flight 1
+
++ round 2: sender receives SACKs and enters fast recovery 1,
+  retransmits some packets in flight 1 and then sends some new data as
+  flight 2
+
++ round 3: sender receives some SACKs for flight 2, notes losses, and
+  retransmits some packets to fill the holes in flight 2
+
++ fast recovery has some lost retransmits in flight 1 and continues
+  for one or more rounds sending retransmits for flight 1 and flight 2
+
++ fast recovery 1 completes when snd_una reaches high_seq at end of
+  flight 1
+
++ there are still holes in the SACK scoreboard in flight 2, so we
+  enter fast recovery 2, but some retransmits in the flight 2 sequence
+  range are still in flight (retrans_out > 0), so we can't execute the
+  new retrans_stamp=0 added here to clear retrans_stamp
+
+It's not yet clear how to fix these remaining (1)/(2) issues in an
+efficient way without breaking undo behavior, given that retrans_stamp
+is currently used for undo and ETIMEDOUT. Perhaps the optimal (but
+expensive) strategy would be to set retrans_stamp to the timestamp of
+the earliest outstanding retransmit when entering fast recovery. But
+at least this commit makes things better.
+
+Note that this does not change the semantics of retrans_stamp; it
+simply makes retrans_stamp accurate in some cases where it was not
+before:
+
+(1) Some loss recovery, followed by an immediate entry into a fast
+recovery, where there are no retransmits out when entering the fast
+recovery.
+
+(2) When a TFO server has a SYNACK retransmit that sets retrans_stamp,
+and then the ACK that completes the 3-way handshake has SACK blocks
+that trigger a fast recovery. In this case when entering fast recovery
+we want to zero out the retrans_stamp from the TFO SYNACK retransmit,
+and set the retrans_stamp based on the timestamp of the fast recovery.
+
+We introduce a tcp_retrans_stamp_cleanup() helper, because this
+two-line sequence already appears in 3 places and is about to appear
+in 2 more as a result of this bug fix patch series. Once this bug fix
+patches series in the net branch makes it into the net-next branch
+we'll update the 3 other call sites to use the new helper.
+
+This is a long-standing issue. The Fixes tag below is chosen to be the
+oldest commit at which the patch will apply cleanly, which is from
+Linux v3.5 in 2012.
+
+Fixes: 1fbc340514fc ("tcp: early retransmit: tcp_enter_recovery()")
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Link: https://patch.msgid.link/20241001200517.2756803-3-ncardwell.sw@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index 75e954590bdd5..5923261312912 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -2343,6 +2343,16 @@ static bool tcp_any_retrans_done(const struct sock *sk)
+       return false;
+ }
++/* If loss recovery is finished and there are no retransmits out in the
++ * network, then we clear retrans_stamp so that upon the next loss recovery
++ * retransmits_timed_out() and timestamp-undo are using the correct value.
++ */
++static void tcp_retrans_stamp_cleanup(struct sock *sk)
++{
++      if (!tcp_any_retrans_done(sk))
++              tcp_sk(sk)->retrans_stamp = 0;
++}
++
+ static void DBGUNDO(struct sock *sk, const char *msg)
+ {
+ #if FASTRETRANS_DEBUG > 1
+@@ -2685,6 +2695,9 @@ void tcp_enter_recovery(struct sock *sk, bool ece_ack)
+       struct tcp_sock *tp = tcp_sk(sk);
+       int mib_idx;
++      /* Start the clock with our fast retransmit, for undo and ETIMEDOUT. */
++      tcp_retrans_stamp_cleanup(sk);
++
+       if (tcp_is_reno(tp))
+               mib_idx = LINUX_MIB_TCPRENORECOVERY;
+       else
+-- 
+2.43.0
+
diff --git a/queue-5.4/tcp-fix-to-allow-timestamp-undo-if-no-retransmits-we.patch b/queue-5.4/tcp-fix-to-allow-timestamp-undo-if-no-retransmits-we.patch
new file mode 100644 (file)
index 0000000..77dda43
--- /dev/null
@@ -0,0 +1,96 @@
+From 98b4903d75a361d71da3a0cf542f57ff209e7b3a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 1 Oct 2024 20:05:15 +0000
+Subject: tcp: fix to allow timestamp undo if no retransmits were sent
+
+From: Neal Cardwell <ncardwell@google.com>
+
+[ Upstream commit e37ab7373696e650d3b6262a5b882aadad69bb9e ]
+
+Fix the TCP loss recovery undo logic in tcp_packet_delayed() so that
+it can trigger undo even if TSQ prevents a fast recovery episode from
+reaching tcp_retransmit_skb().
+
+Geumhwan Yu <geumhwan.yu@samsung.com> recently reported that after
+this commit from 2019:
+
+commit bc9f38c8328e ("tcp: avoid unconditional congestion window undo
+on SYN retransmit")
+
+...and before this fix we could have buggy scenarios like the
+following:
+
++ Due to reordering, a TCP connection receives some SACKs and enters a
+  spurious fast recovery.
+
++ TSQ prevents all invocations of tcp_retransmit_skb(), because many
+  skbs are queued in lower layers of the sending machine's network
+  stack; thus tp->retrans_stamp remains 0.
+
++ The connection receives a TCP timestamp ECR value echoing a
+  timestamp before the fast recovery, indicating that the fast
+  recovery was spurious.
+
++ The connection fails to undo the spurious fast recovery because
+  tp->retrans_stamp is 0, and thus tcp_packet_delayed() returns false,
+  due to the new logic in the 2019 commit: commit bc9f38c8328e ("tcp:
+  avoid unconditional congestion window undo on SYN retransmit")
+
+This fix tweaks the logic to be more similar to the
+tcp_packet_delayed() logic before bc9f38c8328e, except that we take
+care not to be fooled by the FLAG_SYN_ACKED code path zeroing out
+tp->retrans_stamp (the bug noted and fixed by Yuchung in
+bc9f38c8328e).
+
+Note that this returns the high-level behavior of tcp_packet_delayed()
+to again match the comment for the function, which says: "Nothing was
+retransmitted or returned timestamp is less than timestamp of the
+first retransmission." Note that this comment is in the original
+2005-04-16 Linux git commit, so this is evidently long-standing
+behavior.
+
+Fixes: bc9f38c8328e ("tcp: avoid unconditional congestion window undo on SYN retransmit")
+Reported-by: Geumhwan Yu <geumhwan.yu@samsung.com>
+Diagnosed-by: Geumhwan Yu <geumhwan.yu@samsung.com>
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Link: https://patch.msgid.link/20241001200517.2756803-2-ncardwell.sw@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 18 ++++++++++++++++--
+ 1 file changed, 16 insertions(+), 2 deletions(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index cf6221e9fda50..75e954590bdd5 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -2294,8 +2294,22 @@ static bool tcp_skb_spurious_retrans(const struct tcp_sock *tp,
+  */
+ static inline bool tcp_packet_delayed(const struct tcp_sock *tp)
+ {
+-      return tp->retrans_stamp &&
+-             tcp_tsopt_ecr_before(tp, tp->retrans_stamp);
++      const struct sock *sk = (const struct sock *)tp;
++
++      if (tp->retrans_stamp &&
++          tcp_tsopt_ecr_before(tp, tp->retrans_stamp))
++              return true;  /* got echoed TS before first retransmission */
++
++      /* Check if nothing was retransmitted (retrans_stamp==0), which may
++       * happen in fast recovery due to TSQ. But we ignore zero retrans_stamp
++       * in TCP_SYN_SENT, since when we set FLAG_SYN_ACKED we also clear
++       * retrans_stamp even if we had retransmitted the SYN.
++       */
++      if (!tp->retrans_stamp &&          /* no record of a retransmit/SYN? */
++          sk->sk_state != TCP_SYN_SENT)  /* not the FLAG_SYN_ACKED case? */
++              return true;  /* nothing was retransmitted */
++
++      return false;
+ }
+ /* Undo procedures. */
+-- 
+2.43.0
+