Fixes for 5.15

author Sasha Levin <sashal@kernel.org>

Sun, 13 Oct 2024 02:48:14 +0000 (22:48 -0400)

committer Sasha Levin <sashal@kernel.org>

Sun, 13 Oct 2024 02:48:14 +0000 (22:48 -0400)
author Sasha Levin <sashal@kernel.org>
Sun, 13 Oct 2024 02:48:14 +0000 (22:48 -0400)
committer Sasha Levin <sashal@kernel.org>
Sun, 13 Oct 2024 02:48:14 +0000 (22:48 -0400)
diff --git a/queue-5.15/bluetooth-rfcomm-fix-possible-deadlock-in-rfcomm_sk_.patch b/queue-5.15/bluetooth-rfcomm-fix-possible-deadlock-in-rfcomm_sk_.patch

new file mode 100644 (file)

index 0000000..6ce937d
--- /dev/null
+++ b/queue-5.15/bluetooth-rfcomm-fix-possible-deadlock-in-rfcomm_sk_.patch
@@ -0,0 +1,51 @@
+From 1365b51ff690cb860a94e3f2850e0e5069fcc410 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 30 Sep 2024 13:26:21 -0400
+Subject: Bluetooth: RFCOMM: FIX possible deadlock in rfcomm_sk_state_change
+
+From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+
+[ Upstream commit 08d1914293dae38350b8088980e59fbc699a72fe ]
+
+rfcomm_sk_state_change attempts to use sock_lock so it must never be
+called with it locked but rfcomm_sock_ioctl always attempts to lock it
+causing the following trace:
+
+======================================================
+WARNING: possible circular locking dependency detected
+6.8.0-syzkaller-08951-gfe46a7dd189e #0 Not tainted
+------------------------------------------------------
+syz-executor386/5093 is trying to acquire lock:
+ffff88807c396258 (sk_lock-AF_BLUETOOTH-BTPROTO_RFCOMM){+.+.}-{0:0}, at: lock_sock include/net/sock.h:1671 [inline]
+ffff88807c396258 (sk_lock-AF_BLUETOOTH-BTPROTO_RFCOMM){+.+.}-{0:0}, at: rfcomm_sk_state_change+0x5b/0x310 net/bluetooth/rfcomm/sock.c:73
+
+but task is already holding lock:
+ffff88807badfd28 (&d->lock){+.+.}-{3:3}, at: __rfcomm_dlc_close+0x226/0x6a0 net/bluetooth/rfcomm/core.c:491
+
+Reported-by: syzbot+d7ce59b06b3eb14fd218@syzkaller.appspotmail.com
+Tested-by: syzbot+d7ce59b06b3eb14fd218@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=d7ce59b06b3eb14fd218
+Fixes: 3241ad820dbb ("[Bluetooth] Add timestamp support to L2CAP, RFCOMM and SCO")
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/bluetooth/rfcomm/sock.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
+index 4397e14ff560f..44c998e4a9e6c 100644
+--- a/net/bluetooth/rfcomm/sock.c
++++ b/net/bluetooth/rfcomm/sock.c
+@@ -876,9 +876,7 @@ static int rfcomm_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned lon
+ 
+       if (err == -ENOIOCTLCMD) {
+ #ifdef CONFIG_BT_RFCOMM_TTY
+-              lock_sock(sk);
+               err = rfcomm_dev_ioctl(sk, cmd, (void __user *) arg);
+-              release_sock(sk);
+ #else
+               err = -EOPNOTSUPP;
+ #endif
+-- 
+2.43.0
+
diff --git a/queue-5.15/gpio-aspeed-add-the-flush-write-to-ensure-the-write-.patch b/queue-5.15/gpio-aspeed-add-the-flush-write-to-ensure-the-write-.patch

new file mode 100644 (file)

index 0000000..1b29d79
--- /dev/null
+++ b/queue-5.15/gpio-aspeed-add-the-flush-write-to-ensure-the-write-.patch
@@ -0,0 +1,44 @@
+From bee62a590d2f45f3c1d4364db58c6f88bac5291c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Oct 2024 16:14:44 +0800
+Subject: gpio: aspeed: Add the flush write to ensure the write complete.
+
+From: Billy Tsai <billy_tsai@aspeedtech.com>
+
+[ Upstream commit 1bb5a99e1f3fd27accb804aa0443a789161f843c ]
+
+Performing a dummy read ensures that the register write operation is fully
+completed, mitigating any potential bus delays that could otherwise impact
+the frequency of bitbang usage. E.g., if the JTAG application uses GPIO to
+control the JTAG pins (TCK, TMS, TDI, TDO, and TRST), and the application
+sets the TCK clock to 1 MHz, the GPIO's high/low transitions will rely on
+a delay function to ensure the clock frequency does not exceed 1 MHz.
+However, this can lead to rapid toggling of the GPIO because the write
+operation is POSTed and does not wait for a bus acknowledgment.
+
+Fixes: 361b79119a4b ("gpio: Add Aspeed driver")
+Reviewed-by: Andrew Jeffery <andrew@codeconstruct.com.au>
+Signed-off-by: Billy Tsai <billy_tsai@aspeedtech.com>
+Link: https://lore.kernel.org/r/20241008081450.1490955-2-billy_tsai@aspeedtech.com
+Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpio/gpio-aspeed.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/gpio/gpio-aspeed.c b/drivers/gpio/gpio-aspeed.c
+index 42d3e1cf73528..3cfb2c6103c6b 100644
+--- a/drivers/gpio/gpio-aspeed.c
++++ b/drivers/gpio/gpio-aspeed.c
+@@ -404,6 +404,8 @@ static void __aspeed_gpio_set(struct gpio_chip *gc, unsigned int offset,
+       gpio->dcache[GPIO_BANK(offset)] = reg;
+ 
+       iowrite32(reg, addr);
++      /* Flush write */
++      ioread32(addr);
+ }
+ 
+ static void aspeed_gpio_set(struct gpio_chip *gc, unsigned int offset,
+-- 
+2.43.0
+
diff --git a/queue-5.15/gpio-aspeed-use-devm_clk-api-to-manage-clock-source.patch b/queue-5.15/gpio-aspeed-use-devm_clk-api-to-manage-clock-source.patch

new file mode 100644 (file)

index 0000000..583bc99
--- /dev/null
+++ b/queue-5.15/gpio-aspeed-use-devm_clk-api-to-manage-clock-source.patch
@@ -0,0 +1,37 @@
+From eb1bb0f6ce04535f390cb62e8bb2fd585bcdeb9c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Oct 2024 16:14:45 +0800
+Subject: gpio: aspeed: Use devm_clk api to manage clock source
+
+From: Billy Tsai <billy_tsai@aspeedtech.com>
+
+[ Upstream commit a6191a3d18119184237f4ee600039081ad992320 ]
+
+Replace of_clk_get with devm_clk_get_enabled to manage the clock source.
+
+Fixes: 5ae4cb94b313 ("gpio: aspeed: Add debounce support")
+Reviewed-by: Andrew Jeffery <andrew@codeconstruct.com.au>
+Signed-off-by: Billy Tsai <billy_tsai@aspeedtech.com>
+Link: https://lore.kernel.org/r/20241008081450.1490955-3-billy_tsai@aspeedtech.com
+Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpio/gpio-aspeed.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/gpio/gpio-aspeed.c b/drivers/gpio/gpio-aspeed.c
+index 3cfb2c6103c6b..21deb228c7d7b 100644
+--- a/drivers/gpio/gpio-aspeed.c
++++ b/drivers/gpio/gpio-aspeed.c
+@@ -1156,7 +1156,7 @@ static int __init aspeed_gpio_probe(struct platform_device *pdev)
+       if (!gpio_id)
+               return -EINVAL;
+ 
+-      gpio->clk = of_clk_get(pdev->dev.of_node, 0);
++      gpio->clk = devm_clk_get_enabled(&pdev->dev, NULL);
+       if (IS_ERR(gpio->clk)) {
+               dev_warn(&pdev->dev,
+                               "Failed to get clock from devicetree, debouncing disabled\n");
+-- 
+2.43.0
+
diff --git a/queue-5.15/i40e-fix-macvlan-leak-by-synchronizing-access-to-mac.patch b/queue-5.15/i40e-fix-macvlan-leak-by-synchronizing-access-to-mac.patch

new file mode 100644 (file)

index 0000000..004ed1f
--- /dev/null
+++ b/queue-5.15/i40e-fix-macvlan-leak-by-synchronizing-access-to-mac.patch
@@ -0,0 +1,73 @@
+From 6fe5d4d5fddc27b2af5d383c801e02d6152deebd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 23 Sep 2024 11:12:19 +0200
+Subject: i40e: Fix macvlan leak by synchronizing access to mac_filter_hash
+
+From: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
+
+[ Upstream commit dac6c7b3d33756d6ce09f00a96ea2ecd79fae9fb ]
+
+This patch addresses a macvlan leak issue in the i40e driver caused by
+concurrent access to vsi->mac_filter_hash. The leak occurs when multiple
+threads attempt to modify the mac_filter_hash simultaneously, leading to
+inconsistent state and potential memory leaks.
+
+To fix this, we now wrap the calls to i40e_del_mac_filter() and zeroing
+vf->default_lan_addr.addr with spin_lock/unlock_bh(&vsi->mac_filter_hash_lock),
+ensuring atomic operations and preventing concurrent access.
+
+Additionally, we add lockdep_assert_held(&vsi->mac_filter_hash_lock) in
+i40e_add_mac_filter() to help catch similar issues in the future.
+
+Reproduction steps:
+1. Spawn VFs and configure port vlan on them.
+2. Trigger concurrent macvlan operations (e.g., adding and deleting
+       portvlan and/or mac filters).
+3. Observe the potential memory leak and inconsistent state in the
+       mac_filter_hash.
+
+This synchronization ensures the integrity of the mac_filter_hash and prevents
+the described leak.
+
+Fixes: fed0d9f13266 ("i40e: Fix VF's MAC Address change on VM")
+Reviewed-by: Arkadiusz Kubalewski <arkadiusz.kubalewski@intel.com>
+Signed-off-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/i40e/i40e_main.c        | 1 +
+ drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 2 ++
+ 2 files changed, 3 insertions(+)
+
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
+index da4022a211f62..c1f21713ab8d1 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
+@@ -1712,6 +1712,7 @@ struct i40e_mac_filter *i40e_add_mac_filter(struct i40e_vsi *vsi,
+       struct hlist_node *h;
+       int bkt;
+ 
++      lockdep_assert_held(&vsi->mac_filter_hash_lock);
+       if (vsi->info.pvid)
+               return i40e_add_filter(vsi, macaddr,
+                                      le16_to_cpu(vsi->info.pvid));
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+index d1635df17e46f..65a29f955d9c4 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+@@ -2215,8 +2215,10 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg)
+               vfres->vsi_res[0].qset_handle
+                                         = le16_to_cpu(vsi->info.qs_handle[0]);
+               if (!(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_USO) && !vf->pf_set_mac) {
++                      spin_lock_bh(&vsi->mac_filter_hash_lock);
+                       i40e_del_mac_filter(vsi, vf->default_lan_addr.addr);
+                       eth_zero_addr(vf->default_lan_addr.addr);
++                      spin_unlock_bh(&vsi->mac_filter_hash_lock);
+               }
+               ether_addr_copy(vfres->vsi_res[0].default_mac_addr,
+                               vf->default_lan_addr.addr);
+-- 
+2.43.0
+
diff --git a/queue-5.15/ice-fix-netif_is_ice-in-safe-mode.patch b/queue-5.15/ice-fix-netif_is_ice-in-safe-mode.patch

new file mode 100644 (file)

index 0000000..74d84b5
--- /dev/null
+++ b/queue-5.15/ice-fix-netif_is_ice-in-safe-mode.patch
@@ -0,0 +1,43 @@
+From f1ae8ff52aaf8c1e72cf4a4d807c3085095fdeb7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 24 Sep 2024 12:04:24 +0200
+Subject: ice: Fix netif_is_ice() in Safe Mode
+
+From: Marcin Szycik <marcin.szycik@linux.intel.com>
+
+[ Upstream commit 8e60dbcbaaa177dacef55a61501790e201bf8c88 ]
+
+netif_is_ice() works by checking the pointer to netdev ops. However, it
+only checks for the default ice_netdev_ops, not ice_netdev_safe_mode_ops,
+so in Safe Mode it always returns false, which is unintuitive. While it
+doesn't look like netif_is_ice() is currently being called anywhere in Safe
+Mode, this could change and potentially lead to unexpected behaviour.
+
+Fixes: df006dd4b1dc ("ice: Add initial support framework for LAG")
+Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
+Signed-off-by: Marcin Szycik <marcin.szycik@linux.intel.com>
+Reviewed-by: Brett Creeley <brett.creeley@amd.com>
+Tested-by: Sujai Buvaneswaran <sujai.buvaneswaran@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/ice_main.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
+index 3cc344d5228b6..329bf24a3f0e5 100644
+--- a/drivers/net/ethernet/intel/ice/ice_main.c
++++ b/drivers/net/ethernet/intel/ice/ice_main.c
+@@ -55,7 +55,8 @@ static void ice_vsi_release_all(struct ice_pf *pf);
+ 
+ bool netif_is_ice(struct net_device *dev)
+ {
+-      return dev && (dev->netdev_ops == &ice_netdev_ops);
++      return dev && (dev->netdev_ops == &ice_netdev_ops ||
++                     dev->netdev_ops == &ice_netdev_safe_mode_ops);
+ }
+ 
+ /**
+-- 
+2.43.0
+
diff --git a/queue-5.15/ice-fix-vlan-replay-after-reset.patch b/queue-5.15/ice-fix-vlan-replay-after-reset.patch

new file mode 100644 (file)

index 0000000..6495780
--- /dev/null
+++ b/queue-5.15/ice-fix-vlan-replay-after-reset.patch
@@ -0,0 +1,56 @@
+From bf8c76ffb46a676048a8408b4fe5330025a3be51 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 18 Sep 2024 14:02:56 -0400
+Subject: ice: fix VLAN replay after reset
+
+From: Dave Ertman <david.m.ertman@intel.com>
+
+[ Upstream commit 0eae2c136cb624e4050092feb59f18159b4f2512 ]
+
+There is a bug currently when there are more than one VLAN defined
+and any reset that affects the PF is initiated, after the reset rebuild
+no traffic will pass on any VLAN but the last one created.
+
+This is caused by the iteration through the VLANs during replay each
+clearing the vsi_map bitmap of the VSI that is being replayed.  The
+problem is that during the replay, the pointer to the vsi_map bitmap
+is used by each successive vlan to determine if it should be replayed
+on this VSI.
+
+The logic was that the replay of the VLAN would replace the bit in the map
+before the next VLAN would iterate through.  But, since the replay copies
+the old bitmap pointer to filt_replay_rules and creates a new one for the
+recreated VLANS, it does not do this, and leaves the old bitmap broken
+to be used to replay the remaining VLANs.
+
+Since the old bitmap will be cleaned up in post replay cleanup, there is
+no need to alter it and break following VLAN replay, so don't clear the
+bit.
+
+Fixes: 334cb0626de1 ("ice: Implement VSI replay framework")
+Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
+Signed-off-by: Dave Ertman <david.m.ertman@intel.com>
+Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
+Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha@intel.com> (A Contingent worker at Intel)
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/ice_switch.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
+index 128c67c6de777..08540c12a7920 100644
+--- a/drivers/net/ethernet/intel/ice/ice_switch.c
++++ b/drivers/net/ethernet/intel/ice/ice_switch.c
+@@ -2812,8 +2812,6 @@ ice_replay_vsi_fltr(struct ice_hw *hw, u16 vsi_handle, u8 recp_id,
+               if (!itr->vsi_list_info ||
+                   !test_bit(vsi_handle, itr->vsi_list_info->vsi_map))
+                       continue;
+-              /* Clearing it so that the logic can add it back */
+-              clear_bit(vsi_handle, itr->vsi_list_info->vsi_map);
+               f_entry.fltr_info.vsi_handle = vsi_handle;
+               f_entry.fltr_info.fltr_act = ICE_FWD_TO_VSI;
+               /* update the src in case it is VSI num */
+-- 
+2.43.0
+
diff --git a/queue-5.15/igb-do-not-bring-the-device-up-after-non-fatal-error.patch b/queue-5.15/igb-do-not-bring-the-device-up-after-non-fatal-error.patch

new file mode 100644 (file)

index 0000000..1ed9ae4
--- /dev/null
+++ b/queue-5.15/igb-do-not-bring-the-device-up-after-non-fatal-error.patch
@@ -0,0 +1,96 @@
+From 4cfa442cc3961216299f02a173a8a5a59bd40a88 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 24 Sep 2024 15:06:01 -0600
+Subject: igb: Do not bring the device up after non-fatal error
+
+From: Mohamed Khalfella <mkhalfella@purestorage.com>
+
+[ Upstream commit 330a699ecbfc9c26ec92c6310686da1230b4e7eb ]
+
+Commit 004d25060c78 ("igb: Fix igb_down hung on surprise removal")
+changed igb_io_error_detected() to ignore non-fatal pcie errors in order
+to avoid hung task that can happen when igb_down() is called multiple
+times. This caused an issue when processing transient non-fatal errors.
+igb_io_resume(), which is called after igb_io_error_detected(), assumes
+that device is brought down by igb_io_error_detected() if the interface
+is up. This resulted in panic with stacktrace below.
+
+[ T3256] igb 0000:09:00.0 haeth0: igb: haeth0 NIC Link is Down
+[  T292] pcieport 0000:00:1c.5: AER: Uncorrected (Non-Fatal) error received: 0000:09:00.0
+[  T292] igb 0000:09:00.0: PCIe Bus Error: severity=Uncorrected (Non-Fatal), type=Transaction Layer, (Requester ID)
+[  T292] igb 0000:09:00.0:   device [8086:1537] error status/mask=00004000/00000000
+[  T292] igb 0000:09:00.0:    [14] CmpltTO [  200.105524,009][  T292] igb 0000:09:00.0: AER:   TLP Header: 00000000 00000000 00000000 00000000
+[  T292] pcieport 0000:00:1c.5: AER: broadcast error_detected message
+[  T292] igb 0000:09:00.0: Non-correctable non-fatal error reported.
+[  T292] pcieport 0000:00:1c.5: AER: broadcast mmio_enabled message
+[  T292] pcieport 0000:00:1c.5: AER: broadcast resume message
+[  T292] ------------[ cut here ]------------
+[  T292] kernel BUG at net/core/dev.c:6539!
+[  T292] invalid opcode: 0000 [#1] PREEMPT SMP
+[  T292] RIP: 0010:napi_enable+0x37/0x40
+[  T292] Call Trace:
+[  T292]  <TASK>
+[  T292]  ? die+0x33/0x90
+[  T292]  ? do_trap+0xdc/0x110
+[  T292]  ? napi_enable+0x37/0x40
+[  T292]  ? do_error_trap+0x70/0xb0
+[  T292]  ? napi_enable+0x37/0x40
+[  T292]  ? napi_enable+0x37/0x40
+[  T292]  ? exc_invalid_op+0x4e/0x70
+[  T292]  ? napi_enable+0x37/0x40
+[  T292]  ? asm_exc_invalid_op+0x16/0x20
+[  T292]  ? napi_enable+0x37/0x40
+[  T292]  igb_up+0x41/0x150
+[  T292]  igb_io_resume+0x25/0x70
+[  T292]  report_resume+0x54/0x70
+[  T292]  ? report_frozen_detected+0x20/0x20
+[  T292]  pci_walk_bus+0x6c/0x90
+[  T292]  ? aer_print_port_info+0xa0/0xa0
+[  T292]  pcie_do_recovery+0x22f/0x380
+[  T292]  aer_process_err_devices+0x110/0x160
+[  T292]  aer_isr+0x1c1/0x1e0
+[  T292]  ? disable_irq_nosync+0x10/0x10
+[  T292]  irq_thread_fn+0x1a/0x60
+[  T292]  irq_thread+0xe3/0x1a0
+[  T292]  ? irq_set_affinity_notifier+0x120/0x120
+[  T292]  ? irq_affinity_notify+0x100/0x100
+[  T292]  kthread+0xe2/0x110
+[  T292]  ? kthread_complete_and_exit+0x20/0x20
+[  T292]  ret_from_fork+0x2d/0x50
+[  T292]  ? kthread_complete_and_exit+0x20/0x20
+[  T292]  ret_from_fork_asm+0x11/0x20
+[  T292]  </TASK>
+
+To fix this issue, igb_io_resume() checks if the interface is running and
+the device is not down; this means igb_io_error_detected() did not bring
+the device down and there is no need to bring it up.
+
+Signed-off-by: Mohamed Khalfella <mkhalfella@purestorage.com>
+Reviewed-by: Yuanyuan Zhong <yzhong@purestorage.com>
+Fixes: 004d25060c78 ("igb: Fix igb_down hung on surprise removal")
+Reviewed-by: Simon Horman <horms@kernel.org>
+Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha@intel.com> (A Contingent worker at Intel)
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igb/igb_main.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
+index 559ddb40347cc..f3a433b4c7cdb 100644
+--- a/drivers/net/ethernet/intel/igb/igb_main.c
++++ b/drivers/net/ethernet/intel/igb/igb_main.c
+@@ -9539,6 +9539,10 @@ static void igb_io_resume(struct pci_dev *pdev)
+       struct igb_adapter *adapter = netdev_priv(netdev);
+ 
+       if (netif_running(netdev)) {
++              if (!test_bit(__IGB_DOWN, &adapter->state)) {
++                      dev_dbg(&pdev->dev, "Resuming from non-fatal error, do nothing.\n");
++                      return;
++              }
+               if (igb_up(adapter)) {
+                       dev_err(&pdev->dev, "igb_up failed after reset\n");
+                       return;
+-- 
+2.43.0
+
diff --git a/queue-5.15/mctp-handle-error-of-rtnl_register_module.patch b/queue-5.15/mctp-handle-error-of-rtnl_register_module.patch

new file mode 100644 (file)

index 0000000..83c1a2f
--- /dev/null
+++ b/queue-5.15/mctp-handle-error-of-rtnl_register_module.patch
@@ -0,0 +1,217 @@
+From fabbbba1d51a3e6fb2ded2978a8684991c7562d1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Oct 2024 11:47:35 -0700
+Subject: mctp: Handle error of rtnl_register_module().
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit d51705614f668254cc5def7490df76f9680b4659 ]
+
+Since introduced, mctp has been ignoring the returned value of
+rtnl_register_module(), which could fail silently.
+
+Handling the error allows users to view a module as an all-or-nothing
+thing in terms of the rtnetlink functionality.  This prevents syzkaller
+from reporting spurious errors from its tests, where OOM often occurs
+and module is automatically loaded.
+
+Let's handle the errors by rtnl_register_many().
+
+Fixes: 583be982d934 ("mctp: Add device handling and netlink interface")
+Fixes: 831119f88781 ("mctp: Add neighbour netlink interface")
+Fixes: 06d2f4c583a7 ("mctp: Add netlink route management")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Reviewed-by: Jeremy Kerr <jk@codeconstruct.com.au>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/mctp.h |  2 +-
+ net/mctp/af_mctp.c |  6 +++++-
+ net/mctp/device.c  | 30 ++++++++++++++++++------------
+ net/mctp/neigh.c   | 31 +++++++++++++++++++------------
+ net/mctp/route.c   | 33 +++++++++++++++++++++++----------
+ 5 files changed, 66 insertions(+), 36 deletions(-)
+
+diff --git a/include/net/mctp.h b/include/net/mctp.h
+index ffd2c23bd76d5..8c225091e46cf 100644
+--- a/include/net/mctp.h
++++ b/include/net/mctp.h
+@@ -226,7 +226,7 @@ void mctp_neigh_remove_dev(struct mctp_dev *mdev);
+ int mctp_routes_init(void);
+ void mctp_routes_exit(void);
+ 
+-void mctp_device_init(void);
++int mctp_device_init(void);
+ void mctp_device_exit(void);
+ 
+ #endif /* __NET_MCTP_H */
+diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c
+index 77137a8627d06..0ca031866ce1a 100644
+--- a/net/mctp/af_mctp.c
++++ b/net/mctp/af_mctp.c
+@@ -384,10 +384,14 @@ static __init int mctp_init(void)
+       if (rc)
+               goto err_unreg_routes;
+ 
+-      mctp_device_init();
++      rc = mctp_device_init();
++      if (rc)
++              goto err_unreg_neigh;
+ 
+       return 0;
+ 
++err_unreg_neigh:
++      mctp_neigh_exit();
+ err_unreg_routes:
+       mctp_routes_exit();
+ err_unreg_proto:
+diff --git a/net/mctp/device.c b/net/mctp/device.c
+index b9f38e765f619..c00a2550e2e0e 100644
+--- a/net/mctp/device.c
++++ b/net/mctp/device.c
+@@ -399,25 +399,31 @@ static struct notifier_block mctp_dev_nb = {
+       .priority = ADDRCONF_NOTIFY_PRIORITY,
+ };
+ 
+-void __init mctp_device_init(void)
++static const struct rtnl_msg_handler mctp_device_rtnl_msg_handlers[] = {
++      {THIS_MODULE, PF_MCTP, RTM_NEWADDR, mctp_rtm_newaddr, NULL, 0},
++      {THIS_MODULE, PF_MCTP, RTM_DELADDR, mctp_rtm_deladdr, NULL, 0},
++      {THIS_MODULE, PF_MCTP, RTM_GETADDR, NULL, mctp_dump_addrinfo, 0},
++};
++
++int __init mctp_device_init(void)
+ {
+-      register_netdevice_notifier(&mctp_dev_nb);
++      int err;
+ 
+-      rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_GETADDR,
+-                           NULL, mctp_dump_addrinfo, 0);
+-      rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_NEWADDR,
+-                           mctp_rtm_newaddr, NULL, 0);
+-      rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_DELADDR,
+-                           mctp_rtm_deladdr, NULL, 0);
++      register_netdevice_notifier(&mctp_dev_nb);
+       rtnl_af_register(&mctp_af_ops);
++
++      err = rtnl_register_many(mctp_device_rtnl_msg_handlers);
++      if (err) {
++              rtnl_af_unregister(&mctp_af_ops);
++              unregister_netdevice_notifier(&mctp_dev_nb);
++      }
++
++      return err;
+ }
+ 
+ void __exit mctp_device_exit(void)
+ {
++      rtnl_unregister_many(mctp_device_rtnl_msg_handlers);
+       rtnl_af_unregister(&mctp_af_ops);
+-      rtnl_unregister(PF_MCTP, RTM_DELADDR);
+-      rtnl_unregister(PF_MCTP, RTM_NEWADDR);
+-      rtnl_unregister(PF_MCTP, RTM_GETADDR);
+-
+       unregister_netdevice_notifier(&mctp_dev_nb);
+ }
+diff --git a/net/mctp/neigh.c b/net/mctp/neigh.c
+index 90ed2f02d1fb0..bc75a263719c7 100644
+--- a/net/mctp/neigh.c
++++ b/net/mctp/neigh.c
+@@ -321,22 +321,29 @@ static struct pernet_operations mctp_net_ops = {
+       .exit = mctp_neigh_net_exit,
+ };
+ 
++static const struct rtnl_msg_handler mctp_neigh_rtnl_msg_handlers[] = {
++      {THIS_MODULE, PF_MCTP, RTM_NEWNEIGH, mctp_rtm_newneigh, NULL, 0},
++      {THIS_MODULE, PF_MCTP, RTM_DELNEIGH, mctp_rtm_delneigh, NULL, 0},
++      {THIS_MODULE, PF_MCTP, RTM_GETNEIGH, NULL, mctp_rtm_getneigh, 0},
++};
++
+ int __init mctp_neigh_init(void)
+ {
+-      rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_NEWNEIGH,
+-                           mctp_rtm_newneigh, NULL, 0);
+-      rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_DELNEIGH,
+-                           mctp_rtm_delneigh, NULL, 0);
+-      rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_GETNEIGH,
+-                           NULL, mctp_rtm_getneigh, 0);
+-
+-      return register_pernet_subsys(&mctp_net_ops);
++      int err;
++
++      err = register_pernet_subsys(&mctp_net_ops);
++      if (err)
++              return err;
++
++      err = rtnl_register_many(mctp_neigh_rtnl_msg_handlers);
++      if (err)
++              unregister_pernet_subsys(&mctp_net_ops);
++
++      return err;
+ }
+ 
+-void __exit mctp_neigh_exit(void)
++void mctp_neigh_exit(void)
+ {
++      rtnl_unregister_many(mctp_neigh_rtnl_msg_handlers);
+       unregister_pernet_subsys(&mctp_net_ops);
+-      rtnl_unregister(PF_MCTP, RTM_GETNEIGH);
+-      rtnl_unregister(PF_MCTP, RTM_DELNEIGH);
+-      rtnl_unregister(PF_MCTP, RTM_NEWNEIGH);
+ }
+diff --git a/net/mctp/route.c b/net/mctp/route.c
+index 5ef6b3b0a3d99..48d32bfd38636 100644
+--- a/net/mctp/route.c
++++ b/net/mctp/route.c
+@@ -1134,25 +1134,38 @@ static struct pernet_operations mctp_net_ops = {
+       .exit = mctp_routes_net_exit,
+ };
+ 
++static const struct rtnl_msg_handler mctp_route_rtnl_msg_handlers[] = {
++      {THIS_MODULE, PF_MCTP, RTM_NEWROUTE, mctp_newroute, NULL, 0},
++      {THIS_MODULE, PF_MCTP, RTM_DELROUTE, mctp_delroute, NULL, 0},
++      {THIS_MODULE, PF_MCTP, RTM_GETROUTE, NULL, mctp_dump_rtinfo, 0},
++};
++
+ int __init mctp_routes_init(void)
+ {
++      int err;
++
+       dev_add_pack(&mctp_packet_type);
+ 
+-      rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_GETROUTE,
+-                           NULL, mctp_dump_rtinfo, 0);
+-      rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_NEWROUTE,
+-                           mctp_newroute, NULL, 0);
+-      rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_DELROUTE,
+-                           mctp_delroute, NULL, 0);
++      err = register_pernet_subsys(&mctp_net_ops);
++      if (err)
++              goto err_pernet;
++
++      err = rtnl_register_many(mctp_route_rtnl_msg_handlers);
++      if (err)
++              goto err_rtnl;
+ 
+-      return register_pernet_subsys(&mctp_net_ops);
++      return 0;
++
++err_rtnl:
++      unregister_pernet_subsys(&mctp_net_ops);
++err_pernet:
++      dev_remove_pack(&mctp_packet_type);
++      return err;
+ }
+ 
+ void mctp_routes_exit(void)
+ {
++      rtnl_unregister_many(mctp_route_rtnl_msg_handlers);
+       unregister_pernet_subsys(&mctp_net_ops);
+-      rtnl_unregister(PF_MCTP, RTM_DELROUTE);
+-      rtnl_unregister(PF_MCTP, RTM_NEWROUTE);
+-      rtnl_unregister(PF_MCTP, RTM_GETROUTE);
+       dev_remove_pack(&mctp_packet_type);
+ }
+-- 
+2.43.0
+
diff --git a/queue-5.15/net-add-l3mdev-index-to-flow-struct-and-avoid-oif-re.patch b/queue-5.15/net-add-l3mdev-index-to-flow-struct-and-avoid-oif-re.patch

new file mode 100644 (file)

index 0000000..49823f3
--- /dev/null
+++ b/queue-5.15/net-add-l3mdev-index-to-flow-struct-and-avoid-oif-re.patch
@@ -0,0 +1,419 @@
+From c31f27d84d20d5cb701cd33588f8ffef1988f447 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 14 Mar 2022 14:45:51 -0600
+Subject: net: Add l3mdev index to flow struct and avoid oif reset for port
+ devices
+
+From: David Ahern <dsahern@kernel.org>
+
+[ Upstream commit 40867d74c374b235e14d839f3a77f26684feefe5 ]
+
+The fundamental premise of VRF and l3mdev core code is binding a socket
+to a device (l3mdev or netdev with an L3 domain) to indicate L3 scope.
+Legacy code resets flowi_oif to the l3mdev losing any original port
+device binding. Ben (among others) has demonstrated use cases where the
+original port device binding is important and needs to be retained.
+This patch handles that by adding a new entry to the common flow struct
+that can indicate the l3mdev index for later rule and table matching
+avoiding the need to reset flowi_oif.
+
+In addition to allowing more use cases that require port device binds,
+this patch brings a few datapath simplifications:
+
+1. l3mdev_fib_rule_match is only called when walking fib rules and
+   always after l3mdev_update_flow. That allows an optimization to bail
+   early for non-VRF type use cases when flowi_l3mdev is not set. Also,
+   only that index needs to be checked for the FIB table id.
+
+2. l3mdev_update_flow can be called with flowi_oif set to a l3mdev
+   (e.g., VRF) device. By resetting flowi_oif only for this case the
+   FLOWI_FLAG_SKIP_NH_OIF flag is no longer needed and can be removed,
+   removing several checks in the datapath. The flowi_iif path can be
+   simplified to only be called if it is not loopback (loopback can
+   not be assigned to an L3 domain) and the l3mdev index is not already
+   set.
+
+3. Avoid another device lookup in the output path when the fib lookup
+   returns a reject failure.
+
+Note: 2 functional tests for local traffic with reject fib rules are
+updated to reflect the new direct failure at FIB lookup time for ping
+rather than the failure on packet path. The current code fails like this:
+
+    HINT: Fails since address on vrf device is out of device scope
+    COMMAND: ip netns exec ns-A ping -c1 -w1 -I eth1 172.16.3.1
+    ping: Warning: source address might be selected on device other than: eth1
+    PING 172.16.3.1 (172.16.3.1) from 172.16.3.1 eth1: 56(84) bytes of data.
+
+    --- 172.16.3.1 ping statistics ---
+    1 packets transmitted, 0 received, 100% packet loss, time 0ms
+
+where the test now directly fails:
+
+    HINT: Fails since address on vrf device is out of device scope
+    COMMAND: ip netns exec ns-A ping -c1 -w1 -I eth1 172.16.3.1
+    ping: connect: No route to host
+
+Signed-off-by: David Ahern <dsahern@kernel.org>
+Tested-by: Ben Greear <greearb@candelatech.com>
+Link: https://lore.kernel.org/r/20220314204551.16369-1-dsahern@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 05ef7055debc ("netfilter: fib: check correct rtable in vrf setups")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/vrf.c                         |  7 ++--
+ include/net/flow.h                        |  6 +++-
+ net/ipv4/fib_frontend.c                   |  7 ++--
+ net/ipv4/fib_semantics.c                  |  2 +-
+ net/ipv4/fib_trie.c                       |  7 ++--
+ net/ipv4/route.c                          |  4 +--
+ net/ipv4/xfrm4_policy.c                   |  4 +--
+ net/ipv6/ip6_output.c                     |  3 +-
+ net/ipv6/route.c                          | 12 -------
+ net/ipv6/xfrm6_policy.c                   |  3 +-
+ net/l3mdev/l3mdev.c                       | 43 +++++++++--------------
+ tools/testing/selftests/net/fcnal-test.sh |  2 +-
+ 12 files changed, 37 insertions(+), 63 deletions(-)
+
+diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
+index 091dd7caf10cc..85f5d78ff9ac0 100644
+--- a/drivers/net/vrf.c
++++ b/drivers/net/vrf.c
+@@ -471,14 +471,13 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
+ 
+       memset(&fl6, 0, sizeof(fl6));
+       /* needed to match OIF rule */
+-      fl6.flowi6_oif = dev->ifindex;
++      fl6.flowi6_l3mdev = dev->ifindex;
+       fl6.flowi6_iif = LOOPBACK_IFINDEX;
+       fl6.daddr = iph->daddr;
+       fl6.saddr = iph->saddr;
+       fl6.flowlabel = ip6_flowinfo(iph);
+       fl6.flowi6_mark = skb->mark;
+       fl6.flowi6_proto = iph->nexthdr;
+-      fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF;
+ 
+       dst = ip6_dst_lookup_flow(net, NULL, &fl6, NULL);
+       if (IS_ERR(dst) || dst == dst_null)
+@@ -550,10 +549,10 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb,
+ 
+       memset(&fl4, 0, sizeof(fl4));
+       /* needed to match OIF rule */
+-      fl4.flowi4_oif = vrf_dev->ifindex;
++      fl4.flowi4_l3mdev = vrf_dev->ifindex;
+       fl4.flowi4_iif = LOOPBACK_IFINDEX;
+       fl4.flowi4_tos = RT_TOS(ip4h->tos);
+-      fl4.flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_SKIP_NH_OIF;
++      fl4.flowi4_flags = FLOWI_FLAG_ANYSRC;
+       fl4.flowi4_proto = ip4h->protocol;
+       fl4.daddr = ip4h->daddr;
+       fl4.saddr = ip4h->saddr;
+diff --git a/include/net/flow.h b/include/net/flow.h
+index 776bacc96242a..079cc493fe67d 100644
+--- a/include/net/flow.h
++++ b/include/net/flow.h
+@@ -29,6 +29,7 @@ struct flowi_tunnel {
+ struct flowi_common {
+       int     flowic_oif;
+       int     flowic_iif;
++      int     flowic_l3mdev;
+       __u32   flowic_mark;
+       __u8    flowic_tos;
+       __u8    flowic_scope;
+@@ -36,7 +37,6 @@ struct flowi_common {
+       __u8    flowic_flags;
+ #define FLOWI_FLAG_ANYSRC             0x01
+ #define FLOWI_FLAG_KNOWN_NH           0x02
+-#define FLOWI_FLAG_SKIP_NH_OIF                0x04
+       __u32   flowic_secid;
+       kuid_t  flowic_uid;
+       __u32           flowic_multipath_hash;
+@@ -65,6 +65,7 @@ struct flowi4 {
+       struct flowi_common     __fl_common;
+ #define flowi4_oif            __fl_common.flowic_oif
+ #define flowi4_iif            __fl_common.flowic_iif
++#define flowi4_l3mdev         __fl_common.flowic_l3mdev
+ #define flowi4_mark           __fl_common.flowic_mark
+ #define flowi4_tos            __fl_common.flowic_tos
+ #define flowi4_scope          __fl_common.flowic_scope
+@@ -97,6 +98,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif,
+ {
+       fl4->flowi4_oif = oif;
+       fl4->flowi4_iif = LOOPBACK_IFINDEX;
++      fl4->flowi4_l3mdev = 0;
+       fl4->flowi4_mark = mark;
+       fl4->flowi4_tos = tos;
+       fl4->flowi4_scope = scope;
+@@ -127,6 +129,7 @@ struct flowi6 {
+       struct flowi_common     __fl_common;
+ #define flowi6_oif            __fl_common.flowic_oif
+ #define flowi6_iif            __fl_common.flowic_iif
++#define flowi6_l3mdev         __fl_common.flowic_l3mdev
+ #define flowi6_mark           __fl_common.flowic_mark
+ #define flowi6_scope          __fl_common.flowic_scope
+ #define flowi6_proto          __fl_common.flowic_proto
+@@ -156,6 +159,7 @@ struct flowi {
+       } u;
+ #define flowi_oif     u.__fl_common.flowic_oif
+ #define flowi_iif     u.__fl_common.flowic_iif
++#define flowi_l3mdev  u.__fl_common.flowic_l3mdev
+ #define flowi_mark    u.__fl_common.flowic_mark
+ #define flowi_tos     u.__fl_common.flowic_tos
+ #define flowi_scope   u.__fl_common.flowic_scope
+diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
+index 5ca9c8f1610a7..545dd994f0609 100644
+--- a/net/ipv4/fib_frontend.c
++++ b/net/ipv4/fib_frontend.c
+@@ -290,7 +290,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
+               bool vmark = in_dev && IN_DEV_SRC_VMARK(in_dev);
+               struct flowi4 fl4 = {
+                       .flowi4_iif = LOOPBACK_IFINDEX,
+-                      .flowi4_oif = l3mdev_master_ifindex_rcu(dev),
++                      .flowi4_l3mdev = l3mdev_master_ifindex_rcu(dev),
+                       .daddr = ip_hdr(skb)->saddr,
+                       .flowi4_tos = ip_hdr(skb)->tos & IPTOS_RT_MASK,
+                       .flowi4_scope = scope,
+@@ -352,9 +352,8 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
+       bool dev_match;
+ 
+       fl4.flowi4_oif = 0;
+-      fl4.flowi4_iif = l3mdev_master_ifindex_rcu(dev);
+-      if (!fl4.flowi4_iif)
+-              fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX;
++      fl4.flowi4_l3mdev = l3mdev_master_ifindex_rcu(dev);
++      fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX;
+       fl4.daddr = src;
+       fl4.saddr = dst;
+       fl4.flowi4_tos = tos;
+diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
+index 735901b8c9f69..3d00253afbb8d 100644
+--- a/net/ipv4/fib_semantics.c
++++ b/net/ipv4/fib_semantics.c
+@@ -2269,7 +2269,7 @@ void fib_select_multipath(struct fib_result *res, int hash)
+ void fib_select_path(struct net *net, struct fib_result *res,
+                    struct flowi4 *fl4, const struct sk_buff *skb)
+ {
+-      if (fl4->flowi4_oif && !(fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF))
++      if (fl4->flowi4_oif)
+               goto check_saddr;
+ 
+ #ifdef CONFIG_IP_ROUTE_MULTIPATH
+diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
+index 0b74debeecbb1..ec0113ecf3949 100644
+--- a/net/ipv4/fib_trie.c
++++ b/net/ipv4/fib_trie.c
+@@ -1428,11 +1428,8 @@ bool fib_lookup_good_nhc(const struct fib_nh_common *nhc, int fib_flags,
+           !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE))
+               return false;
+ 
+-      if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) {
+-              if (flp->flowi4_oif &&
+-                  flp->flowi4_oif != nhc->nhc_oif)
+-                      return false;
+-      }
++      if (flp->flowi4_oif && flp->flowi4_oif != nhc->nhc_oif)
++              return false;
+ 
+       return true;
+ }
+diff --git a/net/ipv4/route.c b/net/ipv4/route.c
+index 60fc35defdf8b..3522801885787 100644
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -2285,6 +2285,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+       /*
+        *      Now we are ready to route packet.
+        */
++      fl4.flowi4_l3mdev = 0;
+       fl4.flowi4_oif = 0;
+       fl4.flowi4_iif = dev->ifindex;
+       fl4.flowi4_mark = skb->mark;
+@@ -2761,8 +2762,7 @@ struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4,
+               res->fi = NULL;
+               res->table = NULL;
+               if (fl4->flowi4_oif &&
+-                  (ipv4_is_multicast(fl4->daddr) ||
+-                  !netif_index_is_l3_master(net, fl4->flowi4_oif))) {
++                  (ipv4_is_multicast(fl4->daddr) || !fl4->flowi4_l3mdev)) {
+                       /* Apparently, routing tables are wrong. Assume,
+                        * that the destination is on link.
+                        *
+diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
+index 9ebd54752e03b..4548a91acdc89 100644
+--- a/net/ipv4/xfrm4_policy.c
++++ b/net/ipv4/xfrm4_policy.c
+@@ -28,13 +28,11 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4,
+       memset(fl4, 0, sizeof(*fl4));
+       fl4->daddr = daddr->a4;
+       fl4->flowi4_tos = tos;
+-      fl4->flowi4_oif = l3mdev_master_ifindex_by_index(net, oif);
++      fl4->flowi4_l3mdev = l3mdev_master_ifindex_by_index(net, oif);
+       fl4->flowi4_mark = mark;
+       if (saddr)
+               fl4->saddr = saddr->a4;
+ 
+-      fl4->flowi4_flags = FLOWI_FLAG_SKIP_NH_OIF;
+-
+       rt = __ip_route_output_key(net, fl4);
+       if (!IS_ERR(rt))
+               return &rt->dst;
+diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
+index b37121f872bc9..9899bac5e1508 100644
+--- a/net/ipv6/ip6_output.c
++++ b/net/ipv6/ip6_output.c
+@@ -1066,8 +1066,7 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
+ #ifdef CONFIG_IPV6_SUBTREES
+           ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
+ #endif
+-         (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
+-            (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
++         (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
+               dst_release(dst);
+               dst = NULL;
+       }
+diff --git a/net/ipv6/route.c b/net/ipv6/route.c
+index d937ee942a4fc..35d3f02ddf163 100644
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -1209,9 +1209,6 @@ INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_lookup(struct net *net,
+       struct fib6_node *fn;
+       struct rt6_info *rt;
+ 
+-      if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
+-              flags &= ~RT6_LOOKUP_F_IFACE;
+-
+       rcu_read_lock();
+       fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
+ restart:
+@@ -2182,9 +2179,6 @@ int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif,
+       fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
+       saved_fn = fn;
+ 
+-      if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
+-              oif = 0;
+-
+ redo_rt6_select:
+       rt6_select(net, fn, oif, res, strict);
+       if (res->f6i == net->ipv6.fib6_null_entry) {
+@@ -3060,12 +3054,6 @@ INDIRECT_CALLABLE_SCOPE struct rt6_info *__ip6_route_redirect(struct net *net,
+       struct fib6_info *rt;
+       struct fib6_node *fn;
+ 
+-      /* l3mdev_update_flow overrides oif if the device is enslaved; in
+-       * this case we must match on the real ingress device, so reset it
+-       */
+-      if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
+-              fl6->flowi6_oif = skb->dev->ifindex;
+-
+       /* Get the "current" route for this destination and
+        * check if the redirect has come from appropriate router.
+        *
+diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
+index 7c903e0e446cb..492b9692c0dc0 100644
+--- a/net/ipv6/xfrm6_policy.c
++++ b/net/ipv6/xfrm6_policy.c
+@@ -33,8 +33,7 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif,
+       int err;
+ 
+       memset(&fl6, 0, sizeof(fl6));
+-      fl6.flowi6_oif = l3mdev_master_ifindex_by_index(net, oif);
+-      fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF;
++      fl6.flowi6_l3mdev = l3mdev_master_ifindex_by_index(net, oif);
+       fl6.flowi6_mark = mark;
+       memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr));
+       if (saddr)
+diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
+index 8b14a24f10404..ca10916340b09 100644
+--- a/net/l3mdev/l3mdev.c
++++ b/net/l3mdev/l3mdev.c
+@@ -250,25 +250,19 @@ int l3mdev_fib_rule_match(struct net *net, struct flowi *fl,
+       struct net_device *dev;
+       int rc = 0;
+ 
+-      rcu_read_lock();
++      /* update flow ensures flowi_l3mdev is set when relevant */
++      if (!fl->flowi_l3mdev)
++              return 0;
+ 
+-      dev = dev_get_by_index_rcu(net, fl->flowi_oif);
+-      if (dev && netif_is_l3_master(dev) &&
+-          dev->l3mdev_ops->l3mdev_fib_table) {
+-              arg->table = dev->l3mdev_ops->l3mdev_fib_table(dev);
+-              rc = 1;
+-              goto out;
+-      }
++      rcu_read_lock();
+ 
+-      dev = dev_get_by_index_rcu(net, fl->flowi_iif);
++      dev = dev_get_by_index_rcu(net, fl->flowi_l3mdev);
+       if (dev && netif_is_l3_master(dev) &&
+           dev->l3mdev_ops->l3mdev_fib_table) {
+               arg->table = dev->l3mdev_ops->l3mdev_fib_table(dev);
+               rc = 1;
+-              goto out;
+       }
+ 
+-out:
+       rcu_read_unlock();
+ 
+       return rc;
+@@ -277,31 +271,28 @@ int l3mdev_fib_rule_match(struct net *net, struct flowi *fl,
+ void l3mdev_update_flow(struct net *net, struct flowi *fl)
+ {
+       struct net_device *dev;
+-      int ifindex;
+ 
+       rcu_read_lock();
+ 
+       if (fl->flowi_oif) {
+               dev = dev_get_by_index_rcu(net, fl->flowi_oif);
+               if (dev) {
+-                      ifindex = l3mdev_master_ifindex_rcu(dev);
+-                      if (ifindex) {
+-                              fl->flowi_oif = ifindex;
+-                              fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF;
+-                              goto out;
+-                      }
++                      if (!fl->flowi_l3mdev)
++                              fl->flowi_l3mdev = l3mdev_master_ifindex_rcu(dev);
++
++                      /* oif set to L3mdev directs lookup to its table;
++                       * reset to avoid oif match in fib_lookup
++                       */
++                      if (netif_is_l3_master(dev))
++                              fl->flowi_oif = 0;
++                      goto out;
+               }
+       }
+ 
+-      if (fl->flowi_iif) {
++      if (fl->flowi_iif > LOOPBACK_IFINDEX && !fl->flowi_l3mdev) {
+               dev = dev_get_by_index_rcu(net, fl->flowi_iif);
+-              if (dev) {
+-                      ifindex = l3mdev_master_ifindex_rcu(dev);
+-                      if (ifindex) {
+-                              fl->flowi_iif = ifindex;
+-                              fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF;
+-                      }
+-              }
++              if (dev)
++                      fl->flowi_l3mdev = l3mdev_master_ifindex_rcu(dev);
+       }
+ 
+ out:
+diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh
+index 6ecdbbe1b54fb..bed85001da735 100755
+--- a/tools/testing/selftests/net/fcnal-test.sh
++++ b/tools/testing/selftests/net/fcnal-test.sh
+@@ -750,7 +750,7 @@ ipv4_ping_vrf()
+               log_start
+               show_hint "Fails since address on vrf device is out of device scope"
+               run_cmd ping -c1 -w1 -I ${NSA_DEV} ${a}
+-              log_test_addr ${a} $? 1 "ping local, device bind"
++              log_test_addr ${a} $? 2 "ping local, device bind"
+       done
+ 
+       #
+-- 
+2.43.0
+
diff --git a/queue-5.15/net-dsa-b53-allow-lower-mtus-on-bcm5325-5365.patch b/queue-5.15/net-dsa-b53-allow-lower-mtus-on-bcm5325-5365.patch

new file mode 100644 (file)

index 0000000..2278e52
--- /dev/null
+++ b/queue-5.15/net-dsa-b53-allow-lower-mtus-on-bcm5325-5365.patch
@@ -0,0 +1,38 @@
+From 76b514e66a63a736f47e410bf4e8e8444f231d87 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 4 Oct 2024 10:47:20 +0200
+Subject: net: dsa: b53: allow lower MTUs on BCM5325/5365
+
+From: Jonas Gorski <jonas.gorski@gmail.com>
+
+[ Upstream commit e4b294f88a32438baf31762441f3dd1c996778be ]
+
+While BCM5325/5365 do not support jumbo frames, they do support slightly
+oversized frames, so do not error out if requesting a supported MTU for
+them.
+
+Fixes: 6ae5834b983a ("net: dsa: b53: add MTU configuration support")
+Signed-off-by: Jonas Gorski <jonas.gorski@gmail.com>
+Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/b53/b53_common.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
+index 3aa0a60d7c71e..cc030f8789053 100644
+--- a/drivers/net/dsa/b53/b53_common.c
++++ b/drivers/net/dsa/b53/b53_common.c
+@@ -2225,7 +2225,7 @@ static int b53_change_mtu(struct dsa_switch *ds, int port, int mtu)
+       bool allow_10_100;
+ 
+       if (is5325(dev) || is5365(dev))
+-              return -EOPNOTSUPP;
++              return 0;
+ 
+       if (!dsa_is_cpu_port(ds, port))
+               return 0;
+-- 
+2.43.0
+
diff --git a/queue-5.15/net-dsa-b53-fix-jumbo-frame-mtu-check.patch b/queue-5.15/net-dsa-b53-fix-jumbo-frame-mtu-check.patch

new file mode 100644 (file)

index 0000000..eff0c00
--- /dev/null
+++ b/queue-5.15/net-dsa-b53-fix-jumbo-frame-mtu-check.patch
@@ -0,0 +1,49 @@
+From ccc35cc3b6a5066142fdda9d2d2c68a974053072 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 4 Oct 2024 10:47:17 +0200
+Subject: net: dsa: b53: fix jumbo frame mtu check
+
+From: Jonas Gorski <jonas.gorski@gmail.com>
+
+[ Upstream commit 42fb3acf6826c6764ba79feb6e15229b43fd2f9f ]
+
+JMS_MIN_SIZE is the full ethernet frame length, while mtu is just the
+data payload size. Comparing these two meant that mtus between 1500 and
+1518 did not trigger enabling jumbo frames.
+
+So instead compare the set mtu to ETH_DATA_LEN, which is equal to
+JMS_MIN_SIZE - ETH_HLEN - ETH_FCS_LEN.
+
+Also do a check that the requested mtu is actually greater than the
+minimum length, else we do not need to enable jumbo frames.
+
+In practice this only introduced a very small range of mtus that did not
+work properly. Newer chips allow 2000 byte large frames by default, and
+older chips allow 1536 bytes long, which is equivalent to an mtu of
+1514. So effectively only mtus of 1515~1517 were broken.
+
+Fixes: 6ae5834b983a ("net: dsa: b53: add MTU configuration support")
+Signed-off-by: Jonas Gorski <jonas.gorski@gmail.com>
+Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/b53/b53_common.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
+index e23f184ffdda7..03047486b9b85 100644
+--- a/drivers/net/dsa/b53/b53_common.c
++++ b/drivers/net/dsa/b53/b53_common.c
+@@ -2226,7 +2226,7 @@ static int b53_change_mtu(struct dsa_switch *ds, int port, int mtu)
+       if (!dsa_is_cpu_port(ds, port))
+               return 0;
+ 
+-      enable_jumbo = (mtu >= JMS_MIN_SIZE);
++      enable_jumbo = (mtu > ETH_DATA_LEN);
+       allow_10_100 = (dev->chip_id == BCM583XX_DEVICE_ID);
+ 
+       return b53_set_jumbo(dev, enable_jumbo, allow_10_100);
+-- 
+2.43.0
+
diff --git a/queue-5.15/net-dsa-b53-fix-jumbo-frames-on-10-100-ports.patch b/queue-5.15/net-dsa-b53-fix-jumbo-frames-on-10-100-ports.patch

new file mode 100644 (file)

index 0000000..cb76de3
--- /dev/null
+++ b/queue-5.15/net-dsa-b53-fix-jumbo-frames-on-10-100-ports.patch
@@ -0,0 +1,42 @@
+From 1134a83b00aae41988700accb22e1bc56ed75778 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 4 Oct 2024 10:47:21 +0200
+Subject: net: dsa: b53: fix jumbo frames on 10/100 ports
+
+From: Jonas Gorski <jonas.gorski@gmail.com>
+
+[ Upstream commit 2f3dcd0d39affe5b9ba1c351ce0e270c8bdd5109 ]
+
+All modern chips support and need the 10_100 bit set for supporting jumbo
+frames on 10/100 ports, so instead of enabling it only for 583XX enable
+it for everything except bcm63xx, where the bit is writeable, but does
+nothing.
+
+Tested on BCM53115, where jumbo frames were dropped at 10/100 speeds
+without the bit set.
+
+Fixes: 6ae5834b983a ("net: dsa: b53: add MTU configuration support")
+Signed-off-by: Jonas Gorski <jonas.gorski@gmail.com>
+Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/b53/b53_common.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
+index cc030f8789053..df67262c30924 100644
+--- a/drivers/net/dsa/b53/b53_common.c
++++ b/drivers/net/dsa/b53/b53_common.c
+@@ -2231,7 +2231,7 @@ static int b53_change_mtu(struct dsa_switch *ds, int port, int mtu)
+               return 0;
+ 
+       enable_jumbo = (mtu > ETH_DATA_LEN);
+-      allow_10_100 = (dev->chip_id == BCM583XX_DEVICE_ID);
++      allow_10_100 = !is63xx(dev);
+ 
+       return b53_set_jumbo(dev, enable_jumbo, allow_10_100);
+ }
+-- 
+2.43.0
+
diff --git a/queue-5.15/net-dsa-b53-fix-max-mtu-for-1g-switches.patch b/queue-5.15/net-dsa-b53-fix-max-mtu-for-1g-switches.patch

new file mode 100644 (file)

index 0000000..9336eb1
--- /dev/null
+++ b/queue-5.15/net-dsa-b53-fix-max-mtu-for-1g-switches.patch
@@ -0,0 +1,58 @@
+From d1041fd2215060f338f3a371890b3626eaa18b14 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 4 Oct 2024 10:47:18 +0200
+Subject: net: dsa: b53: fix max MTU for 1g switches
+
+From: Jonas Gorski <jonas.gorski@gmail.com>
+
+[ Upstream commit 680a8217dc00dc7e7da57888b3c053289b60eb2b ]
+
+JMS_MAX_SIZE is the ethernet frame length, not the MTU, which is payload
+without ethernet headers.
+
+According to the datasheets maximum supported frame length for most
+gigabit switches is 9720 bytes, so convert that to the expected MTU when
+using VLAN tagged frames.
+
+Fixes: 6ae5834b983a ("net: dsa: b53: add MTU configuration support")
+Signed-off-by: Jonas Gorski <jonas.gorski@gmail.com>
+Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/b53/b53_common.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
+index 03047486b9b85..be1550332326a 100644
+--- a/drivers/net/dsa/b53/b53_common.c
++++ b/drivers/net/dsa/b53/b53_common.c
+@@ -27,6 +27,7 @@
+ #include <linux/phylink.h>
+ #include <linux/etherdevice.h>
+ #include <linux/if_bridge.h>
++#include <linux/if_vlan.h>
+ #include <net/dsa.h>
+ 
+ #include "b53_regs.h"
+@@ -224,6 +225,8 @@ static const struct b53_mib_desc b53_mibs_58xx[] = {
+ 
+ #define B53_MIBS_58XX_SIZE    ARRAY_SIZE(b53_mibs_58xx)
+ 
++#define B53_MAX_MTU           (9720 - ETH_HLEN - VLAN_HLEN - ETH_FCS_LEN)
++
+ static int b53_do_vlan_op(struct b53_device *dev, u8 op)
+ {
+       unsigned int i;
+@@ -2234,7 +2237,7 @@ static int b53_change_mtu(struct dsa_switch *ds, int port, int mtu)
+ 
+ static int b53_get_max_mtu(struct dsa_switch *ds, int port)
+ {
+-      return JMS_MAX_SIZE;
++      return B53_MAX_MTU;
+ }
+ 
+ static const struct dsa_switch_ops b53_switch_ops = {
+-- 
+2.43.0
+
diff --git a/queue-5.15/net-dsa-b53-fix-max-mtu-for-bcm5325-bcm5365.patch b/queue-5.15/net-dsa-b53-fix-max-mtu-for-bcm5325-bcm5365.patch

new file mode 100644 (file)

index 0000000..e67d70b
--- /dev/null
+++ b/queue-5.15/net-dsa-b53-fix-max-mtu-for-bcm5325-bcm5365.patch
@@ -0,0 +1,49 @@
+From f335134cabe40f02f715aaa97b038e891d6a8d54 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 4 Oct 2024 10:47:19 +0200
+Subject: net: dsa: b53: fix max MTU for BCM5325/BCM5365
+
+From: Jonas Gorski <jonas.gorski@gmail.com>
+
+[ Upstream commit ca8c1f71c10193c270f772d70d34b15ad765d6a8 ]
+
+BCM5325/BCM5365 do not support jumbo frames, so we should not report a
+jumbo frame mtu for them. But they do support so called "oversized"
+frames up to 1536 bytes long by default, so report an appropriate MTU.
+
+Fixes: 6ae5834b983a ("net: dsa: b53: add MTU configuration support")
+Signed-off-by: Jonas Gorski <jonas.gorski@gmail.com>
+Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/b53/b53_common.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
+index be1550332326a..3aa0a60d7c71e 100644
+--- a/drivers/net/dsa/b53/b53_common.c
++++ b/drivers/net/dsa/b53/b53_common.c
+@@ -225,6 +225,7 @@ static const struct b53_mib_desc b53_mibs_58xx[] = {
+ 
+ #define B53_MIBS_58XX_SIZE    ARRAY_SIZE(b53_mibs_58xx)
+ 
++#define B53_MAX_MTU_25                (1536 - ETH_HLEN - VLAN_HLEN - ETH_FCS_LEN)
+ #define B53_MAX_MTU           (9720 - ETH_HLEN - VLAN_HLEN - ETH_FCS_LEN)
+ 
+ static int b53_do_vlan_op(struct b53_device *dev, u8 op)
+@@ -2237,6 +2238,11 @@ static int b53_change_mtu(struct dsa_switch *ds, int port, int mtu)
+ 
+ static int b53_get_max_mtu(struct dsa_switch *ds, int port)
+ {
++      struct b53_device *dev = ds->priv;
++
++      if (is5325(dev) || is5365(dev))
++              return B53_MAX_MTU_25;
++
+       return B53_MAX_MTU;
+ }
+ 
+-- 
+2.43.0
+
diff --git a/queue-5.15/net-ibm-emac-mal-fix-wrong-goto.patch b/queue-5.15/net-ibm-emac-mal-fix-wrong-goto.patch

new file mode 100644 (file)

index 0000000..5eab53e
--- /dev/null
+++ b/queue-5.15/net-ibm-emac-mal-fix-wrong-goto.patch
@@ -0,0 +1,36 @@
+From b2500512d7e581b7ceb79baadd9347f205fcf264 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 7 Oct 2024 16:57:11 -0700
+Subject: net: ibm: emac: mal: fix wrong goto
+
+From: Rosen Penev <rosenp@gmail.com>
+
+[ Upstream commit 08c8acc9d8f3f70d62dd928571368d5018206490 ]
+
+dcr_map is called in the previous if and therefore needs to be unmapped.
+
+Fixes: 1ff0fcfcb1a6 ("ibm_newemac: Fix new MAL feature handling")
+Signed-off-by: Rosen Penev <rosenp@gmail.com>
+Link: https://patch.msgid.link/20241007235711.5714-1-rosenp@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/ibm/emac/mal.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/ibm/emac/mal.c b/drivers/net/ethernet/ibm/emac/mal.c
+index 075c07303f165..b095d5057b5eb 100644
+--- a/drivers/net/ethernet/ibm/emac/mal.c
++++ b/drivers/net/ethernet/ibm/emac/mal.c
+@@ -576,7 +576,7 @@ static int mal_probe(struct platform_device *ofdev)
+               printk(KERN_ERR "%pOF: Support for 405EZ not enabled!\n",
+                               ofdev->dev.of_node);
+               err = -ENODEV;
+-              goto fail;
++              goto fail_unmap;
+ #endif
+       }
+ 
+-- 
+2.43.0
+
diff --git a/queue-5.15/net-phy-bcm84881-fix-some-error-handling-paths.patch b/queue-5.15/net-phy-bcm84881-fix-some-error-handling-paths.patch

new file mode 100644 (file)

index 0000000..420bfdc
--- /dev/null
+++ b/queue-5.15/net-phy-bcm84881-fix-some-error-handling-paths.patch
@@ -0,0 +1,46 @@
+From 05b6ad43201f446307db01ceb41fa99e95509993 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 3 Oct 2024 21:03:21 +0200
+Subject: net: phy: bcm84881: Fix some error handling paths
+
+From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+
+[ Upstream commit 9234a2549cb6ac038bec36cc7c084218e9575513 ]
+
+If phy_read_mmd() fails, the error code stored in 'bmsr' should be returned
+instead of 'val' which is likely to be 0.
+
+Fixes: 75f4d8d10e01 ("net: phy: add Broadcom BCM84881 PHY driver")
+Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+Link: https://patch.msgid.link/3e1755b0c40340d00e089d6adae5bca2f8c79e53.1727982168.git.christophe.jaillet@wanadoo.fr
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/phy/bcm84881.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/phy/bcm84881.c b/drivers/net/phy/bcm84881.c
+index 9717a1626f3fa..37a64a37b2ae3 100644
+--- a/drivers/net/phy/bcm84881.c
++++ b/drivers/net/phy/bcm84881.c
+@@ -120,7 +120,7 @@ static int bcm84881_aneg_done(struct phy_device *phydev)
+ 
+       bmsr = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_C22 + MII_BMSR);
+       if (bmsr < 0)
+-              return val;
++              return bmsr;
+ 
+       return !!(val & MDIO_AN_STAT1_COMPLETE) &&
+              !!(bmsr & BMSR_ANEGCOMPLETE);
+@@ -146,7 +146,7 @@ static int bcm84881_read_status(struct phy_device *phydev)
+ 
+       bmsr = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_C22 + MII_BMSR);
+       if (bmsr < 0)
+-              return val;
++              return bmsr;
+ 
+       phydev->autoneg_complete = !!(val & MDIO_AN_STAT1_COMPLETE) &&
+                                  !!(bmsr & BMSR_ANEGCOMPLETE);
+-- 
+2.43.0
+
diff --git a/queue-5.15/net-phy-dp83869-fix-memory-corruption-when-enabling-.patch b/queue-5.15/net-phy-dp83869-fix-memory-corruption-when-enabling-.patch

new file mode 100644 (file)

index 0000000..2e95057
--- /dev/null
+++ b/queue-5.15/net-phy-dp83869-fix-memory-corruption-when-enabling-.patch
@@ -0,0 +1,43 @@
+From 053cab0ee45a5bcfd1d36d6cbebc2fc0a5766fdf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 2 Oct 2024 18:18:07 +0200
+Subject: net: phy: dp83869: fix memory corruption when enabling fiber
+
+From: Ingo van Lil <inguin@gmx.de>
+
+[ Upstream commit a842e443ca8184f2dc82ab307b43a8b38defd6a5 ]
+
+When configuring the fiber port, the DP83869 PHY driver incorrectly
+calls linkmode_set_bit() with a bit mask (1 << 10) rather than a bit
+number (10). This corrupts some other memory location -- in case of
+arm64 the priv pointer in the same structure.
+
+Since the advertising flags are updated from supported at the end of the
+function the incorrect line isn't needed at all and can be removed.
+
+Fixes: a29de52ba2a1 ("net: dp83869: Add ability to advertise Fiber connection")
+Signed-off-by: Ingo van Lil <inguin@gmx.de>
+Reviewed-by: Alexander Sverdlin <alexander.sverdlin@siemens.com>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Link: https://patch.msgid.link/20241002161807.440378-1-inguin@gmx.de
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/phy/dp83869.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/drivers/net/phy/dp83869.c b/drivers/net/phy/dp83869.c
+index cdf4e22fe85d0..a76fd5f11aca0 100644
+--- a/drivers/net/phy/dp83869.c
++++ b/drivers/net/phy/dp83869.c
+@@ -644,7 +644,6 @@ static int dp83869_configure_fiber(struct phy_device *phydev,
+                    phydev->supported);
+ 
+       linkmode_set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, phydev->supported);
+-      linkmode_set_bit(ADVERTISED_FIBRE, phydev->advertising);
+ 
+       if (dp83869->mode == DP83869_RGMII_1000_BASE) {
+               linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT,
+-- 
+2.43.0
+
diff --git a/queue-5.15/net-rtnetlink-add-msg-kind-names.patch b/queue-5.15/net-rtnetlink-add-msg-kind-names.patch

new file mode 100644 (file)

index 0000000..643e104
--- /dev/null
+++ b/queue-5.15/net-rtnetlink-add-msg-kind-names.patch
@@ -0,0 +1,73 @@
+From 964d19d6cae8e7b06ad3d8363ba6a5b85755169e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Apr 2022 13:51:51 +0300
+Subject: net: rtnetlink: add msg kind names
+
+From: Nikolay Aleksandrov <razor@blackwall.org>
+
+[ Upstream commit 12dc5c2cb7b269c5a1c6d02844f40bfce942a7a6 ]
+
+Add rtnl kind names instead of using raw values. We'll need to
+check for DEL kind later to validate bulk flag support.
+
+Signed-off-by: Nikolay Aleksandrov <razor@blackwall.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: d51705614f66 ("mctp: Handle error of rtnl_register_module().")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/rtnetlink.h | 7 +++++++
+ net/core/rtnetlink.c    | 6 +++---
+ 2 files changed, 10 insertions(+), 3 deletions(-)
+
+diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
+index a2a74e0e5c494..c9d3ae92c9321 100644
+--- a/include/net/rtnetlink.h
++++ b/include/net/rtnetlink.h
+@@ -13,6 +13,13 @@ enum rtnl_link_flags {
+       RTNL_FLAG_DOIT_UNLOCKED = 1,
+ };
+ 
++enum rtnl_kinds {
++      RTNL_KIND_NEW,
++      RTNL_KIND_DEL,
++      RTNL_KIND_GET,
++      RTNL_KIND_SET
++};
++
+ void rtnl_register(int protocol, int msgtype,
+                  rtnl_doit_func, rtnl_dumpit_func, unsigned int flags);
+ int rtnl_register_module(struct module *owner, int protocol, int msgtype,
+diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
+index eca7f6f4a52f5..8fc86d1edf561 100644
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -5521,11 +5521,11 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
+ {
+       struct net *net = sock_net(skb->sk);
+       struct rtnl_link *link;
++      enum rtnl_kinds kind;
+       struct module *owner;
+       int err = -EOPNOTSUPP;
+       rtnl_doit_func doit;
+       unsigned int flags;
+-      int kind;
+       int family;
+       int type;
+ 
+@@ -5542,11 +5542,11 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
+       family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
+       kind = type&3;
+ 
+-      if (kind != 2 && !netlink_net_capable(skb, CAP_NET_ADMIN))
++      if (kind != RTNL_KIND_GET && !netlink_net_capable(skb, CAP_NET_ADMIN))
+               return -EPERM;
+ 
+       rcu_read_lock();
+-      if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
++      if (kind == RTNL_KIND_GET && (nlh->nlmsg_flags & NLM_F_DUMP)) {
+               struct sock *rtnl;
+               rtnl_dumpit_func dumpit;
+               u32 min_dump_alloc = 0;
+-- 
+2.43.0
+
diff --git a/queue-5.15/net-sched-accept-tca_stab-only-for-root-qdisc.patch b/queue-5.15/net-sched-accept-tca_stab-only-for-root-qdisc.patch

new file mode 100644 (file)

index 0000000..f485b9b
--- /dev/null
+++ b/queue-5.15/net-sched-accept-tca_stab-only-for-root-qdisc.patch
@@ -0,0 +1,150 @@
+From 4704b26d2d142ea9613a4aad895ad4ea1fe77755 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 7 Oct 2024 18:41:30 +0000
+Subject: net/sched: accept TCA_STAB only for root qdisc
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 3cb7cf1540ddff5473d6baeb530228d19bc97b8a ]
+
+Most qdiscs maintain their backlog using qdisc_pkt_len(skb)
+on the assumption it is invariant between the enqueue()
+and dequeue() handlers.
+
+Unfortunately syzbot can crash a host rather easily using
+a TBF + SFQ combination, with an STAB on SFQ [1]
+
+We can't support TCA_STAB on arbitrary level, this would
+require to maintain per-qdisc storage.
+
+[1]
+[   88.796496] BUG: kernel NULL pointer dereference, address: 0000000000000000
+[   88.798611] #PF: supervisor read access in kernel mode
+[   88.799014] #PF: error_code(0x0000) - not-present page
+[   88.799506] PGD 0 P4D 0
+[   88.799829] Oops: Oops: 0000 [#1] SMP NOPTI
+[   88.800569] CPU: 14 UID: 0 PID: 2053 Comm: b371744477 Not tainted 6.12.0-rc1-virtme #1117
+[   88.801107] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
+[   88.801779] RIP: 0010:sfq_dequeue (net/sched/sch_sfq.c:272 net/sched/sch_sfq.c:499) sch_sfq
+[ 88.802544] Code: 0f b7 50 12 48 8d 04 d5 00 00 00 00 48 89 d6 48 29 d0 48 8b 91 c0 01 00 00 48 c1 e0 03 48 01 c2 66 83 7a 1a 00 7e c0 48 8b 3a <4c> 8b 07 4c 89 02 49 89 50 08 48 c7 47 08 00 00 00 00 48 c7 07 00
+All code
+========
+   0:  0f b7 50 12             movzwl 0x12(%rax),%edx
+   4:  48 8d 04 d5 00 00 00    lea    0x0(,%rdx,8),%rax
+   b:  00
+   c:  48 89 d6                mov    %rdx,%rsi
+   f:  48 29 d0                sub    %rdx,%rax
+  12:  48 8b 91 c0 01 00 00    mov    0x1c0(%rcx),%rdx
+  19:  48 c1 e0 03             shl    $0x3,%rax
+  1d:  48 01 c2                add    %rax,%rdx
+  20:  66 83 7a 1a 00          cmpw   $0x0,0x1a(%rdx)
+  25:  7e c0                   jle    0xffffffffffffffe7
+  27:  48 8b 3a                mov    (%rdx),%rdi
+  2a:* 4c 8b 07                mov    (%rdi),%r8               <-- trapping instruction
+  2d:  4c 89 02                mov    %r8,(%rdx)
+  30:  49 89 50 08             mov    %rdx,0x8(%r8)
+  34:  48 c7 47 08 00 00 00    movq   $0x0,0x8(%rdi)
+  3b:  00
+  3c:  48                      rex.W
+  3d:  c7                      .byte 0xc7
+  3e:  07                      (bad)
+       ...
+
+Code starting with the faulting instruction
+===========================================
+   0:  4c 8b 07                mov    (%rdi),%r8
+   3:  4c 89 02                mov    %r8,(%rdx)
+   6:  49 89 50 08             mov    %rdx,0x8(%r8)
+   a:  48 c7 47 08 00 00 00    movq   $0x0,0x8(%rdi)
+  11:  00
+  12:  48                      rex.W
+  13:  c7                      .byte 0xc7
+  14:  07                      (bad)
+       ...
+[   88.803721] RSP: 0018:ffff9a1f892b7d58 EFLAGS: 00000206
+[   88.804032] RAX: 0000000000000000 RBX: ffff9a1f8420c800 RCX: ffff9a1f8420c800
+[   88.804560] RDX: ffff9a1f81bc1440 RSI: 0000000000000000 RDI: 0000000000000000
+[   88.805056] RBP: ffffffffc04bb0e0 R08: 0000000000000001 R09: 00000000ff7f9a1f
+[   88.805473] R10: 000000000001001b R11: 0000000000009a1f R12: 0000000000000140
+[   88.806194] R13: 0000000000000001 R14: ffff9a1f886df400 R15: ffff9a1f886df4ac
+[   88.806734] FS:  00007f445601a740(0000) GS:ffff9a2e7fd80000(0000) knlGS:0000000000000000
+[   88.807225] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[   88.807672] CR2: 0000000000000000 CR3: 000000050cc46000 CR4: 00000000000006f0
+[   88.808165] Call Trace:
+[   88.808459]  <TASK>
+[   88.808710] ? __die (arch/x86/kernel/dumpstack.c:421 arch/x86/kernel/dumpstack.c:434)
+[   88.809261] ? page_fault_oops (arch/x86/mm/fault.c:715)
+[   88.809561] ? exc_page_fault (./arch/x86/include/asm/irqflags.h:26 ./arch/x86/include/asm/irqflags.h:87 ./arch/x86/include/asm/irqflags.h:147 arch/x86/mm/fault.c:1489 arch/x86/mm/fault.c:1539)
+[   88.809806] ? asm_exc_page_fault (./arch/x86/include/asm/idtentry.h:623)
+[   88.810074] ? sfq_dequeue (net/sched/sch_sfq.c:272 net/sched/sch_sfq.c:499) sch_sfq
+[   88.810411] sfq_reset (net/sched/sch_sfq.c:525) sch_sfq
+[   88.810671] qdisc_reset (./include/linux/skbuff.h:2135 ./include/linux/skbuff.h:2441 ./include/linux/skbuff.h:3304 ./include/linux/skbuff.h:3310 net/sched/sch_generic.c:1036)
+[   88.810950] tbf_reset (./include/linux/timekeeping.h:169 net/sched/sch_tbf.c:334) sch_tbf
+[   88.811208] qdisc_reset (./include/linux/skbuff.h:2135 ./include/linux/skbuff.h:2441 ./include/linux/skbuff.h:3304 ./include/linux/skbuff.h:3310 net/sched/sch_generic.c:1036)
+[   88.811484] netif_set_real_num_tx_queues (./include/linux/spinlock.h:396 ./include/net/sch_generic.h:768 net/core/dev.c:2958)
+[   88.811870] __tun_detach (drivers/net/tun.c:590 drivers/net/tun.c:673)
+[   88.812271] tun_chr_close (drivers/net/tun.c:702 drivers/net/tun.c:3517)
+[   88.812505] __fput (fs/file_table.c:432 (discriminator 1))
+[   88.812735] task_work_run (kernel/task_work.c:230)
+[   88.813016] do_exit (kernel/exit.c:940)
+[   88.813372] ? trace_hardirqs_on (kernel/trace/trace_preemptirq.c:58 (discriminator 4))
+[   88.813639] ? handle_mm_fault (./arch/x86/include/asm/irqflags.h:42 ./arch/x86/include/asm/irqflags.h:97 ./arch/x86/include/asm/irqflags.h:155 ./include/linux/memcontrol.h:1022 ./include/linux/memcontrol.h:1045 ./include/linux/memcontrol.h:1052 mm/memory.c:5928 mm/memory.c:6088)
+[   88.813867] do_group_exit (kernel/exit.c:1070)
+[   88.814138] __x64_sys_exit_group (kernel/exit.c:1099)
+[   88.814490] x64_sys_call (??:?)
+[   88.814791] do_syscall_64 (arch/x86/entry/common.c:52 (discriminator 1) arch/x86/entry/common.c:83 (discriminator 1))
+[   88.815012] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)
+[   88.815495] RIP: 0033:0x7f44560f1975
+
+Fixes: 175f9c1bba9b ("net_sched: Add size table for qdiscs")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Daniel Borkmann <daniel@iogearbox.net>
+Link: https://patch.msgid.link/20241007184130.3960565-1-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/sch_generic.h | 1 -
+ net/sched/sch_api.c       | 7 ++++++-
+ 2 files changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
+index 6906da5c733ea..0919dfd3a67a6 100644
+--- a/include/net/sch_generic.h
++++ b/include/net/sch_generic.h
+@@ -829,7 +829,6 @@ static inline void qdisc_calculate_pkt_len(struct sk_buff *skb,
+ static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+                               struct sk_buff **to_free)
+ {
+-      qdisc_calculate_pkt_len(skb, sch);
+       return sch->enqueue(skb, sch, to_free);
+ }
+ 
+diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
+index 7fdc2c1f87561..724bfeccc6e7f 100644
+--- a/net/sched/sch_api.c
++++ b/net/sched/sch_api.c
+@@ -589,7 +589,6 @@ void __qdisc_calculate_pkt_len(struct sk_buff *skb,
+               pkt_len = 1;
+       qdisc_skb_cb(skb)->pkt_len = pkt_len;
+ }
+-EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
+ 
+ void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
+ {
+@@ -1119,6 +1118,12 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
+                       return -EINVAL;
+               }
+ 
++              if (new &&
++                  !(parent->flags & TCQ_F_MQROOT) &&
++                  rcu_access_pointer(new->stab)) {
++                      NL_SET_ERR_MSG(extack, "STAB not supported on a non root");
++                      return -EINVAL;
++              }
+               err = cops->graft(parent, cl, new, &old, extack);
+               if (err)
+                       return err;
+-- 
+2.43.0
+
diff --git a/queue-5.15/netfilter-br_netfilter-fix-panic-with-metadata_dst-s.patch b/queue-5.15/netfilter-br_netfilter-fix-panic-with-metadata_dst-s.patch

new file mode 100644 (file)

index 0000000..46af930
--- /dev/null
+++ b/queue-5.15/netfilter-br_netfilter-fix-panic-with-metadata_dst-s.patch
@@ -0,0 +1,179 @@
+From fc4790342dd7d7f1ee67b06cebcd2c471f0449e8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 1 Oct 2024 08:43:59 -0700
+Subject: netfilter: br_netfilter: fix panic with metadata_dst skb
+
+From: Andy Roulin <aroulin@nvidia.com>
+
+[ Upstream commit f9ff7665cd128012868098bbd07e28993e314fdb ]
+
+Fix a kernel panic in the br_netfilter module when sending untagged
+traffic via a VxLAN device.
+This happens during the check for fragmentation in br_nf_dev_queue_xmit.
+
+It is dependent on:
+1) the br_netfilter module being loaded;
+2) net.bridge.bridge-nf-call-iptables set to 1;
+3) a bridge with a VxLAN (single-vxlan-device) netdevice as a bridge port;
+4) untagged frames with size higher than the VxLAN MTU forwarded/flooded
+
+When forwarding the untagged packet to the VxLAN bridge port, before
+the netfilter hooks are called, br_handle_egress_vlan_tunnel is called and
+changes the skb_dst to the tunnel dst. The tunnel_dst is a metadata type
+of dst, i.e., skb_valid_dst(skb) is false, and metadata->dst.dev is NULL.
+
+Then in the br_netfilter hooks, in br_nf_dev_queue_xmit, there's a check
+for frames that need to be fragmented: frames with higher MTU than the
+VxLAN device end up calling br_nf_ip_fragment, which in turn calls
+ip_skb_dst_mtu.
+
+The ip_dst_mtu tries to use the skb_dst(skb) as if it was a valid dst
+with valid dst->dev, thus the crash.
+
+This case was never supported in the first place, so drop the packet
+instead.
+
+PING 10.0.0.2 (10.0.0.2) from 0.0.0.0 h1-eth0: 2000(2028) bytes of data.
+[  176.291791] Unable to handle kernel NULL pointer dereference at
+virtual address 0000000000000110
+[  176.292101] Mem abort info:
+[  176.292184]   ESR = 0x0000000096000004
+[  176.292322]   EC = 0x25: DABT (current EL), IL = 32 bits
+[  176.292530]   SET = 0, FnV = 0
+[  176.292709]   EA = 0, S1PTW = 0
+[  176.292862]   FSC = 0x04: level 0 translation fault
+[  176.293013] Data abort info:
+[  176.293104]   ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000
+[  176.293488]   CM = 0, WnR = 0, TnD = 0, TagAccess = 0
+[  176.293787]   GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
+[  176.293995] user pgtable: 4k pages, 48-bit VAs, pgdp=0000000043ef5000
+[  176.294166] [0000000000000110] pgd=0000000000000000,
+p4d=0000000000000000
+[  176.294827] Internal error: Oops: 0000000096000004 [#1] PREEMPT SMP
+[  176.295252] Modules linked in: vxlan ip6_udp_tunnel udp_tunnel veth
+br_netfilter bridge stp llc ipv6 crct10dif_ce
+[  176.295923] CPU: 0 PID: 188 Comm: ping Not tainted
+6.8.0-rc3-g5b3fbd61b9d1 #2
+[  176.296314] Hardware name: linux,dummy-virt (DT)
+[  176.296535] pstate: 80000005 (Nzcv daif -PAN -UAO -TCO -DIT -SSBS
+BTYPE=--)
+[  176.296808] pc : br_nf_dev_queue_xmit+0x390/0x4ec [br_netfilter]
+[  176.297382] lr : br_nf_dev_queue_xmit+0x2ac/0x4ec [br_netfilter]
+[  176.297636] sp : ffff800080003630
+[  176.297743] x29: ffff800080003630 x28: 0000000000000008 x27:
+ffff6828c49ad9f8
+[  176.298093] x26: ffff6828c49ad000 x25: 0000000000000000 x24:
+00000000000003e8
+[  176.298430] x23: 0000000000000000 x22: ffff6828c4960b40 x21:
+ffff6828c3b16d28
+[  176.298652] x20: ffff6828c3167048 x19: ffff6828c3b16d00 x18:
+0000000000000014
+[  176.298926] x17: ffffb0476322f000 x16: ffffb7e164023730 x15:
+0000000095744632
+[  176.299296] x14: ffff6828c3f1c880 x13: 0000000000000002 x12:
+ffffb7e137926a70
+[  176.299574] x11: 0000000000000001 x10: ffff6828c3f1c898 x9 :
+0000000000000000
+[  176.300049] x8 : ffff6828c49bf070 x7 : 0008460f18d5f20e x6 :
+f20e0100bebafeca
+[  176.300302] x5 : ffff6828c7f918fe x4 : ffff6828c49bf070 x3 :
+0000000000000000
+[  176.300586] x2 : 0000000000000000 x1 : ffff6828c3c7ad00 x0 :
+ffff6828c7f918f0
+[  176.300889] Call trace:
+[  176.301123]  br_nf_dev_queue_xmit+0x390/0x4ec [br_netfilter]
+[  176.301411]  br_nf_post_routing+0x2a8/0x3e4 [br_netfilter]
+[  176.301703]  nf_hook_slow+0x48/0x124
+[  176.302060]  br_forward_finish+0xc8/0xe8 [bridge]
+[  176.302371]  br_nf_hook_thresh+0x124/0x134 [br_netfilter]
+[  176.302605]  br_nf_forward_finish+0x118/0x22c [br_netfilter]
+[  176.302824]  br_nf_forward_ip.part.0+0x264/0x290 [br_netfilter]
+[  176.303136]  br_nf_forward+0x2b8/0x4e0 [br_netfilter]
+[  176.303359]  nf_hook_slow+0x48/0x124
+[  176.303803]  __br_forward+0xc4/0x194 [bridge]
+[  176.304013]  br_flood+0xd4/0x168 [bridge]
+[  176.304300]  br_handle_frame_finish+0x1d4/0x5c4 [bridge]
+[  176.304536]  br_nf_hook_thresh+0x124/0x134 [br_netfilter]
+[  176.304978]  br_nf_pre_routing_finish+0x29c/0x494 [br_netfilter]
+[  176.305188]  br_nf_pre_routing+0x250/0x524 [br_netfilter]
+[  176.305428]  br_handle_frame+0x244/0x3cc [bridge]
+[  176.305695]  __netif_receive_skb_core.constprop.0+0x33c/0xecc
+[  176.306080]  __netif_receive_skb_one_core+0x40/0x8c
+[  176.306197]  __netif_receive_skb+0x18/0x64
+[  176.306369]  process_backlog+0x80/0x124
+[  176.306540]  __napi_poll+0x38/0x17c
+[  176.306636]  net_rx_action+0x124/0x26c
+[  176.306758]  __do_softirq+0x100/0x26c
+[  176.307051]  ____do_softirq+0x10/0x1c
+[  176.307162]  call_on_irq_stack+0x24/0x4c
+[  176.307289]  do_softirq_own_stack+0x1c/0x2c
+[  176.307396]  do_softirq+0x54/0x6c
+[  176.307485]  __local_bh_enable_ip+0x8c/0x98
+[  176.307637]  __dev_queue_xmit+0x22c/0xd28
+[  176.307775]  neigh_resolve_output+0xf4/0x1a0
+[  176.308018]  ip_finish_output2+0x1c8/0x628
+[  176.308137]  ip_do_fragment+0x5b4/0x658
+[  176.308279]  ip_fragment.constprop.0+0x48/0xec
+[  176.308420]  __ip_finish_output+0xa4/0x254
+[  176.308593]  ip_finish_output+0x34/0x130
+[  176.308814]  ip_output+0x6c/0x108
+[  176.308929]  ip_send_skb+0x50/0xf0
+[  176.309095]  ip_push_pending_frames+0x30/0x54
+[  176.309254]  raw_sendmsg+0x758/0xaec
+[  176.309568]  inet_sendmsg+0x44/0x70
+[  176.309667]  __sys_sendto+0x110/0x178
+[  176.309758]  __arm64_sys_sendto+0x28/0x38
+[  176.309918]  invoke_syscall+0x48/0x110
+[  176.310211]  el0_svc_common.constprop.0+0x40/0xe0
+[  176.310353]  do_el0_svc+0x1c/0x28
+[  176.310434]  el0_svc+0x34/0xb4
+[  176.310551]  el0t_64_sync_handler+0x120/0x12c
+[  176.310690]  el0t_64_sync+0x190/0x194
+[  176.311066] Code: f9402e61 79402aa2 927ff821 f9400023 (f9408860)
+[  176.315743] ---[ end trace 0000000000000000 ]---
+[  176.316060] Kernel panic - not syncing: Oops: Fatal exception in
+interrupt
+[  176.316371] Kernel Offset: 0x37e0e3000000 from 0xffff800080000000
+[  176.316564] PHYS_OFFSET: 0xffff97d780000000
+[  176.316782] CPU features: 0x0,88000203,3c020000,0100421b
+[  176.317210] Memory Limit: none
+[  176.317527] ---[ end Kernel panic - not syncing: Oops: Fatal
+Exception in interrupt ]---\
+
+Fixes: 11538d039ac6 ("bridge: vlan dst_metadata hooks in ingress and egress paths")
+Reviewed-by: Ido Schimmel <idosch@nvidia.com>
+Signed-off-by: Andy Roulin <aroulin@nvidia.com>
+Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
+Link: https://patch.msgid.link/20241001154400.22787-2-aroulin@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/bridge/br_netfilter_hooks.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
+index d0d41dbbfe382..7cd2bef583312 100644
+--- a/net/bridge/br_netfilter_hooks.c
++++ b/net/bridge/br_netfilter_hooks.c
+@@ -33,6 +33,7 @@
+ #include <net/ip.h>
+ #include <net/ipv6.h>
+ #include <net/addrconf.h>
++#include <net/dst_metadata.h>
+ #include <net/route.h>
+ #include <net/netfilter/br_netfilter.h>
+ #include <net/netns/generic.h>
+@@ -851,6 +852,10 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff
+               return br_dev_queue_push_xmit(net, sk, skb);
+       }
+ 
++      /* Fragmentation on metadata/template dst is not supported */
++      if (unlikely(!skb_valid_dst(skb)))
++              goto drop;
++
+       /* This is wrong! We should preserve the original fragment
+        * boundaries by preserving frag_list rather than refragmenting.
+        */
+-- 
+2.43.0
+
diff --git a/queue-5.15/netfilter-fib-check-correct-rtable-in-vrf-setups.patch b/queue-5.15/netfilter-fib-check-correct-rtable-in-vrf-setups.patch

new file mode 100644 (file)

index 0000000..a84a16f
--- /dev/null
+++ b/queue-5.15/netfilter-fib-check-correct-rtable-in-vrf-setups.patch
@@ -0,0 +1,80 @@
+From 671ef792c75d72e0a02e3fa87f88fbecbafbb851 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 9 Oct 2024 09:19:02 +0200
+Subject: netfilter: fib: check correct rtable in vrf setups
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit 05ef7055debc804e8083737402127975e7244fc4 ]
+
+We need to init l3mdev unconditionally, else main routing table is searched
+and incorrect result is returned unless strict (iif keyword) matching is
+requested.
+
+Next patch adds a selftest for this.
+
+Fixes: 2a8a7c0eaa87 ("netfilter: nft_fib: Fix for rpath check with VRF devices")
+Closes: https://bugzilla.netfilter.org/show_bug.cgi?id=1761
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/netfilter/nft_fib_ipv4.c | 4 +---
+ net/ipv6/netfilter/nft_fib_ipv6.c | 5 +++--
+ 2 files changed, 4 insertions(+), 5 deletions(-)
+
+diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c
+index 0f6a58558bab6..273b64e3f2f92 100644
+--- a/net/ipv4/netfilter/nft_fib_ipv4.c
++++ b/net/ipv4/netfilter/nft_fib_ipv4.c
+@@ -66,6 +66,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
+               .flowi4_scope = RT_SCOPE_UNIVERSE,
+               .flowi4_iif = LOOPBACK_IFINDEX,
+               .flowi4_uid = sock_net_uid(nft_net(pkt), NULL),
++              .flowi4_l3mdev = l3mdev_master_ifindex_rcu(nft_in(pkt)),
+       };
+       const struct net_device *oif;
+       const struct net_device *found;
+@@ -84,9 +85,6 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
+       else
+               oif = NULL;
+ 
+-      if (priv->flags & NFTA_FIB_F_IIF)
+-              fl4.flowi4_l3mdev = l3mdev_master_ifindex_rcu(oif);
+-
+       if (nft_hook(pkt) == NF_INET_PRE_ROUTING &&
+           nft_fib_is_loopback(pkt->skb, nft_in(pkt))) {
+               nft_fib_store_result(dest, priv, nft_in(pkt));
+diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
+index 4239b8056b5bd..1a08b00aa3213 100644
+--- a/net/ipv6/netfilter/nft_fib_ipv6.c
++++ b/net/ipv6/netfilter/nft_fib_ipv6.c
+@@ -37,8 +37,6 @@ static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv,
+       if (ipv6_addr_type(&fl6->daddr) & IPV6_ADDR_LINKLOCAL) {
+               lookup_flags |= RT6_LOOKUP_F_IFACE;
+               fl6->flowi6_oif = get_ifindex(dev ? dev : pkt->skb->dev);
+-      } else if (priv->flags & NFTA_FIB_F_IIF) {
+-              fl6->flowi6_l3mdev = l3mdev_master_ifindex_rcu(dev);
+       }
+ 
+       if (ipv6_addr_type(&fl6->saddr) & IPV6_ADDR_UNICAST)
+@@ -71,6 +69,8 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
+       else if (priv->flags & NFTA_FIB_F_OIF)
+               dev = nft_out(pkt);
+ 
++      fl6.flowi6_l3mdev = l3mdev_master_ifindex_rcu(dev);
++
+       nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph);
+ 
+       if (dev && nf_ipv6_chk_addr(nft_net(pkt), &fl6.daddr, dev, true))
+@@ -161,6 +161,7 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
+               .flowi6_iif = LOOPBACK_IFINDEX,
+               .flowi6_proto = pkt->tprot,
+               .flowi6_uid = sock_net_uid(nft_net(pkt), NULL),
++              .flowi6_l3mdev = l3mdev_master_ifindex_rcu(nft_in(pkt)),
+       };
+       struct rt6_info *rt;
+       int lookup_flags;
+-- 
+2.43.0
+
diff --git a/queue-5.15/netfilter-rpfilter-fib-populate-flowic_l3mdev-field.patch b/queue-5.15/netfilter-rpfilter-fib-populate-flowic_l3mdev-field.patch

new file mode 100644 (file)

index 0000000..750bfb5
--- /dev/null
+++ b/queue-5.15/netfilter-rpfilter-fib-populate-flowic_l3mdev-field.patch
@@ -0,0 +1,106 @@
+From 8780ce100c3197dd2c39a755ce35161cb1ecbfbe Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 5 Oct 2022 18:07:05 +0200
+Subject: netfilter: rpfilter/fib: Populate flowic_l3mdev field
+
+From: Phil Sutter <phil@nwl.cc>
+
+[ Upstream commit acc641ab95b66b813c1ce856c377a2bbe71e7f52 ]
+
+Use the introduced field for correct operation with VRF devices instead
+of conditionally overwriting flowic_oif. This is a partial revert of
+commit b575b24b8eee3 ("netfilter: Fix rpfilter dropping vrf packets by
+mistake"), implementing a simpler solution.
+
+Signed-off-by: Phil Sutter <phil@nwl.cc>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Reviewed-by: Guillaume Nault <gnault@redhat.com>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Stable-dep-of: 05ef7055debc ("netfilter: fib: check correct rtable in vrf setups")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/netfilter/ipt_rpfilter.c  | 2 +-
+ net/ipv4/netfilter/nft_fib_ipv4.c  | 2 +-
+ net/ipv6/netfilter/ip6t_rpfilter.c | 9 +++------
+ net/ipv6/netfilter/nft_fib_ipv6.c  | 5 ++---
+ 4 files changed, 7 insertions(+), 11 deletions(-)
+
+diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
+index 8cd3224d913e0..63f3e8219dd5a 100644
+--- a/net/ipv4/netfilter/ipt_rpfilter.c
++++ b/net/ipv4/netfilter/ipt_rpfilter.c
+@@ -78,7 +78,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
+       flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
+       flow.flowi4_tos = iph->tos & IPTOS_RT_MASK;
+       flow.flowi4_scope = RT_SCOPE_UNIVERSE;
+-      flow.flowi4_oif = l3mdev_master_ifindex_rcu(xt_in(par));
++      flow.flowi4_l3mdev = l3mdev_master_ifindex_rcu(xt_in(par));
+ 
+       return rpfilter_lookup_reverse(xt_net(par), &flow, xt_in(par), info->flags) ^ invert;
+ }
+diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c
+index 9e6f0f1275e2c..22168f12b3819 100644
+--- a/net/ipv4/netfilter/nft_fib_ipv4.c
++++ b/net/ipv4/netfilter/nft_fib_ipv4.c
+@@ -84,7 +84,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
+               oif = NULL;
+ 
+       if (priv->flags & NFTA_FIB_F_IIF)
+-              fl4.flowi4_oif = l3mdev_master_ifindex_rcu(oif);
++              fl4.flowi4_l3mdev = l3mdev_master_ifindex_rcu(oif);
+ 
+       if (nft_hook(pkt) == NF_INET_PRE_ROUTING &&
+           nft_fib_is_loopback(pkt->skb, nft_in(pkt))) {
+diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c
+index d800801a5dd27..69d86b040a6af 100644
+--- a/net/ipv6/netfilter/ip6t_rpfilter.c
++++ b/net/ipv6/netfilter/ip6t_rpfilter.c
+@@ -37,6 +37,7 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb,
+       bool ret = false;
+       struct flowi6 fl6 = {
+               .flowi6_iif = LOOPBACK_IFINDEX,
++              .flowi6_l3mdev = l3mdev_master_ifindex_rcu(dev),
+               .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
+               .flowi6_proto = iph->nexthdr,
+               .daddr = iph->saddr,
+@@ -55,9 +56,7 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb,
+       if (rpfilter_addr_linklocal(&iph->saddr)) {
+               lookup_flags |= RT6_LOOKUP_F_IFACE;
+               fl6.flowi6_oif = dev->ifindex;
+-      /* Set flowi6_oif for vrf devices to lookup route in l3mdev domain. */
+-      } else if (netif_is_l3_master(dev) || netif_is_l3_slave(dev) ||
+-                (flags & XT_RPFILTER_LOOSE) == 0)
++      } else if ((flags & XT_RPFILTER_LOOSE) == 0)
+               fl6.flowi6_oif = dev->ifindex;
+ 
+       rt = (void *)ip6_route_lookup(net, &fl6, skb, lookup_flags);
+@@ -72,9 +71,7 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb,
+               goto out;
+       }
+ 
+-      if (rt->rt6i_idev->dev == dev ||
+-          l3mdev_master_ifindex_rcu(rt->rt6i_idev->dev) == dev->ifindex ||
+-          (flags & XT_RPFILTER_LOOSE))
++      if (rt->rt6i_idev->dev == dev || (flags & XT_RPFILTER_LOOSE))
+               ret = true;
+  out:
+       ip6_rt_put(rt);
+diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
+index 602743f6dcee0..72a9a04920ab2 100644
+--- a/net/ipv6/netfilter/nft_fib_ipv6.c
++++ b/net/ipv6/netfilter/nft_fib_ipv6.c
+@@ -37,9 +37,8 @@ static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv,
+       if (ipv6_addr_type(&fl6->daddr) & IPV6_ADDR_LINKLOCAL) {
+               lookup_flags |= RT6_LOOKUP_F_IFACE;
+               fl6->flowi6_oif = get_ifindex(dev ? dev : pkt->skb->dev);
+-      } else if ((priv->flags & NFTA_FIB_F_IIF) &&
+-                 (netif_is_l3_master(dev) || netif_is_l3_slave(dev))) {
+-              fl6->flowi6_oif = dev->ifindex;
++      } else if (priv->flags & NFTA_FIB_F_IIF) {
++              fl6->flowi6_l3mdev = l3mdev_master_ifindex_rcu(dev);
+       }
+ 
+       if (ipv6_addr_type(&fl6->saddr) & IPV6_ADDR_UNICAST)
+-- 
+2.43.0
+
diff --git a/queue-5.15/netfilter-rpfilter-fib-set-flowic_uid-correctly-for-.patch b/queue-5.15/netfilter-rpfilter-fib-set-flowic_uid-correctly-for-.patch

new file mode 100644 (file)

index 0000000..60e93fa
--- /dev/null
+++ b/queue-5.15/netfilter-rpfilter-fib-set-flowic_uid-correctly-for-.patch
@@ -0,0 +1,91 @@
+From 8299e413b23dae0b5263a185a8f831eb6422ea73 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 13 Oct 2022 16:37:47 +0200
+Subject: netfilter: rpfilter/fib: Set ->flowic_uid correctly for user
+ namespaces.
+
+From: Guillaume Nault <gnault@redhat.com>
+
+[ Upstream commit 1fcc064b305a1aadeff0d4bff961094d27660acd ]
+
+Currently netfilter's rpfilter and fib modules implicitly initialise
+->flowic_uid with 0. This is normally the root UID. However, this isn't
+the case in user namespaces, where user ID 0 is mapped to a different
+kernel UID. By initialising ->flowic_uid with sock_net_uid(), we get
+the root UID of the user namespace, thus keeping the same behaviour
+whether or not we're running in a user namespace.
+
+Note, this is similar to commit 8bcfd0925ef1 ("ipv4: add missing
+initialization for flowi4_uid"), which fixed the rp_filter sysctl.
+
+Fixes: 622ec2c9d524 ("net: core: add UID to flows, rules, and routes")
+Signed-off-by: Guillaume Nault <gnault@redhat.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Stable-dep-of: 05ef7055debc ("netfilter: fib: check correct rtable in vrf setups")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/netfilter/ipt_rpfilter.c  | 1 +
+ net/ipv4/netfilter/nft_fib_ipv4.c  | 1 +
+ net/ipv6/netfilter/ip6t_rpfilter.c | 1 +
+ net/ipv6/netfilter/nft_fib_ipv6.c  | 2 ++
+ 4 files changed, 5 insertions(+)
+
+diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
+index 63f3e8219dd5a..26b3b0e2adcd7 100644
+--- a/net/ipv4/netfilter/ipt_rpfilter.c
++++ b/net/ipv4/netfilter/ipt_rpfilter.c
+@@ -79,6 +79,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
+       flow.flowi4_tos = iph->tos & IPTOS_RT_MASK;
+       flow.flowi4_scope = RT_SCOPE_UNIVERSE;
+       flow.flowi4_l3mdev = l3mdev_master_ifindex_rcu(xt_in(par));
++      flow.flowi4_uid = sock_net_uid(xt_net(par), NULL);
+ 
+       return rpfilter_lookup_reverse(xt_net(par), &flow, xt_in(par), info->flags) ^ invert;
+ }
+diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c
+index 22168f12b3819..0f6a58558bab6 100644
+--- a/net/ipv4/netfilter/nft_fib_ipv4.c
++++ b/net/ipv4/netfilter/nft_fib_ipv4.c
+@@ -65,6 +65,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
+       struct flowi4 fl4 = {
+               .flowi4_scope = RT_SCOPE_UNIVERSE,
+               .flowi4_iif = LOOPBACK_IFINDEX,
++              .flowi4_uid = sock_net_uid(nft_net(pkt), NULL),
+       };
+       const struct net_device *oif;
+       const struct net_device *found;
+diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c
+index 69d86b040a6af..a01d9b842bd07 100644
+--- a/net/ipv6/netfilter/ip6t_rpfilter.c
++++ b/net/ipv6/netfilter/ip6t_rpfilter.c
+@@ -40,6 +40,7 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb,
+               .flowi6_l3mdev = l3mdev_master_ifindex_rcu(dev),
+               .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
+               .flowi6_proto = iph->nexthdr,
++              .flowi6_uid = sock_net_uid(net, NULL),
+               .daddr = iph->saddr,
+       };
+       int lookup_flags;
+diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
+index 72a9a04920ab2..4239b8056b5bd 100644
+--- a/net/ipv6/netfilter/nft_fib_ipv6.c
++++ b/net/ipv6/netfilter/nft_fib_ipv6.c
+@@ -62,6 +62,7 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
+       struct flowi6 fl6 = {
+               .flowi6_iif = LOOPBACK_IFINDEX,
+               .flowi6_proto = pkt->tprot,
++              .flowi6_uid = sock_net_uid(nft_net(pkt), NULL),
+       };
+       u32 ret = 0;
+ 
+@@ -159,6 +160,7 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
+       struct flowi6 fl6 = {
+               .flowi6_iif = LOOPBACK_IFINDEX,
+               .flowi6_proto = pkt->tprot,
++              .flowi6_uid = sock_net_uid(nft_net(pkt), NULL),
+       };
+       struct rt6_info *rt;
+       int lookup_flags;
+-- 
+2.43.0
+
diff --git a/queue-5.15/netfilter-xtables-avoid-nfproto_unspec-where-needed.patch b/queue-5.15/netfilter-xtables-avoid-nfproto_unspec-where-needed.patch

new file mode 100644 (file)

index 0000000..bcb40fa
--- /dev/null
+++ b/queue-5.15/netfilter-xtables-avoid-nfproto_unspec-where-needed.patch
@@ -0,0 +1,996 @@
+From 5c48d72ba695bf34d34d24d2b48138618a563418 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 7 Oct 2024 11:28:16 +0200
+Subject: netfilter: xtables: avoid NFPROTO_UNSPEC where needed
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit 0bfcb7b71e735560077a42847f69597ec7dcc326 ]
+
+syzbot managed to call xt_cluster match via ebtables:
+
+ WARNING: CPU: 0 PID: 11 at net/netfilter/xt_cluster.c:72 xt_cluster_mt+0x196/0x780
+ [..]
+ ebt_do_table+0x174b/0x2a40
+
+Module registers to NFPROTO_UNSPEC, but it assumes ipv4/ipv6 packet
+processing.  As this is only useful to restrict locally terminating
+TCP/UDP traffic, register this for ipv4 and ipv6 family only.
+
+Pablo points out that this is a general issue, direct users of the
+set/getsockopt interface can call into targets/matches that were only
+intended for use with ip(6)tables.
+
+Check all UNSPEC matches and targets for similar issues:
+
+- matches and targets are fine except if they assume skb_network_header()
+  is valid -- this is only true when called from inet layer: ip(6) stack
+  pulls the ip/ipv6 header into linear data area.
+- targets that return XT_CONTINUE or other xtables verdicts must be
+  restricted too, they are incompatible with the ebtables traverser, e.g.
+  EBT_CONTINUE is a completely different value than XT_CONTINUE.
+
+Most matches/targets are changed to register for NFPROTO_IPV4/IPV6, as
+they are provided for use by ip(6)tables.
+
+The MARK target is also used by arptables, so register for NFPROTO_ARP too.
+
+While at it, bail out if connbytes fails to enable the corresponding
+conntrack family.
+
+This change passes the selftests in iptables.git.
+
+Reported-by: syzbot+256c348558aa5cf611a9@syzkaller.appspotmail.com
+Closes: https://lore.kernel.org/netfilter-devel/66fec2e2.050a0220.9ec68.0047.GAE@google.com/
+Fixes: 0269ea493734 ("netfilter: xtables: add cluster match")
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Co-developed-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/xt_CHECKSUM.c    |  33 ++++++----
+ net/netfilter/xt_CLASSIFY.c    |  16 ++++-
+ net/netfilter/xt_CONNSECMARK.c |  36 +++++++----
+ net/netfilter/xt_CT.c          | 106 +++++++++++++++++++++------------
+ net/netfilter/xt_IDLETIMER.c   |  59 ++++++++++++------
+ net/netfilter/xt_LED.c         |  39 ++++++++----
+ net/netfilter/xt_NFLOG.c       |  36 +++++++----
+ net/netfilter/xt_RATEEST.c     |  39 ++++++++----
+ net/netfilter/xt_SECMARK.c     |  27 ++++++++-
+ net/netfilter/xt_TRACE.c       |  35 +++++++----
+ net/netfilter/xt_addrtype.c    |  15 ++++-
+ net/netfilter/xt_cluster.c     |  33 ++++++----
+ net/netfilter/xt_connbytes.c   |   4 +-
+ net/netfilter/xt_connlimit.c   |  39 ++++++++----
+ net/netfilter/xt_connmark.c    |  28 ++++++++-
+ net/netfilter/xt_mark.c        |  42 +++++++++----
+ 16 files changed, 422 insertions(+), 165 deletions(-)
+
+diff --git a/net/netfilter/xt_CHECKSUM.c b/net/netfilter/xt_CHECKSUM.c
+index c8a639f561684..9d99f5a3d1764 100644
+--- a/net/netfilter/xt_CHECKSUM.c
++++ b/net/netfilter/xt_CHECKSUM.c
+@@ -63,24 +63,37 @@ static int checksum_tg_check(const struct xt_tgchk_param *par)
+       return 0;
+ }
+ 
+-static struct xt_target checksum_tg_reg __read_mostly = {
+-      .name           = "CHECKSUM",
+-      .family         = NFPROTO_UNSPEC,
+-      .target         = checksum_tg,
+-      .targetsize     = sizeof(struct xt_CHECKSUM_info),
+-      .table          = "mangle",
+-      .checkentry     = checksum_tg_check,
+-      .me             = THIS_MODULE,
++static struct xt_target checksum_tg_reg[] __read_mostly = {
++      {
++              .name           = "CHECKSUM",
++              .family         = NFPROTO_IPV4,
++              .target         = checksum_tg,
++              .targetsize     = sizeof(struct xt_CHECKSUM_info),
++              .table          = "mangle",
++              .checkentry     = checksum_tg_check,
++              .me             = THIS_MODULE,
++      },
++#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
++      {
++              .name           = "CHECKSUM",
++              .family         = NFPROTO_IPV6,
++              .target         = checksum_tg,
++              .targetsize     = sizeof(struct xt_CHECKSUM_info),
++              .table          = "mangle",
++              .checkentry     = checksum_tg_check,
++              .me             = THIS_MODULE,
++      },
++#endif
+ };
+ 
+ static int __init checksum_tg_init(void)
+ {
+-      return xt_register_target(&checksum_tg_reg);
++      return xt_register_targets(checksum_tg_reg, ARRAY_SIZE(checksum_tg_reg));
+ }
+ 
+ static void __exit checksum_tg_exit(void)
+ {
+-      xt_unregister_target(&checksum_tg_reg);
++      xt_unregister_targets(checksum_tg_reg, ARRAY_SIZE(checksum_tg_reg));
+ }
+ 
+ module_init(checksum_tg_init);
+diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c
+index 0accac98dea78..0ae8d8a1216e1 100644
+--- a/net/netfilter/xt_CLASSIFY.c
++++ b/net/netfilter/xt_CLASSIFY.c
+@@ -38,9 +38,9 @@ static struct xt_target classify_tg_reg[] __read_mostly = {
+       {
+               .name       = "CLASSIFY",
+               .revision   = 0,
+-              .family     = NFPROTO_UNSPEC,
++              .family     = NFPROTO_IPV4,
+               .hooks      = (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) |
+-                            (1 << NF_INET_POST_ROUTING),
++                            (1 << NF_INET_POST_ROUTING),
+               .target     = classify_tg,
+               .targetsize = sizeof(struct xt_classify_target_info),
+               .me         = THIS_MODULE,
+@@ -54,6 +54,18 @@ static struct xt_target classify_tg_reg[] __read_mostly = {
+               .targetsize = sizeof(struct xt_classify_target_info),
+               .me         = THIS_MODULE,
+       },
++#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
++      {
++              .name       = "CLASSIFY",
++              .revision   = 0,
++              .family     = NFPROTO_IPV6,
++              .hooks      = (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) |
++                            (1 << NF_INET_POST_ROUTING),
++              .target     = classify_tg,
++              .targetsize = sizeof(struct xt_classify_target_info),
++              .me         = THIS_MODULE,
++      },
++#endif
+ };
+ 
+ static int __init classify_tg_init(void)
+diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
+index 76acecf3e757a..1494b3ee30e11 100644
+--- a/net/netfilter/xt_CONNSECMARK.c
++++ b/net/netfilter/xt_CONNSECMARK.c
+@@ -114,25 +114,39 @@ static void connsecmark_tg_destroy(const struct xt_tgdtor_param *par)
+       nf_ct_netns_put(par->net, par->family);
+ }
+ 
+-static struct xt_target connsecmark_tg_reg __read_mostly = {
+-      .name       = "CONNSECMARK",
+-      .revision   = 0,
+-      .family     = NFPROTO_UNSPEC,
+-      .checkentry = connsecmark_tg_check,
+-      .destroy    = connsecmark_tg_destroy,
+-      .target     = connsecmark_tg,
+-      .targetsize = sizeof(struct xt_connsecmark_target_info),
+-      .me         = THIS_MODULE,
++static struct xt_target connsecmark_tg_reg[] __read_mostly = {
++      {
++              .name       = "CONNSECMARK",
++              .revision   = 0,
++              .family     = NFPROTO_IPV4,
++              .checkentry = connsecmark_tg_check,
++              .destroy    = connsecmark_tg_destroy,
++              .target     = connsecmark_tg,
++              .targetsize = sizeof(struct xt_connsecmark_target_info),
++              .me         = THIS_MODULE,
++      },
++#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
++      {
++              .name       = "CONNSECMARK",
++              .revision   = 0,
++              .family     = NFPROTO_IPV6,
++              .checkentry = connsecmark_tg_check,
++              .destroy    = connsecmark_tg_destroy,
++              .target     = connsecmark_tg,
++              .targetsize = sizeof(struct xt_connsecmark_target_info),
++              .me         = THIS_MODULE,
++      },
++#endif
+ };
+ 
+ static int __init connsecmark_tg_init(void)
+ {
+-      return xt_register_target(&connsecmark_tg_reg);
++      return xt_register_targets(connsecmark_tg_reg, ARRAY_SIZE(connsecmark_tg_reg));
+ }
+ 
+ static void __exit connsecmark_tg_exit(void)
+ {
+-      xt_unregister_target(&connsecmark_tg_reg);
++      xt_unregister_targets(connsecmark_tg_reg, ARRAY_SIZE(connsecmark_tg_reg));
+ }
+ 
+ module_init(connsecmark_tg_init);
+diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
+index 267757b0392a6..5d19cb059b197 100644
+--- a/net/netfilter/xt_CT.c
++++ b/net/netfilter/xt_CT.c
+@@ -300,10 +300,30 @@ static void xt_ct_tg_destroy_v1(const struct xt_tgdtor_param *par)
+       xt_ct_tg_destroy(par, par->targinfo);
+ }
+ 
++static unsigned int
++notrack_tg(struct sk_buff *skb, const struct xt_action_param *par)
++{
++      /* Previously seen (loopback)? Ignore. */
++      if (skb->_nfct != 0)
++              return XT_CONTINUE;
++
++      nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
++
++      return XT_CONTINUE;
++}
++
+ static struct xt_target xt_ct_tg_reg[] __read_mostly = {
++      {
++              .name           = "NOTRACK",
++              .revision       = 0,
++              .family         = NFPROTO_IPV4,
++              .target         = notrack_tg,
++              .table          = "raw",
++              .me             = THIS_MODULE,
++      },
+       {
+               .name           = "CT",
+-              .family         = NFPROTO_UNSPEC,
++              .family         = NFPROTO_IPV4,
+               .targetsize     = sizeof(struct xt_ct_target_info),
+               .usersize       = offsetof(struct xt_ct_target_info, ct),
+               .checkentry     = xt_ct_tg_check_v0,
+@@ -314,7 +334,7 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = {
+       },
+       {
+               .name           = "CT",
+-              .family         = NFPROTO_UNSPEC,
++              .family         = NFPROTO_IPV4,
+               .revision       = 1,
+               .targetsize     = sizeof(struct xt_ct_target_info_v1),
+               .usersize       = offsetof(struct xt_ct_target_info, ct),
+@@ -326,7 +346,7 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = {
+       },
+       {
+               .name           = "CT",
+-              .family         = NFPROTO_UNSPEC,
++              .family         = NFPROTO_IPV4,
+               .revision       = 2,
+               .targetsize     = sizeof(struct xt_ct_target_info_v1),
+               .usersize       = offsetof(struct xt_ct_target_info, ct),
+@@ -336,49 +356,61 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = {
+               .table          = "raw",
+               .me             = THIS_MODULE,
+       },
+-};
+-
+-static unsigned int
+-notrack_tg(struct sk_buff *skb, const struct xt_action_param *par)
+-{
+-      /* Previously seen (loopback)? Ignore. */
+-      if (skb->_nfct != 0)
+-              return XT_CONTINUE;
+-
+-      nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
+-
+-      return XT_CONTINUE;
+-}
+-
+-static struct xt_target notrack_tg_reg __read_mostly = {
+-      .name           = "NOTRACK",
+-      .revision       = 0,
+-      .family         = NFPROTO_UNSPEC,
+-      .target         = notrack_tg,
+-      .table          = "raw",
+-      .me             = THIS_MODULE,
++#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
++      {
++              .name           = "NOTRACK",
++              .revision       = 0,
++              .family         = NFPROTO_IPV6,
++              .target         = notrack_tg,
++              .table          = "raw",
++              .me             = THIS_MODULE,
++      },
++      {
++              .name           = "CT",
++              .family         = NFPROTO_IPV6,
++              .targetsize     = sizeof(struct xt_ct_target_info),
++              .usersize       = offsetof(struct xt_ct_target_info, ct),
++              .checkentry     = xt_ct_tg_check_v0,
++              .destroy        = xt_ct_tg_destroy_v0,
++              .target         = xt_ct_target_v0,
++              .table          = "raw",
++              .me             = THIS_MODULE,
++      },
++      {
++              .name           = "CT",
++              .family         = NFPROTO_IPV6,
++              .revision       = 1,
++              .targetsize     = sizeof(struct xt_ct_target_info_v1),
++              .usersize       = offsetof(struct xt_ct_target_info, ct),
++              .checkentry     = xt_ct_tg_check_v1,
++              .destroy        = xt_ct_tg_destroy_v1,
++              .target         = xt_ct_target_v1,
++              .table          = "raw",
++              .me             = THIS_MODULE,
++      },
++      {
++              .name           = "CT",
++              .family         = NFPROTO_IPV6,
++              .revision       = 2,
++              .targetsize     = sizeof(struct xt_ct_target_info_v1),
++              .usersize       = offsetof(struct xt_ct_target_info, ct),
++              .checkentry     = xt_ct_tg_check_v2,
++              .destroy        = xt_ct_tg_destroy_v1,
++              .target         = xt_ct_target_v1,
++              .table          = "raw",
++              .me             = THIS_MODULE,
++      },
++#endif
+ };
+ 
+ static int __init xt_ct_tg_init(void)
+ {
+-      int ret;
+-
+-      ret = xt_register_target(&notrack_tg_reg);
+-      if (ret < 0)
+-              return ret;
+-
+-      ret = xt_register_targets(xt_ct_tg_reg, ARRAY_SIZE(xt_ct_tg_reg));
+-      if (ret < 0) {
+-              xt_unregister_target(&notrack_tg_reg);
+-              return ret;
+-      }
+-      return 0;
++      return xt_register_targets(xt_ct_tg_reg, ARRAY_SIZE(xt_ct_tg_reg));
+ }
+ 
+ static void __exit xt_ct_tg_exit(void)
+ {
+       xt_unregister_targets(xt_ct_tg_reg, ARRAY_SIZE(xt_ct_tg_reg));
+-      xt_unregister_target(&notrack_tg_reg);
+ }
+ 
+ module_init(xt_ct_tg_init);
+diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
+index 2f7cf5ecebf4f..a097686adbbd7 100644
+--- a/net/netfilter/xt_IDLETIMER.c
++++ b/net/netfilter/xt_IDLETIMER.c
+@@ -458,28 +458,49 @@ static void idletimer_tg_destroy_v1(const struct xt_tgdtor_param *par)
+ 
+ static struct xt_target idletimer_tg[] __read_mostly = {
+       {
+-      .name           = "IDLETIMER",
+-      .family         = NFPROTO_UNSPEC,
+-      .target         = idletimer_tg_target,
+-      .targetsize     = sizeof(struct idletimer_tg_info),
+-      .usersize       = offsetof(struct idletimer_tg_info, timer),
+-      .checkentry     = idletimer_tg_checkentry,
+-      .destroy        = idletimer_tg_destroy,
+-      .me             = THIS_MODULE,
++              .name           = "IDLETIMER",
++              .family         = NFPROTO_IPV4,
++              .target         = idletimer_tg_target,
++              .targetsize     = sizeof(struct idletimer_tg_info),
++              .usersize       = offsetof(struct idletimer_tg_info, timer),
++              .checkentry     = idletimer_tg_checkentry,
++              .destroy        = idletimer_tg_destroy,
++              .me             = THIS_MODULE,
+       },
+       {
+-      .name           = "IDLETIMER",
+-      .family         = NFPROTO_UNSPEC,
+-      .revision       = 1,
+-      .target         = idletimer_tg_target_v1,
+-      .targetsize     = sizeof(struct idletimer_tg_info_v1),
+-      .usersize       = offsetof(struct idletimer_tg_info_v1, timer),
+-      .checkentry     = idletimer_tg_checkentry_v1,
+-      .destroy        = idletimer_tg_destroy_v1,
+-      .me             = THIS_MODULE,
++              .name           = "IDLETIMER",
++              .family         = NFPROTO_IPV4,
++              .revision       = 1,
++              .target         = idletimer_tg_target_v1,
++              .targetsize     = sizeof(struct idletimer_tg_info_v1),
++              .usersize       = offsetof(struct idletimer_tg_info_v1, timer),
++              .checkentry     = idletimer_tg_checkentry_v1,
++              .destroy        = idletimer_tg_destroy_v1,
++              .me             = THIS_MODULE,
+       },
+-
+-
++#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
++      {
++              .name           = "IDLETIMER",
++              .family         = NFPROTO_IPV6,
++              .target         = idletimer_tg_target,
++              .targetsize     = sizeof(struct idletimer_tg_info),
++              .usersize       = offsetof(struct idletimer_tg_info, timer),
++              .checkentry     = idletimer_tg_checkentry,
++              .destroy        = idletimer_tg_destroy,
++              .me             = THIS_MODULE,
++      },
++      {
++              .name           = "IDLETIMER",
++              .family         = NFPROTO_IPV6,
++              .revision       = 1,
++              .target         = idletimer_tg_target_v1,
++              .targetsize     = sizeof(struct idletimer_tg_info_v1),
++              .usersize       = offsetof(struct idletimer_tg_info_v1, timer),
++              .checkentry     = idletimer_tg_checkentry_v1,
++              .destroy        = idletimer_tg_destroy_v1,
++              .me             = THIS_MODULE,
++      },
++#endif
+ };
+ 
+ static struct class *idletimer_tg_class;
+diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c
+index 0371c387b0d1f..211bfa2a2ac04 100644
+--- a/net/netfilter/xt_LED.c
++++ b/net/netfilter/xt_LED.c
+@@ -176,26 +176,41 @@ static void led_tg_destroy(const struct xt_tgdtor_param *par)
+       kfree(ledinternal);
+ }
+ 
+-static struct xt_target led_tg_reg __read_mostly = {
+-      .name           = "LED",
+-      .revision       = 0,
+-      .family         = NFPROTO_UNSPEC,
+-      .target         = led_tg,
+-      .targetsize     = sizeof(struct xt_led_info),
+-      .usersize       = offsetof(struct xt_led_info, internal_data),
+-      .checkentry     = led_tg_check,
+-      .destroy        = led_tg_destroy,
+-      .me             = THIS_MODULE,
++static struct xt_target led_tg_reg[] __read_mostly = {
++      {
++              .name           = "LED",
++              .revision       = 0,
++              .family         = NFPROTO_IPV4,
++              .target         = led_tg,
++              .targetsize     = sizeof(struct xt_led_info),
++              .usersize       = offsetof(struct xt_led_info, internal_data),
++              .checkentry     = led_tg_check,
++              .destroy        = led_tg_destroy,
++              .me             = THIS_MODULE,
++      },
++#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
++      {
++              .name           = "LED",
++              .revision       = 0,
++              .family         = NFPROTO_IPV6,
++              .target         = led_tg,
++              .targetsize     = sizeof(struct xt_led_info),
++              .usersize       = offsetof(struct xt_led_info, internal_data),
++              .checkentry     = led_tg_check,
++              .destroy        = led_tg_destroy,
++              .me             = THIS_MODULE,
++      },
++#endif
+ };
+ 
+ static int __init led_tg_init(void)
+ {
+-      return xt_register_target(&led_tg_reg);
++      return xt_register_targets(led_tg_reg, ARRAY_SIZE(led_tg_reg));
+ }
+ 
+ static void __exit led_tg_exit(void)
+ {
+-      xt_unregister_target(&led_tg_reg);
++      xt_unregister_targets(led_tg_reg, ARRAY_SIZE(led_tg_reg));
+ }
+ 
+ module_init(led_tg_init);
+diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
+index e660c3710a109..d80abd6ccaf8f 100644
+--- a/net/netfilter/xt_NFLOG.c
++++ b/net/netfilter/xt_NFLOG.c
+@@ -64,25 +64,39 @@ static void nflog_tg_destroy(const struct xt_tgdtor_param *par)
+       nf_logger_put(par->family, NF_LOG_TYPE_ULOG);
+ }
+ 
+-static struct xt_target nflog_tg_reg __read_mostly = {
+-      .name       = "NFLOG",
+-      .revision   = 0,
+-      .family     = NFPROTO_UNSPEC,
+-      .checkentry = nflog_tg_check,
+-      .destroy    = nflog_tg_destroy,
+-      .target     = nflog_tg,
+-      .targetsize = sizeof(struct xt_nflog_info),
+-      .me         = THIS_MODULE,
++static struct xt_target nflog_tg_reg[] __read_mostly = {
++      {
++              .name       = "NFLOG",
++              .revision   = 0,
++              .family     = NFPROTO_IPV4,
++              .checkentry = nflog_tg_check,
++              .destroy    = nflog_tg_destroy,
++              .target     = nflog_tg,
++              .targetsize = sizeof(struct xt_nflog_info),
++              .me         = THIS_MODULE,
++      },
++#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
++      {
++              .name       = "NFLOG",
++              .revision   = 0,
++              .family     = NFPROTO_IPV6,
++              .checkentry = nflog_tg_check,
++              .destroy    = nflog_tg_destroy,
++              .target     = nflog_tg,
++              .targetsize = sizeof(struct xt_nflog_info),
++              .me         = THIS_MODULE,
++      },
++#endif
+ };
+ 
+ static int __init nflog_tg_init(void)
+ {
+-      return xt_register_target(&nflog_tg_reg);
++      return xt_register_targets(nflog_tg_reg, ARRAY_SIZE(nflog_tg_reg));
+ }
+ 
+ static void __exit nflog_tg_exit(void)
+ {
+-      xt_unregister_target(&nflog_tg_reg);
++      xt_unregister_targets(nflog_tg_reg, ARRAY_SIZE(nflog_tg_reg));
+ }
+ 
+ module_init(nflog_tg_init);
+diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
+index 0d5c422f87452..e6b9a7d4bd86b 100644
+--- a/net/netfilter/xt_RATEEST.c
++++ b/net/netfilter/xt_RATEEST.c
+@@ -178,16 +178,31 @@ static void xt_rateest_tg_destroy(const struct xt_tgdtor_param *par)
+       xt_rateest_put(par->net, info->est);
+ }
+ 
+-static struct xt_target xt_rateest_tg_reg __read_mostly = {
+-      .name       = "RATEEST",
+-      .revision   = 0,
+-      .family     = NFPROTO_UNSPEC,
+-      .target     = xt_rateest_tg,
+-      .checkentry = xt_rateest_tg_checkentry,
+-      .destroy    = xt_rateest_tg_destroy,
+-      .targetsize = sizeof(struct xt_rateest_target_info),
+-      .usersize   = offsetof(struct xt_rateest_target_info, est),
+-      .me         = THIS_MODULE,
++static struct xt_target xt_rateest_tg_reg[] __read_mostly = {
++      {
++              .name       = "RATEEST",
++              .revision   = 0,
++              .family     = NFPROTO_IPV4,
++              .target     = xt_rateest_tg,
++              .checkentry = xt_rateest_tg_checkentry,
++              .destroy    = xt_rateest_tg_destroy,
++              .targetsize = sizeof(struct xt_rateest_target_info),
++              .usersize   = offsetof(struct xt_rateest_target_info, est),
++              .me         = THIS_MODULE,
++      },
++#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
++      {
++              .name       = "RATEEST",
++              .revision   = 0,
++              .family     = NFPROTO_IPV6,
++              .target     = xt_rateest_tg,
++              .checkentry = xt_rateest_tg_checkentry,
++              .destroy    = xt_rateest_tg_destroy,
++              .targetsize = sizeof(struct xt_rateest_target_info),
++              .usersize   = offsetof(struct xt_rateest_target_info, est),
++              .me         = THIS_MODULE,
++      },
++#endif
+ };
+ 
+ static __net_init int xt_rateest_net_init(struct net *net)
+@@ -213,12 +228,12 @@ static int __init xt_rateest_tg_init(void)
+ 
+       if (err)
+               return err;
+-      return xt_register_target(&xt_rateest_tg_reg);
++      return xt_register_targets(xt_rateest_tg_reg, ARRAY_SIZE(xt_rateest_tg_reg));
+ }
+ 
+ static void __exit xt_rateest_tg_fini(void)
+ {
+-      xt_unregister_target(&xt_rateest_tg_reg);
++      xt_unregister_targets(xt_rateest_tg_reg, ARRAY_SIZE(xt_rateest_tg_reg));
+       unregister_pernet_subsys(&xt_rateest_net_ops);
+ }
+ 
+diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
+index 498a0bf6f0444..5bc5ea505eb9e 100644
+--- a/net/netfilter/xt_SECMARK.c
++++ b/net/netfilter/xt_SECMARK.c
+@@ -157,7 +157,7 @@ static struct xt_target secmark_tg_reg[] __read_mostly = {
+       {
+               .name           = "SECMARK",
+               .revision       = 0,
+-              .family         = NFPROTO_UNSPEC,
++              .family         = NFPROTO_IPV4,
+               .checkentry     = secmark_tg_check_v0,
+               .destroy        = secmark_tg_destroy,
+               .target         = secmark_tg_v0,
+@@ -167,7 +167,7 @@ static struct xt_target secmark_tg_reg[] __read_mostly = {
+       {
+               .name           = "SECMARK",
+               .revision       = 1,
+-              .family         = NFPROTO_UNSPEC,
++              .family         = NFPROTO_IPV4,
+               .checkentry     = secmark_tg_check_v1,
+               .destroy        = secmark_tg_destroy,
+               .target         = secmark_tg_v1,
+@@ -175,6 +175,29 @@ static struct xt_target secmark_tg_reg[] __read_mostly = {
+               .usersize       = offsetof(struct xt_secmark_target_info_v1, secid),
+               .me             = THIS_MODULE,
+       },
++#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
++      {
++              .name           = "SECMARK",
++              .revision       = 0,
++              .family         = NFPROTO_IPV6,
++              .checkentry     = secmark_tg_check_v0,
++              .destroy        = secmark_tg_destroy,
++              .target         = secmark_tg_v0,
++              .targetsize     = sizeof(struct xt_secmark_target_info),
++              .me             = THIS_MODULE,
++      },
++      {
++              .name           = "SECMARK",
++              .revision       = 1,
++              .family         = NFPROTO_IPV6,
++              .checkentry     = secmark_tg_check_v1,
++              .destroy        = secmark_tg_destroy,
++              .target         = secmark_tg_v1,
++              .targetsize     = sizeof(struct xt_secmark_target_info_v1),
++              .usersize       = offsetof(struct xt_secmark_target_info_v1, secid),
++              .me             = THIS_MODULE,
++      },
++#endif
+ };
+ 
+ static int __init secmark_tg_init(void)
+diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c
+index 5582dce98cae7..f3fa4f11348cd 100644
+--- a/net/netfilter/xt_TRACE.c
++++ b/net/netfilter/xt_TRACE.c
+@@ -29,25 +29,39 @@ trace_tg(struct sk_buff *skb, const struct xt_action_param *par)
+       return XT_CONTINUE;
+ }
+ 
+-static struct xt_target trace_tg_reg __read_mostly = {
+-      .name           = "TRACE",
+-      .revision       = 0,
+-      .family         = NFPROTO_UNSPEC,
+-      .table          = "raw",
+-      .target         = trace_tg,
+-      .checkentry     = trace_tg_check,
+-      .destroy        = trace_tg_destroy,
+-      .me             = THIS_MODULE,
++static struct xt_target trace_tg_reg[] __read_mostly = {
++      {
++              .name           = "TRACE",
++              .revision       = 0,
++              .family         = NFPROTO_IPV4,
++              .table          = "raw",
++              .target         = trace_tg,
++              .checkentry     = trace_tg_check,
++              .destroy        = trace_tg_destroy,
++              .me             = THIS_MODULE,
++      },
++#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
++      {
++              .name           = "TRACE",
++              .revision       = 0,
++              .family         = NFPROTO_IPV6,
++              .table          = "raw",
++              .target         = trace_tg,
++              .checkentry     = trace_tg_check,
++              .destroy        = trace_tg_destroy,
++              .me             = THIS_MODULE,
++      },
++#endif
+ };
+ 
+ static int __init trace_tg_init(void)
+ {
+-      return xt_register_target(&trace_tg_reg);
++      return xt_register_targets(trace_tg_reg, ARRAY_SIZE(trace_tg_reg));
+ }
+ 
+ static void __exit trace_tg_exit(void)
+ {
+-      xt_unregister_target(&trace_tg_reg);
++      xt_unregister_targets(trace_tg_reg, ARRAY_SIZE(trace_tg_reg));
+ }
+ 
+ module_init(trace_tg_init);
+diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c
+index e9b2181e8c425..a770889431071 100644
+--- a/net/netfilter/xt_addrtype.c
++++ b/net/netfilter/xt_addrtype.c
+@@ -208,13 +208,24 @@ static struct xt_match addrtype_mt_reg[] __read_mostly = {
+       },
+       {
+               .name           = "addrtype",
+-              .family         = NFPROTO_UNSPEC,
++              .family         = NFPROTO_IPV4,
+               .revision       = 1,
+               .match          = addrtype_mt_v1,
+               .checkentry     = addrtype_mt_checkentry_v1,
+               .matchsize      = sizeof(struct xt_addrtype_info_v1),
+               .me             = THIS_MODULE
+-      }
++      },
++#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
++      {
++              .name           = "addrtype",
++              .family         = NFPROTO_IPV6,
++              .revision       = 1,
++              .match          = addrtype_mt_v1,
++              .checkentry     = addrtype_mt_checkentry_v1,
++              .matchsize      = sizeof(struct xt_addrtype_info_v1),
++              .me             = THIS_MODULE
++      },
++#endif
+ };
+ 
+ static int __init addrtype_mt_init(void)
+diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c
+index a047a545371e1..908fd5f2c3c84 100644
+--- a/net/netfilter/xt_cluster.c
++++ b/net/netfilter/xt_cluster.c
+@@ -146,24 +146,37 @@ static void xt_cluster_mt_destroy(const struct xt_mtdtor_param *par)
+       nf_ct_netns_put(par->net, par->family);
+ }
+ 
+-static struct xt_match xt_cluster_match __read_mostly = {
+-      .name           = "cluster",
+-      .family         = NFPROTO_UNSPEC,
+-      .match          = xt_cluster_mt,
+-      .checkentry     = xt_cluster_mt_checkentry,
+-      .matchsize      = sizeof(struct xt_cluster_match_info),
+-      .destroy        = xt_cluster_mt_destroy,
+-      .me             = THIS_MODULE,
++static struct xt_match xt_cluster_match[] __read_mostly = {
++      {
++              .name           = "cluster",
++              .family         = NFPROTO_IPV4,
++              .match          = xt_cluster_mt,
++              .checkentry     = xt_cluster_mt_checkentry,
++              .matchsize      = sizeof(struct xt_cluster_match_info),
++              .destroy        = xt_cluster_mt_destroy,
++              .me             = THIS_MODULE,
++      },
++#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
++      {
++              .name           = "cluster",
++              .family         = NFPROTO_IPV6,
++              .match          = xt_cluster_mt,
++              .checkentry     = xt_cluster_mt_checkentry,
++              .matchsize      = sizeof(struct xt_cluster_match_info),
++              .destroy        = xt_cluster_mt_destroy,
++              .me             = THIS_MODULE,
++      },
++#endif
+ };
+ 
+ static int __init xt_cluster_mt_init(void)
+ {
+-      return xt_register_match(&xt_cluster_match);
++      return xt_register_matches(xt_cluster_match, ARRAY_SIZE(xt_cluster_match));
+ }
+ 
+ static void __exit xt_cluster_mt_fini(void)
+ {
+-      xt_unregister_match(&xt_cluster_match);
++      xt_unregister_matches(xt_cluster_match, ARRAY_SIZE(xt_cluster_match));
+ }
+ 
+ MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
+index 93cb018c3055f..2aabdcea87072 100644
+--- a/net/netfilter/xt_connbytes.c
++++ b/net/netfilter/xt_connbytes.c
+@@ -111,9 +111,11 @@ static int connbytes_mt_check(const struct xt_mtchk_param *par)
+               return -EINVAL;
+ 
+       ret = nf_ct_netns_get(par->net, par->family);
+-      if (ret < 0)
++      if (ret < 0) {
+               pr_info_ratelimited("cannot load conntrack support for proto=%u\n",
+                                   par->family);
++              return ret;
++      }
+ 
+       /*
+        * This filter cannot function correctly unless connection tracking
+diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
+index 46fcac75f7268..9943a2bf7a7b8 100644
+--- a/net/netfilter/xt_connlimit.c
++++ b/net/netfilter/xt_connlimit.c
+@@ -106,26 +106,41 @@ static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
+       nf_conncount_destroy(par->net, par->family, info->data);
+ }
+ 
+-static struct xt_match connlimit_mt_reg __read_mostly = {
+-      .name       = "connlimit",
+-      .revision   = 1,
+-      .family     = NFPROTO_UNSPEC,
+-      .checkentry = connlimit_mt_check,
+-      .match      = connlimit_mt,
+-      .matchsize  = sizeof(struct xt_connlimit_info),
+-      .usersize   = offsetof(struct xt_connlimit_info, data),
+-      .destroy    = connlimit_mt_destroy,
+-      .me         = THIS_MODULE,
++static struct xt_match connlimit_mt_reg[] __read_mostly = {
++      {
++              .name       = "connlimit",
++              .revision   = 1,
++              .family     = NFPROTO_IPV4,
++              .checkentry = connlimit_mt_check,
++              .match      = connlimit_mt,
++              .matchsize  = sizeof(struct xt_connlimit_info),
++              .usersize   = offsetof(struct xt_connlimit_info, data),
++              .destroy    = connlimit_mt_destroy,
++              .me         = THIS_MODULE,
++      },
++#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
++      {
++              .name       = "connlimit",
++              .revision   = 1,
++              .family     = NFPROTO_IPV6,
++              .checkentry = connlimit_mt_check,
++              .match      = connlimit_mt,
++              .matchsize  = sizeof(struct xt_connlimit_info),
++              .usersize   = offsetof(struct xt_connlimit_info, data),
++              .destroy    = connlimit_mt_destroy,
++              .me         = THIS_MODULE,
++      },
++#endif
+ };
+ 
+ static int __init connlimit_mt_init(void)
+ {
+-      return xt_register_match(&connlimit_mt_reg);
++      return xt_register_matches(connlimit_mt_reg, ARRAY_SIZE(connlimit_mt_reg));
+ }
+ 
+ static void __exit connlimit_mt_exit(void)
+ {
+-      xt_unregister_match(&connlimit_mt_reg);
++      xt_unregister_matches(connlimit_mt_reg, ARRAY_SIZE(connlimit_mt_reg));
+ }
+ 
+ module_init(connlimit_mt_init);
+diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
+index ad3c033db64e7..4277084de2e70 100644
+--- a/net/netfilter/xt_connmark.c
++++ b/net/netfilter/xt_connmark.c
+@@ -151,7 +151,7 @@ static struct xt_target connmark_tg_reg[] __read_mostly = {
+       {
+               .name           = "CONNMARK",
+               .revision       = 1,
+-              .family         = NFPROTO_UNSPEC,
++              .family         = NFPROTO_IPV4,
+               .checkentry     = connmark_tg_check,
+               .target         = connmark_tg,
+               .targetsize     = sizeof(struct xt_connmark_tginfo1),
+@@ -161,13 +161,35 @@ static struct xt_target connmark_tg_reg[] __read_mostly = {
+       {
+               .name           = "CONNMARK",
+               .revision       = 2,
+-              .family         = NFPROTO_UNSPEC,
++              .family         = NFPROTO_IPV4,
+               .checkentry     = connmark_tg_check,
+               .target         = connmark_tg_v2,
+               .targetsize     = sizeof(struct xt_connmark_tginfo2),
+               .destroy        = connmark_tg_destroy,
+               .me             = THIS_MODULE,
+-      }
++      },
++#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
++      {
++              .name           = "CONNMARK",
++              .revision       = 1,
++              .family         = NFPROTO_IPV6,
++              .checkentry     = connmark_tg_check,
++              .target         = connmark_tg,
++              .targetsize     = sizeof(struct xt_connmark_tginfo1),
++              .destroy        = connmark_tg_destroy,
++              .me             = THIS_MODULE,
++      },
++      {
++              .name           = "CONNMARK",
++              .revision       = 2,
++              .family         = NFPROTO_IPV6,
++              .checkentry     = connmark_tg_check,
++              .target         = connmark_tg_v2,
++              .targetsize     = sizeof(struct xt_connmark_tginfo2),
++              .destroy        = connmark_tg_destroy,
++              .me             = THIS_MODULE,
++      },
++#endif
+ };
+ 
+ static struct xt_match connmark_mt_reg __read_mostly = {
+diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c
+index 1ad74b5920b53..f76fe04fc9a4e 100644
+--- a/net/netfilter/xt_mark.c
++++ b/net/netfilter/xt_mark.c
+@@ -39,13 +39,35 @@ mark_mt(const struct sk_buff *skb, struct xt_action_param *par)
+       return ((skb->mark & info->mask) == info->mark) ^ info->invert;
+ }
+ 
+-static struct xt_target mark_tg_reg __read_mostly = {
+-      .name           = "MARK",
+-      .revision       = 2,
+-      .family         = NFPROTO_UNSPEC,
+-      .target         = mark_tg,
+-      .targetsize     = sizeof(struct xt_mark_tginfo2),
+-      .me             = THIS_MODULE,
++static struct xt_target mark_tg_reg[] __read_mostly = {
++      {
++              .name           = "MARK",
++              .revision       = 2,
++              .family         = NFPROTO_IPV4,
++              .target         = mark_tg,
++              .targetsize     = sizeof(struct xt_mark_tginfo2),
++              .me             = THIS_MODULE,
++      },
++#if IS_ENABLED(CONFIG_IP_NF_ARPTABLES)
++      {
++              .name           = "MARK",
++              .revision       = 2,
++              .family         = NFPROTO_ARP,
++              .target         = mark_tg,
++              .targetsize     = sizeof(struct xt_mark_tginfo2),
++              .me             = THIS_MODULE,
++      },
++#endif
++#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
++      {
++              .name           = "MARK",
++              .revision       = 2,
++              .family         = NFPROTO_IPV6,
++              .target         = mark_tg,
++              .targetsize     = sizeof(struct xt_mark_tginfo2),
++              .me             = THIS_MODULE,
++      },
++#endif
+ };
+ 
+ static struct xt_match mark_mt_reg __read_mostly = {
+@@ -61,12 +83,12 @@ static int __init mark_mt_init(void)
+ {
+       int ret;
+ 
+-      ret = xt_register_target(&mark_tg_reg);
++      ret = xt_register_targets(mark_tg_reg, ARRAY_SIZE(mark_tg_reg));
+       if (ret < 0)
+               return ret;
+       ret = xt_register_match(&mark_mt_reg);
+       if (ret < 0) {
+-              xt_unregister_target(&mark_tg_reg);
++              xt_unregister_targets(mark_tg_reg, ARRAY_SIZE(mark_tg_reg));
+               return ret;
+       }
+       return 0;
+@@ -75,7 +97,7 @@ static int __init mark_mt_init(void)
+ static void __exit mark_mt_exit(void)
+ {
+       xt_unregister_match(&mark_mt_reg);
+-      xt_unregister_target(&mark_tg_reg);
++      xt_unregister_targets(mark_tg_reg, ARRAY_SIZE(mark_tg_reg));
+ }
+ 
+ module_init(mark_mt_init);
+-- 
+2.43.0
+
diff --git a/queue-5.15/nfsd-mark-filecache-down-if-init-fails.patch b/queue-5.15/nfsd-mark-filecache-down-if-init-fails.patch

new file mode 100644 (file)

index 0000000..86b03a3
--- /dev/null
+++ b/queue-5.15/nfsd-mark-filecache-down-if-init-fails.patch
@@ -0,0 +1,48 @@
+From e2570e0498e8d663fab1958bc9e2c8e4f114683b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 21 Sep 2024 14:25:37 -0400
+Subject: NFSD: Mark filecache "down" if init fails
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit dc0d0f885aa422f621bc1c2124133eff566b0bc8 ]
+
+NeilBrown says:
+> The handling of NFSD_FILE_CACHE_UP is strange.  nfsd_file_cache_init()
+> sets it, but doesn't clear it on failure.  So if nfsd_file_cache_init()
+> fails for some reason, nfsd_file_cache_shutdown() would still try to
+> clean up if it was called.
+
+Reported-by: NeilBrown <neilb@suse.de>
+Fixes: c7b824c3d06c ("NFSD: Replace the "init once" mechanism")
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/filecache.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 31169f0cc3d74..585163b4e11ce 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -717,7 +717,7 @@ nfsd_file_cache_init(void)
+ 
+       ret = rhltable_init(&nfsd_file_rhltable, &nfsd_file_rhash_params);
+       if (ret)
+-              return ret;
++              goto out;
+ 
+       ret = -ENOMEM;
+       nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", 0, 0);
+@@ -769,6 +769,8 @@ nfsd_file_cache_init(void)
+ 
+       INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker);
+ out:
++      if (ret)
++              clear_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags);
+       return ret;
+ out_notifier:
+       lease_unregister_notifier(&nfsd_file_lease_notifier);
+-- 
+2.43.0
+
diff --git a/queue-5.15/nfsv4-prevent-null-pointer-dereference-in-nfs42_comp.patch b/queue-5.15/nfsv4-prevent-null-pointer-dereference-in-nfs42_comp.patch

new file mode 100644 (file)

index 0000000..7715fdf
--- /dev/null
+++ b/queue-5.15/nfsv4-prevent-null-pointer-dereference-in-nfs42_comp.patch
@@ -0,0 +1,151 @@
+From 7467e2cf08b457850a208433bbadc75411b45cc3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 1 Oct 2024 16:39:30 +0800
+Subject: NFSv4: Prevent NULL-pointer dereference in nfs42_complete_copies()
+
+From: Yanjun Zhang <zhangyanjun@cestc.cn>
+
+[ Upstream commit a848c29e3486189aaabd5663bc11aea50c5bd144 ]
+
+On the node of an NFS client, some files saved in the mountpoint of the
+NFS server were copied to another location of the same NFS server.
+Accidentally, the nfs42_complete_copies() got a NULL-pointer dereference
+crash with the following syslog:
+
+[232064.838881] NFSv4: state recovery failed for open file nfs/pvc-12b5200d-cd0f-46a3-b9f0-af8f4fe0ef64.qcow2, error = -116
+[232064.839360] NFSv4: state recovery failed for open file nfs/pvc-12b5200d-cd0f-46a3-b9f0-af8f4fe0ef64.qcow2, error = -116
+[232066.588183] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000058
+[232066.588586] Mem abort info:
+[232066.588701]   ESR = 0x0000000096000007
+[232066.588862]   EC = 0x25: DABT (current EL), IL = 32 bits
+[232066.589084]   SET = 0, FnV = 0
+[232066.589216]   EA = 0, S1PTW = 0
+[232066.589340]   FSC = 0x07: level 3 translation fault
+[232066.589559] Data abort info:
+[232066.589683]   ISV = 0, ISS = 0x00000007
+[232066.589842]   CM = 0, WnR = 0
+[232066.589967] user pgtable: 64k pages, 48-bit VAs, pgdp=00002000956ff400
+[232066.590231] [0000000000000058] pgd=08001100ae100003, p4d=08001100ae100003, pud=08001100ae100003, pmd=08001100b3c00003, pte=0000000000000000
+[232066.590757] Internal error: Oops: 96000007 [#1] SMP
+[232066.590958] Modules linked in: rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache netfs ocfs2_dlmfs ocfs2_stack_o2cb ocfs2_dlm vhost_net vhost vhost_iotlb tap tun ipt_rpfilter xt_multiport ip_set_hash_ip ip_set_hash_net xfrm_interface xfrm6_tunnel tunnel4 tunnel6 esp4 ah4 wireguard libcurve25519_generic veth xt_addrtype xt_set nf_conntrack_netlink ip_set_hash_ipportnet ip_set_hash_ipportip ip_set_bitmap_port ip_set_hash_ipport dummy ip_set ip_vs_sh ip_vs_wrr ip_vs_rr ip_vs iptable_filter sch_ingress nfnetlink_cttimeout vport_gre ip_gre ip_tunnel gre vport_geneve geneve vport_vxlan vxlan ip6_udp_tunnel udp_tunnel openvswitch nf_conncount dm_round_robin dm_service_time dm_multipath xt_nat xt_MASQUERADE nft_chain_nat nf_nat xt_mark xt_conntrack xt_comment nft_compat nft_counter nf_tables nfnetlink ocfs2 ocfs2_nodemanager ocfs2_stackglue iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi ipmi_ssif nbd overlay 8021q garp mrp bonding tls rfkill sunrpc ext4 mbcache jbd2
+[232066.591052]  vfat fat cas_cache cas_disk ses enclosure scsi_transport_sas sg acpi_ipmi ipmi_si ipmi_devintf ipmi_msghandler ip_tables vfio_pci vfio_pci_core vfio_virqfd vfio_iommu_type1 vfio dm_mirror dm_region_hash dm_log dm_mod nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 br_netfilter bridge stp llc fuse xfs libcrc32c ast drm_vram_helper qla2xxx drm_kms_helper syscopyarea crct10dif_ce sysfillrect ghash_ce sysimgblt sha2_ce fb_sys_fops cec sha256_arm64 sha1_ce drm_ttm_helper ttm nvme_fc igb sbsa_gwdt nvme_fabrics drm nvme_core i2c_algo_bit i40e scsi_transport_fc megaraid_sas aes_neon_bs
+[232066.596953] CPU: 6 PID: 4124696 Comm: 10.253.166.125- Kdump: loaded Not tainted 5.15.131-9.cl9_ocfs2.aarch64 #1
+[232066.597356] Hardware name: Great Wall .\x93\x8e...RF6260 V5/GWMSSE2GL1T, BIOS T656FBE_V3.0.18 2024-01-06
+[232066.597721] pstate: 20400009 (nzCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
+[232066.598034] pc : nfs4_reclaim_open_state+0x220/0x800 [nfsv4]
+[232066.598327] lr : nfs4_reclaim_open_state+0x12c/0x800 [nfsv4]
+[232066.598595] sp : ffff8000f568fc70
+[232066.598731] x29: ffff8000f568fc70 x28: 0000000000001000 x27: ffff21003db33000
+[232066.599030] x26: ffff800005521ae0 x25: ffff0100f98fa3f0 x24: 0000000000000001
+[232066.599319] x23: ffff800009920008 x22: ffff21003db33040 x21: ffff21003db33050
+[232066.599628] x20: ffff410172fe9e40 x19: ffff410172fe9e00 x18: 0000000000000000
+[232066.599914] x17: 0000000000000000 x16: 0000000000000004 x15: 0000000000000000
+[232066.600195] x14: 0000000000000000 x13: ffff800008e685a8 x12: 00000000eac0c6e6
+[232066.600498] x11: 0000000000000000 x10: 0000000000000008 x9 : ffff8000054e5828
+[232066.600784] x8 : 00000000ffffffbf x7 : 0000000000000001 x6 : 000000000a9eb14a
+[232066.601062] x5 : 0000000000000000 x4 : ffff70ff8a14a800 x3 : 0000000000000058
+[232066.601348] x2 : 0000000000000001 x1 : 54dce46366daa6c6 x0 : 0000000000000000
+[232066.601636] Call trace:
+[232066.601749]  nfs4_reclaim_open_state+0x220/0x800 [nfsv4]
+[232066.601998]  nfs4_do_reclaim+0x1b8/0x28c [nfsv4]
+[232066.602218]  nfs4_state_manager+0x928/0x10f0 [nfsv4]
+[232066.602455]  nfs4_run_state_manager+0x78/0x1b0 [nfsv4]
+[232066.602690]  kthread+0x110/0x114
+[232066.602830]  ret_from_fork+0x10/0x20
+[232066.602985] Code: 1400000d f9403f20 f9402e61 91016003 (f9402c00)
+[232066.603284] SMP: stopping secondary CPUs
+[232066.606936] Starting crashdump kernel...
+[232066.607146] Bye!
+
+Analysing the vmcore, we know that nfs4_copy_state listed by destination
+nfs_server->ss_copies was added by the field copies in handle_async_copy(),
+and we found a waiting copy process with the stack as:
+PID: 3511963  TASK: ffff710028b47e00  CPU: 0   COMMAND: "cp"
+ #0 [ffff8001116ef740] __switch_to at ffff8000081b92f4
+ #1 [ffff8001116ef760] __schedule at ffff800008dd0650
+ #2 [ffff8001116ef7c0] schedule at ffff800008dd0a00
+ #3 [ffff8001116ef7e0] schedule_timeout at ffff800008dd6aa0
+ #4 [ffff8001116ef860] __wait_for_common at ffff800008dd166c
+ #5 [ffff8001116ef8e0] wait_for_completion_interruptible at ffff800008dd1898
+ #6 [ffff8001116ef8f0] handle_async_copy at ffff8000055142f4 [nfsv4]
+ #7 [ffff8001116ef970] _nfs42_proc_copy at ffff8000055147c8 [nfsv4]
+ #8 [ffff8001116efa80] nfs42_proc_copy at ffff800005514cf0 [nfsv4]
+ #9 [ffff8001116efc50] __nfs4_copy_file_range.constprop.0 at ffff8000054ed694 [nfsv4]
+
+The NULL-pointer dereference was due to nfs42_complete_copies() listed
+the nfs_server->ss_copies by the field ss_copies of nfs4_copy_state.
+So the nfs4_copy_state address ffff0100f98fa3f0 was offset by 0x10 and
+the data accessed through this pointer was also incorrect. Generally,
+the ordered list nfs4_state_owner->so_states indicate open(O_RDWR) or
+open(O_WRITE) states are reclaimed firstly by nfs4_reclaim_open_state().
+When destination state reclaim is failed with NFS_STATE_RECOVERY_FAILED
+and copies are not deleted in nfs_server->ss_copies, the source state
+may be passed to the nfs42_complete_copies() process earlier, resulting
+in this crash scene finally. To solve this issue, we add a list_head
+nfs_server->ss_src_copies for a server-to-server copy specially.
+
+Fixes: 0e65a32c8a56 ("NFS: handle source server reboot")
+Signed-off-by: Yanjun Zhang <zhangyanjun@cestc.cn>
+Reviewed-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfs/client.c           | 1 +
+ fs/nfs/nfs42proc.c        | 2 +-
+ fs/nfs/nfs4state.c        | 2 +-
+ include/linux/nfs_fs_sb.h | 1 +
+ 4 files changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/fs/nfs/client.c b/fs/nfs/client.c
+index 9e3a3570efc0f..10eef1368114e 100644
+--- a/fs/nfs/client.c
++++ b/fs/nfs/client.c
+@@ -944,6 +944,7 @@ struct nfs_server *nfs_alloc_server(void)
+       INIT_LIST_HEAD(&server->layouts);
+       INIT_LIST_HEAD(&server->state_owners_lru);
+       INIT_LIST_HEAD(&server->ss_copies);
++      INIT_LIST_HEAD(&server->ss_src_copies);
+ 
+       atomic_set(&server->active, 0);
+ 
+diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
+index 2975bbc33d280..eb347742e611b 100644
+--- a/fs/nfs/nfs42proc.c
++++ b/fs/nfs/nfs42proc.c
+@@ -210,7 +210,7 @@ static int handle_async_copy(struct nfs42_copy_res *res,
+ 
+       if (dst_server != src_server) {
+               spin_lock(&src_server->nfs_client->cl_lock);
+-              list_add_tail(&copy->src_copies, &src_server->ss_copies);
++              list_add_tail(&copy->src_copies, &src_server->ss_src_copies);
+               spin_unlock(&src_server->nfs_client->cl_lock);
+       }
+ 
+diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
+index 453d32f464038..76e2cdddf95c1 100644
+--- a/fs/nfs/nfs4state.c
++++ b/fs/nfs/nfs4state.c
+@@ -1597,7 +1597,7 @@ static void nfs42_complete_copies(struct nfs4_state_owner *sp, struct nfs4_state
+                       complete(&copy->completion);
+               }
+       }
+-      list_for_each_entry(copy, &sp->so_server->ss_copies, src_copies) {
++      list_for_each_entry(copy, &sp->so_server->ss_src_copies, src_copies) {
+               if ((test_bit(NFS_CLNT_SRC_SSC_COPY_STATE, &state->flags) &&
+                               !nfs4_stateid_match_other(&state->stateid,
+                               &copy->parent_src_state->stateid)))
+diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
+index 5e065f16d061d..091fefc5e3615 100644
+--- a/include/linux/nfs_fs_sb.h
++++ b/include/linux/nfs_fs_sb.h
+@@ -238,6 +238,7 @@ struct nfs_server {
+       struct list_head        layouts;
+       struct list_head        delegations;
+       struct list_head        ss_copies;
++      struct list_head        ss_src_copies;
+ 
+       unsigned long           mig_gen;
+       unsigned long           mig_status;
+-- 
+2.43.0
+
diff --git a/queue-5.15/ppp-fix-ppp_async_encode-illegal-access.patch b/queue-5.15/ppp-fix-ppp_async_encode-illegal-access.patch

new file mode 100644 (file)

index 0000000..3337eae
--- /dev/null
+++ b/queue-5.15/ppp-fix-ppp_async_encode-illegal-access.patch
@@ -0,0 +1,91 @@
+From 41b0248a0c3195f6692be4b6111bf23d07c199d2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 9 Oct 2024 18:58:02 +0000
+Subject: ppp: fix ppp_async_encode() illegal access
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 40dddd4b8bd08a69471efd96107a4e1c73fabefc ]
+
+syzbot reported an issue in ppp_async_encode() [1]
+
+In this case, pppoe_sendmsg() is called with a zero size.
+Then ppp_async_encode() is called with an empty skb.
+
+BUG: KMSAN: uninit-value in ppp_async_encode drivers/net/ppp/ppp_async.c:545 [inline]
+ BUG: KMSAN: uninit-value in ppp_async_push+0xb4f/0x2660 drivers/net/ppp/ppp_async.c:675
+  ppp_async_encode drivers/net/ppp/ppp_async.c:545 [inline]
+  ppp_async_push+0xb4f/0x2660 drivers/net/ppp/ppp_async.c:675
+  ppp_async_send+0x130/0x1b0 drivers/net/ppp/ppp_async.c:634
+  ppp_channel_bridge_input drivers/net/ppp/ppp_generic.c:2280 [inline]
+  ppp_input+0x1f1/0xe60 drivers/net/ppp/ppp_generic.c:2304
+  pppoe_rcv_core+0x1d3/0x720 drivers/net/ppp/pppoe.c:379
+  sk_backlog_rcv+0x13b/0x420 include/net/sock.h:1113
+  __release_sock+0x1da/0x330 net/core/sock.c:3072
+  release_sock+0x6b/0x250 net/core/sock.c:3626
+  pppoe_sendmsg+0x2b8/0xb90 drivers/net/ppp/pppoe.c:903
+  sock_sendmsg_nosec net/socket.c:729 [inline]
+  __sock_sendmsg+0x30f/0x380 net/socket.c:744
+  ____sys_sendmsg+0x903/0xb60 net/socket.c:2602
+  ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2656
+  __sys_sendmmsg+0x3c1/0x960 net/socket.c:2742
+  __do_sys_sendmmsg net/socket.c:2771 [inline]
+  __se_sys_sendmmsg net/socket.c:2768 [inline]
+  __x64_sys_sendmmsg+0xbc/0x120 net/socket.c:2768
+  x64_sys_call+0xb6e/0x3ba0 arch/x86/include/generated/asm/syscalls_64.h:308
+  do_syscall_x64 arch/x86/entry/common.c:52 [inline]
+  do_syscall_64+0xcd/0x1e0 arch/x86/entry/common.c:83
+ entry_SYSCALL_64_after_hwframe+0x77/0x7f
+
+Uninit was created at:
+  slab_post_alloc_hook mm/slub.c:4092 [inline]
+  slab_alloc_node mm/slub.c:4135 [inline]
+  kmem_cache_alloc_node_noprof+0x6bf/0xb80 mm/slub.c:4187
+  kmalloc_reserve+0x13d/0x4a0 net/core/skbuff.c:587
+  __alloc_skb+0x363/0x7b0 net/core/skbuff.c:678
+  alloc_skb include/linux/skbuff.h:1322 [inline]
+  sock_wmalloc+0xfe/0x1a0 net/core/sock.c:2732
+  pppoe_sendmsg+0x3a7/0xb90 drivers/net/ppp/pppoe.c:867
+  sock_sendmsg_nosec net/socket.c:729 [inline]
+  __sock_sendmsg+0x30f/0x380 net/socket.c:744
+  ____sys_sendmsg+0x903/0xb60 net/socket.c:2602
+  ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2656
+  __sys_sendmmsg+0x3c1/0x960 net/socket.c:2742
+  __do_sys_sendmmsg net/socket.c:2771 [inline]
+  __se_sys_sendmmsg net/socket.c:2768 [inline]
+  __x64_sys_sendmmsg+0xbc/0x120 net/socket.c:2768
+  x64_sys_call+0xb6e/0x3ba0 arch/x86/include/generated/asm/syscalls_64.h:308
+  do_syscall_x64 arch/x86/entry/common.c:52 [inline]
+  do_syscall_64+0xcd/0x1e0 arch/x86/entry/common.c:83
+ entry_SYSCALL_64_after_hwframe+0x77/0x7f
+
+CPU: 1 UID: 0 PID: 5411 Comm: syz.1.14 Not tainted 6.12.0-rc1-syzkaller-00165-g360c1f1f24c6 #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Reported-by: syzbot+1d121645899e7692f92a@syzkaller.appspotmail.com
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://patch.msgid.link/20241009185802.3763282-1-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ppp/ppp_async.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ppp/ppp_async.c b/drivers/net/ppp/ppp_async.c
+index 4a5c749ba223d..f7cd56d4677ea 100644
+--- a/drivers/net/ppp/ppp_async.c
++++ b/drivers/net/ppp/ppp_async.c
+@@ -552,7 +552,7 @@ ppp_async_encode(struct asyncppp *ap)
+        * and 7 (code-reject) must be sent as though no options
+        * had been negotiated.
+        */
+-      islcp = proto == PPP_LCP && 1 <= data[2] && data[2] <= 7;
++      islcp = proto == PPP_LCP && count >= 3 && 1 <= data[2] && data[2] <= 7;
+ 
+       if (i == 0) {
+               if (islcp)
+-- 
+2.43.0
+
diff --git a/queue-5.15/rtnetlink-add-bulk-registration-helpers-for-rtnetlin.patch b/queue-5.15/rtnetlink-add-bulk-registration-helpers-for-rtnetlin.patch

new file mode 100644 (file)

index 0000000..d8ecf2c
--- /dev/null
+++ b/queue-5.15/rtnetlink-add-bulk-registration-helpers-for-rtnetlin.patch
@@ -0,0 +1,110 @@
+From a97f7bd1cc9ff7fc47d2667702948c34ce927004 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Oct 2024 11:47:32 -0700
+Subject: rtnetlink: Add bulk registration helpers for rtnetlink message
+ handlers.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 07cc7b0b942bf55ef1a471470ecda8d2a6a6541f ]
+
+Before commit addf9b90de22 ("net: rtnetlink: use rcu to free rtnl message
+handlers"), once rtnl_msg_handlers[protocol] was allocated, the following
+rtnl_register_module() for the same protocol never failed.
+
+However, after the commit, rtnl_msg_handler[protocol][msgtype] needs to
+be allocated in each rtnl_register_module(), so each call could fail.
+
+Many callers of rtnl_register_module() do not handle the returned error,
+and we need to add many error handlings.
+
+To handle that easily, let's add wrapper functions for bulk registration
+of rtnetlink message handlers.
+
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Stable-dep-of: d51705614f66 ("mctp: Handle error of rtnl_register_module().")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/rtnetlink.h | 17 +++++++++++++++++
+ net/core/rtnetlink.c    | 29 +++++++++++++++++++++++++++++
+ 2 files changed, 46 insertions(+)
+
+diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
+index c9d3ae92c9321..dcb1c92e69879 100644
+--- a/include/net/rtnetlink.h
++++ b/include/net/rtnetlink.h
+@@ -20,6 +20,15 @@ enum rtnl_kinds {
+       RTNL_KIND_SET
+ };
+ 
++struct rtnl_msg_handler {
++      struct module *owner;
++      int protocol;
++      int msgtype;
++      rtnl_doit_func doit;
++      rtnl_dumpit_func dumpit;
++      int flags;
++};
++
+ void rtnl_register(int protocol, int msgtype,
+                  rtnl_doit_func, rtnl_dumpit_func, unsigned int flags);
+ int rtnl_register_module(struct module *owner, int protocol, int msgtype,
+@@ -27,6 +36,14 @@ int rtnl_register_module(struct module *owner, int protocol, int msgtype,
+ int rtnl_unregister(int protocol, int msgtype);
+ void rtnl_unregister_all(int protocol);
+ 
++int __rtnl_register_many(const struct rtnl_msg_handler *handlers, int n);
++void __rtnl_unregister_many(const struct rtnl_msg_handler *handlers, int n);
++
++#define rtnl_register_many(handlers)                          \
++      __rtnl_register_many(handlers, ARRAY_SIZE(handlers))
++#define rtnl_unregister_many(handlers)                                \
++      __rtnl_unregister_many(handlers, ARRAY_SIZE(handlers))
++
+ static inline int rtnl_msg_family(const struct nlmsghdr *nlh)
+ {
+       if (nlmsg_len(nlh) >= sizeof(struct rtgenmsg))
+diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
+index 8fc86d1edf561..24795110b2ff3 100644
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -348,6 +348,35 @@ void rtnl_unregister_all(int protocol)
+ }
+ EXPORT_SYMBOL_GPL(rtnl_unregister_all);
+ 
++int __rtnl_register_many(const struct rtnl_msg_handler *handlers, int n)
++{
++      const struct rtnl_msg_handler *handler;
++      int i, err = 0; /* stays 0 when n == 0 (nothing to register) */
++
++      for (i = 0, handler = handlers; i < n; i++, handler++) {
++              err = rtnl_register_internal(handler->owner, handler->protocol,
++                                           handler->msgtype, handler->doit,
++                                           handler->dumpit, handler->flags);
++              if (err) {
++                      __rtnl_unregister_many(handlers, i);
++                      break;
++              }
++      }
++
++      return err;
++}
++EXPORT_SYMBOL_GPL(__rtnl_register_many);
++
++void __rtnl_unregister_many(const struct rtnl_msg_handler *handlers, int n)
++{
++      const struct rtnl_msg_handler *handler;
++      int i;
++
++      for (i = n - 1, handler = handlers + n - 1; i >= 0; i--, handler--)
++              rtnl_unregister(handler->protocol, handler->msgtype);
++}
++EXPORT_SYMBOL_GPL(__rtnl_unregister_many);
++
+ static LIST_HEAD(link_ops);
+ 
+ static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind)
+-- 
+2.43.0
+
diff --git a/queue-5.15/sctp-ensure-sk_state-is-set-to-closed-if-hashing-fai.patch b/queue-5.15/sctp-ensure-sk_state-is-set-to-closed-if-hashing-fai.patch

new file mode 100644 (file)

index 0000000..21c3ec1
--- /dev/null
+++ b/queue-5.15/sctp-ensure-sk_state-is-set-to-closed-if-hashing-fai.patch
@@ -0,0 +1,78 @@
+From def44ffcf17e438f4501e3197e4d6ec5d0941dee Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 7 Oct 2024 12:25:11 -0400
+Subject: sctp: ensure sk_state is set to CLOSED if hashing fails in
+ sctp_listen_start
+
+From: Xin Long <lucien.xin@gmail.com>
+
+[ Upstream commit 4d5c70e6155d5eae198bade4afeab3c1b15073b6 ]
+
+If hashing fails in sctp_listen_start(), the socket remains in the
+LISTENING state, even though it was not added to the hash table.
+This can lead to a scenario where a socket appears to be listening
+without actually being accessible.
+
+This patch ensures that if the hashing operation fails, the sk_state
+is set back to CLOSED before returning an error.
+
+Note that there is no need to undo the autobind operation if hashing
+fails, as the bind port can still be used for next listen() call on
+the same socket.
+
+Fixes: 76c6d988aeb3 ("sctp: add sock_reuseport for the sock in __sctp_hash_endpoint")
+Reported-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sctp/socket.c | 18 +++++++++++++-----
+ 1 file changed, 13 insertions(+), 5 deletions(-)
+
+diff --git a/net/sctp/socket.c b/net/sctp/socket.c
+index 2818cf160f3a3..528d9ecf1dd86 100644
+--- a/net/sctp/socket.c
++++ b/net/sctp/socket.c
+@@ -8516,6 +8516,7 @@ static int sctp_listen_start(struct sock *sk, int backlog)
+       struct sctp_endpoint *ep = sp->ep;
+       struct crypto_shash *tfm = NULL;
+       char alg[32];
++      int err;
+ 
+       /* Allocate HMAC for generating cookie. */
+       if (!sp->hmac && sp->sctp_hmac_alg) {
+@@ -8543,18 +8544,25 @@ static int sctp_listen_start(struct sock *sk, int backlog)
+       inet_sk_set_state(sk, SCTP_SS_LISTENING);
+       if (!ep->base.bind_addr.port) {
+               if (sctp_autobind(sk)) {
+-                      inet_sk_set_state(sk, SCTP_SS_CLOSED);
+-                      return -EAGAIN;
++                      err = -EAGAIN;
++                      goto err;
+               }
+       } else {
+               if (sctp_get_port(sk, inet_sk(sk)->inet_num)) {
+-                      inet_sk_set_state(sk, SCTP_SS_CLOSED);
+-                      return -EADDRINUSE;
++                      err = -EADDRINUSE;
++                      goto err;
+               }
+       }
+ 
+       WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
+-      return sctp_hash_endpoint(ep);
++      err = sctp_hash_endpoint(ep);
++      if (err)
++              goto err;
++
++      return 0;
++err:
++      inet_sk_set_state(sk, SCTP_SS_CLOSED);
++      return err;
+ }
+ 
+ /*
+-- 
+2.43.0
+
diff --git a/queue-5.15/series b/queue-5.15/series

index d7c3f80d42cd86f0994ef40456dcc4ac1c30cea1..966fc343ed252b1174f9d51eee288a4159a4e4bb 100644 (file)
--- a/queue-5.15/series
+++ b/queue-5.15/series
@@ -628,3 +628,38 @@ driver-core-bus-return-eio-instead-of-0-when-show-st.patch
  drm-amd-display-check-null-pointer-before-dereferenc.patch
  fbdev-sisfb-fix-strbuf-array-overflow.patch
  rdma-rxe-fix-seg-fault-in-rxe_comp_queue_pkt.patch
+nfsd-mark-filecache-down-if-init-fails.patch
+ice-fix-vlan-replay-after-reset.patch
+sunrpc-fix-integer-overflow-in-decode_rc_list.patch
+nfsv4-prevent-null-pointer-dereference-in-nfs42_comp.patch
+net-phy-dp83869-fix-memory-corruption-when-enabling-.patch
+tcp-fix-to-allow-timestamp-undo-if-no-retransmits-we.patch
+tcp-fix-tcp_enter_recovery-to-zero-retrans_stamp-whe.patch
+netfilter-br_netfilter-fix-panic-with-metadata_dst-s.patch
+bluetooth-rfcomm-fix-possible-deadlock-in-rfcomm_sk_.patch
+net-phy-bcm84881-fix-some-error-handling-paths.patch
+thermal-int340x-processor_thermal-set-feature-mask-b.patch
+thermal-intel-int340x-processor-fix-warning-during-m.patch
+net-dsa-b53-fix-jumbo-frame-mtu-check.patch
+net-dsa-b53-fix-max-mtu-for-1g-switches.patch
+net-dsa-b53-fix-max-mtu-for-bcm5325-bcm5365.patch
+net-dsa-b53-allow-lower-mtus-on-bcm5325-5365.patch
+net-dsa-b53-fix-jumbo-frames-on-10-100-ports.patch
+gpio-aspeed-add-the-flush-write-to-ensure-the-write-.patch
+gpio-aspeed-use-devm_clk-api-to-manage-clock-source.patch
+ice-fix-netif_is_ice-in-safe-mode.patch
+i40e-fix-macvlan-leak-by-synchronizing-access-to-mac.patch
+igb-do-not-bring-the-device-up-after-non-fatal-error.patch
+net-sched-accept-tca_stab-only-for-root-qdisc.patch
+net-ibm-emac-mal-fix-wrong-goto.patch
+sctp-ensure-sk_state-is-set-to-closed-if-hashing-fai.patch
+netfilter-xtables-avoid-nfproto_unspec-where-needed.patch
+net-add-l3mdev-index-to-flow-struct-and-avoid-oif-re.patch
+netfilter-rpfilter-fib-populate-flowic_l3mdev-field.patch
+netfilter-rpfilter-fib-set-flowic_uid-correctly-for-.patch
+netfilter-fib-check-correct-rtable-in-vrf-setups.patch
+net-rtnetlink-add-msg-kind-names.patch
+rtnetlink-add-bulk-registration-helpers-for-rtnetlin.patch
+mctp-handle-error-of-rtnl_register_module.patch
+ppp-fix-ppp_async_encode-illegal-access.patch
+slip-make-slhc_remember-more-robust-against-maliciou.patch
diff --git a/queue-5.15/slip-make-slhc_remember-more-robust-against-maliciou.patch b/queue-5.15/slip-make-slhc_remember-more-robust-against-maliciou.patch

new file mode 100644 (file)

index 0000000..38165f6
--- /dev/null
+++ b/queue-5.15/slip-make-slhc_remember-more-robust-against-maliciou.patch
@@ -0,0 +1,170 @@
+From 190d306acadf0db969840548f9b836611106a678 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 9 Oct 2024 09:11:32 +0000
+Subject: slip: make slhc_remember() more robust against malicious packets
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 7d3fce8cbe3a70a1c7c06c9b53696be5d5d8dd5c ]
+
+syzbot found that slhc_remember() was missing checks against
+malicious packets [1].
+
+slhc_remember() only checked the size of the packet was at least 20,
+which is not good enough.
+
+We need to make sure the packet includes the IPv4 and TCP headers
+that are supposed to be carried.
+
+Add iph and th pointers to make the code more readable.
+
+[1]
+
+BUG: KMSAN: uninit-value in slhc_remember+0x2e8/0x7b0 drivers/net/slip/slhc.c:666
+  slhc_remember+0x2e8/0x7b0 drivers/net/slip/slhc.c:666
+  ppp_receive_nonmp_frame+0xe45/0x35e0 drivers/net/ppp/ppp_generic.c:2455
+  ppp_receive_frame drivers/net/ppp/ppp_generic.c:2372 [inline]
+  ppp_do_recv+0x65f/0x40d0 drivers/net/ppp/ppp_generic.c:2212
+  ppp_input+0x7dc/0xe60 drivers/net/ppp/ppp_generic.c:2327
+  pppoe_rcv_core+0x1d3/0x720 drivers/net/ppp/pppoe.c:379
+  sk_backlog_rcv+0x13b/0x420 include/net/sock.h:1113
+  __release_sock+0x1da/0x330 net/core/sock.c:3072
+  release_sock+0x6b/0x250 net/core/sock.c:3626
+  pppoe_sendmsg+0x2b8/0xb90 drivers/net/ppp/pppoe.c:903
+  sock_sendmsg_nosec net/socket.c:729 [inline]
+  __sock_sendmsg+0x30f/0x380 net/socket.c:744
+  ____sys_sendmsg+0x903/0xb60 net/socket.c:2602
+  ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2656
+  __sys_sendmmsg+0x3c1/0x960 net/socket.c:2742
+  __do_sys_sendmmsg net/socket.c:2771 [inline]
+  __se_sys_sendmmsg net/socket.c:2768 [inline]
+  __x64_sys_sendmmsg+0xbc/0x120 net/socket.c:2768
+  x64_sys_call+0xb6e/0x3ba0 arch/x86/include/generated/asm/syscalls_64.h:308
+  do_syscall_x64 arch/x86/entry/common.c:52 [inline]
+  do_syscall_64+0xcd/0x1e0 arch/x86/entry/common.c:83
+ entry_SYSCALL_64_after_hwframe+0x77/0x7f
+
+Uninit was created at:
+  slab_post_alloc_hook mm/slub.c:4091 [inline]
+  slab_alloc_node mm/slub.c:4134 [inline]
+  kmem_cache_alloc_node_noprof+0x6bf/0xb80 mm/slub.c:4186
+  kmalloc_reserve+0x13d/0x4a0 net/core/skbuff.c:587
+  __alloc_skb+0x363/0x7b0 net/core/skbuff.c:678
+  alloc_skb include/linux/skbuff.h:1322 [inline]
+  sock_wmalloc+0xfe/0x1a0 net/core/sock.c:2732
+  pppoe_sendmsg+0x3a7/0xb90 drivers/net/ppp/pppoe.c:867
+  sock_sendmsg_nosec net/socket.c:729 [inline]
+  __sock_sendmsg+0x30f/0x380 net/socket.c:744
+  ____sys_sendmsg+0x903/0xb60 net/socket.c:2602
+  ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2656
+  __sys_sendmmsg+0x3c1/0x960 net/socket.c:2742
+  __do_sys_sendmmsg net/socket.c:2771 [inline]
+  __se_sys_sendmmsg net/socket.c:2768 [inline]
+  __x64_sys_sendmmsg+0xbc/0x120 net/socket.c:2768
+  x64_sys_call+0xb6e/0x3ba0 arch/x86/include/generated/asm/syscalls_64.h:308
+  do_syscall_x64 arch/x86/entry/common.c:52 [inline]
+  do_syscall_64+0xcd/0x1e0 arch/x86/entry/common.c:83
+ entry_SYSCALL_64_after_hwframe+0x77/0x7f
+
+CPU: 0 UID: 0 PID: 5460 Comm: syz.2.33 Not tainted 6.12.0-rc2-syzkaller-00006-g87d6aab2389e #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024
+
+Fixes: b5451d783ade ("slip: Move the SLIP drivers")
+Reported-by: syzbot+2ada1bc857496353be5a@syzkaller.appspotmail.com
+Closes: https://lore.kernel.org/netdev/670646db.050a0220.3f80e.0027.GAE@google.com/T/#u
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Link: https://patch.msgid.link/20241009091132.2136321-1-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/slip/slhc.c | 57 ++++++++++++++++++++++++-----------------
+ 1 file changed, 34 insertions(+), 23 deletions(-)
+
+diff --git a/drivers/net/slip/slhc.c b/drivers/net/slip/slhc.c
+index ba93bab948e09..bf9e801cc61cc 100644
+--- a/drivers/net/slip/slhc.c
++++ b/drivers/net/slip/slhc.c
+@@ -643,46 +643,57 @@ slhc_uncompress(struct slcompress *comp, unsigned char *icp, int isize)
+ int
+ slhc_remember(struct slcompress *comp, unsigned char *icp, int isize)
+ {
+-      struct cstate *cs;
+-      unsigned ihl;
+-
++      const struct tcphdr *th;
+       unsigned char index;
++      struct iphdr *iph;
++      struct cstate *cs;
++      unsigned int ihl;
+ 
+-      if(isize < 20) {
+-              /* The packet is shorter than a legal IP header */
++      /* The packet is shorter than a legal IP header.
++       * Also make sure isize is positive.
++       */
++      if (isize < (int)sizeof(struct iphdr)) {
++runt:
+               comp->sls_i_runt++;
+-              return slhc_toss( comp );
++              return slhc_toss(comp);
+       }
++      iph = (struct iphdr *)icp;
+       /* Peek at the IP header's IHL field to find its length */
+-      ihl = icp[0] & 0xf;
+-      if(ihl < 20 / 4){
+-              /* The IP header length field is too small */
+-              comp->sls_i_runt++;
+-              return slhc_toss( comp );
+-      }
+-      index = icp[9];
+-      icp[9] = IPPROTO_TCP;
++      ihl = iph->ihl;
++      /* The IP header length field is too small,
++       * or packet is shorter than the IP header followed
++       * by minimal tcp header.
++       */
++      if (ihl < 5 || isize < ihl * 4 + sizeof(struct tcphdr))
++              goto runt;
++
++      index = iph->protocol;
++      iph->protocol = IPPROTO_TCP;
+ 
+       if (ip_fast_csum(icp, ihl)) {
+               /* Bad IP header checksum; discard */
+               comp->sls_i_badcheck++;
+-              return slhc_toss( comp );
++              return slhc_toss(comp);
+       }
+-      if(index > comp->rslot_limit) {
++      if (index > comp->rslot_limit) {
+               comp->sls_i_error++;
+               return slhc_toss(comp);
+       }
+-
++      th = (struct tcphdr *)(icp + ihl * 4);
++      if (th->doff < sizeof(struct tcphdr) / 4)
++              goto runt;
++      if (isize < ihl * 4 + th->doff * 4)
++              goto runt;
+       /* Update local state */
+       cs = &comp->rstate[comp->recv_current = index];
+       comp->flags &=~ SLF_TOSS;
+-      memcpy(&cs->cs_ip,icp,20);
+-      memcpy(&cs->cs_tcp,icp + ihl*4,20);
++      memcpy(&cs->cs_ip, iph, sizeof(*iph));
++      memcpy(&cs->cs_tcp, th, sizeof(*th));
+       if (ihl > 5)
+-        memcpy(cs->cs_ipopt, icp + sizeof(struct iphdr), (ihl - 5) * 4);
+-      if (cs->cs_tcp.doff > 5)
+-        memcpy(cs->cs_tcpopt, icp + ihl*4 + sizeof(struct tcphdr), (cs->cs_tcp.doff - 5) * 4);
+-      cs->cs_hsize = ihl*2 + cs->cs_tcp.doff*2;
++        memcpy(cs->cs_ipopt, &iph[1], (ihl - 5) * 4);
++      if (th->doff > 5)
++        memcpy(cs->cs_tcpopt, &th[1], (th->doff - 5) * 4);
++      cs->cs_hsize = ihl*2 + th->doff*2;
+       cs->initialized = true;
+       /* Put headers back on packet
+        * Neither header checksum is recalculated
+-- 
+2.43.0
+
diff --git a/queue-5.15/sunrpc-fix-integer-overflow-in-decode_rc_list.patch b/queue-5.15/sunrpc-fix-integer-overflow-in-decode_rc_list.patch

new file mode 100644 (file)

index 0000000..5414814
--- /dev/null
+++ b/queue-5.15/sunrpc-fix-integer-overflow-in-decode_rc_list.patch
@@ -0,0 +1,37 @@
+From 841db2d64a027aaa516d691b0d6276629d55ba7e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 19 Sep 2024 11:50:33 +0300
+Subject: SUNRPC: Fix integer overflow in decode_rc_list()
+
+From: Dan Carpenter <dan.carpenter@linaro.org>
+
+[ Upstream commit 6dbf1f341b6b35bcc20ff95b6b315e509f6c5369 ]
+
+The math in "rc_list->rcl_nrefcalls * 2 * sizeof(uint32_t)" could have an
+integer overflow.  Add bounds checking on rc_list->rcl_nrefcalls to fix
+that.
+
+Fixes: 4aece6a19cf7 ("nfs41: cb_sequence xdr implementation")
+Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
+Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfs/callback_xdr.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
+index d0cccddb7d088..fa519ce5c841f 100644
+--- a/fs/nfs/callback_xdr.c
++++ b/fs/nfs/callback_xdr.c
+@@ -372,6 +372,8 @@ static __be32 decode_rc_list(struct xdr_stream *xdr,
+ 
+       rc_list->rcl_nrefcalls = ntohl(*p++);
+       if (rc_list->rcl_nrefcalls) {
++              if (unlikely(rc_list->rcl_nrefcalls > xdr->buf->len))
++                      goto out;
+               p = xdr_inline_decode(xdr,
+                            rc_list->rcl_nrefcalls * 2 * sizeof(uint32_t));
+               if (unlikely(p == NULL))
+-- 
+2.43.0
+
diff --git a/queue-5.15/tcp-fix-tcp_enter_recovery-to-zero-retrans_stamp-whe.patch b/queue-5.15/tcp-fix-tcp_enter_recovery-to-zero-retrans_stamp-whe.patch

new file mode 100644 (file)

index 0000000..847b566
--- /dev/null
+++ b/queue-5.15/tcp-fix-tcp_enter_recovery-to-zero-retrans_stamp-whe.patch
@@ -0,0 +1,153 @@
+From b99c230af04bb507ce4dce1c33394aaf1956197f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 1 Oct 2024 20:05:16 +0000
+Subject: tcp: fix tcp_enter_recovery() to zero retrans_stamp when it's safe
+
+From: Neal Cardwell <ncardwell@google.com>
+
+[ Upstream commit b41b4cbd9655bcebcce941bef3601db8110335be ]
+
+Fix tcp_enter_recovery() so that if there are no retransmits out then
+we zero retrans_stamp when entering fast recovery. This is necessary
+to fix two buggy behaviors.
+
+Currently a non-zero retrans_stamp value can persist across multiple
+back-to-back loss recovery episodes. This is because we generally only
+clear retrans_stamp if we are completely done with loss recoveries,
+and get to tcp_try_to_open() and find !tcp_any_retrans_done(sk). This
+behavior causes two bugs:
+
+(1) When a loss recovery episode (CA_Loss or CA_Recovery) is followed
+immediately by a new CA_Recovery, the retrans_stamp value can persist
+and can be a time before this new CA_Recovery episode starts. That
+means that timestamp-based undo will be using the wrong retrans_stamp
+(a value that is too old) when comparing incoming TS ecr values to
+retrans_stamp to see if the current fast recovery episode can be
+undone.
+
+(2) If there is a roughly minutes-long sequence of back-to-back fast
+recovery episodes, one after another (e.g. in a shallow-buffered or
+policed bottleneck), where each fast recovery successfully makes
+forward progress and recovers one window of sequence space (but leaves
+at least one retransmit in flight at the end of the recovery),
+followed by several RTOs, then the ETIMEDOUT check may be using the
+wrong retrans_stamp (a value set at the start of the first fast
+recovery in the sequence). This can cause a very premature ETIMEDOUT,
+killing the connection prematurely.
+
+This commit changes the code to zero retrans_stamp when entering fast
+recovery, when this is known to be safe (no retransmits are out in the
+network). That ensures that when starting a fast recovery episode, and
+it is safe to do so, retrans_stamp is set when we send the fast
+retransmit packet. That addresses both bug (1) and bug (2) by ensuring
+that (if no retransmits are out when we start a fast recovery) we use
+the initial fast retransmit of this fast recovery as the time value
+for undo and ETIMEDOUT calculations.
+
+This makes intuitive sense, since the start of a new fast recovery
+episode (in a scenario where no lost packets are out in the network)
+means that the connection has made forward progress since the last RTO
+or fast recovery, and we should thus "restart the clock" used for both
+undo and ETIMEDOUT logic.
+
+Note that if, when we start fast recovery, there *are* retransmits out
+in the network, there can still be undesirable (1)/(2) issues. For
+example, after this patch we can still have the (1) and (2) problems
+in cases like this:
+
++ round 1: sender sends flight 1
+
++ round 2: sender receives SACKs and enters fast recovery 1,
+  retransmits some packets in flight 1 and then sends some new data as
+  flight 2
+
++ round 3: sender receives some SACKs for flight 2, notes losses, and
+  retransmits some packets to fill the holes in flight 2
+
++ fast recovery has some lost retransmits in flight 1 and continues
+  for one or more rounds sending retransmits for flight 1 and flight 2
+
++ fast recovery 1 completes when snd_una reaches high_seq at end of
+  flight 1
+
++ there are still holes in the SACK scoreboard in flight 2, so we
+  enter fast recovery 2, but some retransmits in the flight 2 sequence
+  range are still in flight (retrans_out > 0), so we can't execute the
+  new retrans_stamp=0 added here to clear retrans_stamp
+
+It's not yet clear how to fix these remaining (1)/(2) issues in an
+efficient way without breaking undo behavior, given that retrans_stamp
+is currently used for undo and ETIMEDOUT. Perhaps the optimal (but
+expensive) strategy would be to set retrans_stamp to the timestamp of
+the earliest outstanding retransmit when entering fast recovery. But
+at least this commit makes things better.
+
+Note that this does not change the semantics of retrans_stamp; it
+simply makes retrans_stamp accurate in some cases where it was not
+before:
+
+(1) Some loss recovery, followed by an immediate entry into a fast
+recovery, where there are no retransmits out when entering the fast
+recovery.
+
+(2) When a TFO server has a SYNACK retransmit that sets retrans_stamp,
+and then the ACK that completes the 3-way handshake has SACK blocks
+that trigger a fast recovery. In this case when entering fast recovery
+we want to zero out the retrans_stamp from the TFO SYNACK retransmit,
+and set the retrans_stamp based on the timestamp of the fast recovery.
+
+We introduce a tcp_retrans_stamp_cleanup() helper, because this
+two-line sequence already appears in 3 places and is about to appear
+in 2 more as a result of this bug fix patch series. Once this bug fix
+patch series in the net branch makes it into the net-next branch
+we'll update the 3 other call sites to use the new helper.
+
+This is a long-standing issue. The Fixes tag below is chosen to be the
+oldest commit at which the patch will apply cleanly, which is from
+Linux v3.5 in 2012.
+
+Fixes: 1fbc340514fc ("tcp: early retransmit: tcp_enter_recovery()")
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Link: https://patch.msgid.link/20241001200517.2756803-3-ncardwell.sw@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index 699c48745cdd9..3e7533f645121 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -2482,6 +2482,16 @@ static bool tcp_any_retrans_done(const struct sock *sk)
+       return false;
+ }
+ 
++/* If loss recovery is finished and there are no retransmits out in the
++ * network, then we clear retrans_stamp so that upon the next loss recovery
++ * retransmits_timed_out() and timestamp-undo are using the correct value.
++ */
++static void tcp_retrans_stamp_cleanup(struct sock *sk)
++{
++      if (!tcp_any_retrans_done(sk))
++              tcp_sk(sk)->retrans_stamp = 0;
++}
++
+ static void DBGUNDO(struct sock *sk, const char *msg)
+ {
+ #if FASTRETRANS_DEBUG > 1
+@@ -2849,6 +2859,9 @@ void tcp_enter_recovery(struct sock *sk, bool ece_ack)
+       struct tcp_sock *tp = tcp_sk(sk);
+       int mib_idx;
+ 
++      /* Start the clock with our fast retransmit, for undo and ETIMEDOUT. */
++      tcp_retrans_stamp_cleanup(sk);
++
+       if (tcp_is_reno(tp))
+               mib_idx = LINUX_MIB_TCPRENORECOVERY;
+       else
+-- 
+2.43.0
+
diff --git a/queue-5.15/tcp-fix-to-allow-timestamp-undo-if-no-retransmits-we.patch b/queue-5.15/tcp-fix-to-allow-timestamp-undo-if-no-retransmits-we.patch

new file mode 100644 (file)

index 0000000..8bce066
--- /dev/null
+++ b/queue-5.15/tcp-fix-to-allow-timestamp-undo-if-no-retransmits-we.patch
@@ -0,0 +1,96 @@
+From 49defdbe6cc6cb7fbc046d1195d4b4ea15f9c18d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 1 Oct 2024 20:05:15 +0000
+Subject: tcp: fix to allow timestamp undo if no retransmits were sent
+
+From: Neal Cardwell <ncardwell@google.com>
+
+[ Upstream commit e37ab7373696e650d3b6262a5b882aadad69bb9e ]
+
+Fix the TCP loss recovery undo logic in tcp_packet_delayed() so that
+it can trigger undo even if TSQ prevents a fast recovery episode from
+reaching tcp_retransmit_skb().
+
+Geumhwan Yu <geumhwan.yu@samsung.com> recently reported that after
+this commit from 2019:
+
+commit bc9f38c8328e ("tcp: avoid unconditional congestion window undo
+on SYN retransmit")
+
+...and before this fix we could have buggy scenarios like the
+following:
+
++ Due to reordering, a TCP connection receives some SACKs and enters a
+  spurious fast recovery.
+
++ TSQ prevents all invocations of tcp_retransmit_skb(), because many
+  skbs are queued in lower layers of the sending machine's network
+  stack; thus tp->retrans_stamp remains 0.
+
++ The connection receives a TCP timestamp ECR value echoing a
+  timestamp before the fast recovery, indicating that the fast
+  recovery was spurious.
+
++ The connection fails to undo the spurious fast recovery because
+  tp->retrans_stamp is 0, and thus tcp_packet_delayed() returns false,
+  due to the new logic in the 2019 commit: commit bc9f38c8328e ("tcp:
+  avoid unconditional congestion window undo on SYN retransmit")
+
+This fix tweaks the logic to be more similar to the
+tcp_packet_delayed() logic before bc9f38c8328e, except that we take
+care not to be fooled by the FLAG_SYN_ACKED code path zeroing out
+tp->retrans_stamp (the bug noted and fixed by Yuchung in
+bc9f38c8328e).
+
+Note that this returns the high-level behavior of tcp_packet_delayed()
+to again match the comment for the function, which says: "Nothing was
+retransmitted or returned timestamp is less than timestamp of the
+first retransmission." Note that this comment is in the original
+2005-04-16 Linux git commit, so this is evidently long-standing
+behavior.
+
+Fixes: bc9f38c8328e ("tcp: avoid unconditional congestion window undo on SYN retransmit")
+Reported-by: Geumhwan Yu <geumhwan.yu@samsung.com>
+Diagnosed-by: Geumhwan Yu <geumhwan.yu@samsung.com>
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Link: https://patch.msgid.link/20241001200517.2756803-2-ncardwell.sw@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 18 ++++++++++++++++--
+ 1 file changed, 16 insertions(+), 2 deletions(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index c51ad6b353eef..699c48745cdd9 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -2433,8 +2433,22 @@ static bool tcp_skb_spurious_retrans(const struct tcp_sock *tp,
+  */
+ static inline bool tcp_packet_delayed(const struct tcp_sock *tp)
+ {
+-      return tp->retrans_stamp &&
+-             tcp_tsopt_ecr_before(tp, tp->retrans_stamp);
++      const struct sock *sk = (const struct sock *)tp;
++
++      if (tp->retrans_stamp &&
++          tcp_tsopt_ecr_before(tp, tp->retrans_stamp))
++              return true;  /* got echoed TS before first retransmission */
++
++      /* Check if nothing was retransmitted (retrans_stamp==0), which may
++       * happen in fast recovery due to TSQ. But we ignore zero retrans_stamp
++       * in TCP_SYN_SENT, since when we set FLAG_SYN_ACKED we also clear
++       * retrans_stamp even if we had retransmitted the SYN.
++       */
++      if (!tp->retrans_stamp &&          /* no record of a retransmit/SYN? */
++          sk->sk_state != TCP_SYN_SENT)  /* not the FLAG_SYN_ACKED case? */
++              return true;  /* nothing was retransmitted */
++
++      return false;
+ }
+ 
+ /* Undo procedures. */
+-- 
+2.43.0
+
diff --git a/queue-5.15/thermal-int340x-processor_thermal-set-feature-mask-b.patch b/queue-5.15/thermal-int340x-processor_thermal-set-feature-mask-b.patch

new file mode 100644 (file)

index 0000000..a5d61f2
--- /dev/null
+++ b/queue-5.15/thermal-int340x-processor_thermal-set-feature-mask-b.patch
@@ -0,0 +1,87 @@
+From 6d53a497e3a69884eea6b2e2dbb68ff70bfe0506 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 9 Oct 2023 12:05:34 -0700
+Subject: thermal: int340x: processor_thermal: Set feature mask before
+ proc_thermal_add
+
+From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+
+[ Upstream commit 6ebc25d8b053a208786295bab58abbb66b39c318 ]
+
+The function proc_thermal_add() adds sysfs entries for power limits.
+
+The feature mask of available features is not present at that time, so
+it cannot be used by proc_thermal_add() to selectively create sysfs
+attributes.
+
+The feature mask is set by proc_thermal_mmio_add(), so modify the code
+to call it before proc_thermal_add() so as to allow the latter to use
+the feature mask.
+
+There is no functional impact with this change.
+
+Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+[ rjw: Changelog edits ]
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Stable-dep-of: 99ca0b57e49f ("thermal: intel: int340x: processor: Fix warning during module unload")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../processor_thermal_device_pci.c            | 21 +++++++++----------
+ 1 file changed, 10 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
+index b4bcd3fe9eb2f..921ed55c30f06 100644
+--- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
++++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
+@@ -237,26 +237,26 @@ static int proc_thermal_pci_probe(struct pci_dev *pdev, const struct pci_device_
+ 
+       INIT_DELAYED_WORK(&pci_info->work, proc_thermal_threshold_work_fn);
+ 
+-      ret = proc_thermal_add(&pdev->dev, proc_priv);
+-      if (ret) {
+-              dev_err(&pdev->dev, "error: proc_thermal_add, will continue\n");
+-              pci_info->no_legacy = 1;
+-      }
+-
+       proc_priv->priv_data = pci_info;
+       pci_info->proc_priv = proc_priv;
+       pci_set_drvdata(pdev, proc_priv);
+ 
+       ret = proc_thermal_mmio_add(pdev, proc_priv, id->driver_data);
+       if (ret)
+-              goto err_ret_thermal;
++              return ret;
++
++      ret = proc_thermal_add(&pdev->dev, proc_priv);
++      if (ret) {
++              dev_err(&pdev->dev, "error: proc_thermal_add, will continue\n");
++              pci_info->no_legacy = 1;
++      }
+ 
+       pci_info->tzone = thermal_zone_device_register("TCPU_PCI", 1, 1, pci_info,
+                                                       &tzone_ops,
+                                                       &tzone_params, 0, 0);
+       if (IS_ERR(pci_info->tzone)) {
+               ret = PTR_ERR(pci_info->tzone);
+-              goto err_ret_mmio;
++              goto err_del_legacy;
+       }
+ 
+       /* request and enable interrupt */
+@@ -283,11 +283,10 @@ static int proc_thermal_pci_probe(struct pci_dev *pdev, const struct pci_device_
+       pci_free_irq_vectors(pdev);
+ err_ret_tzone:
+       thermal_zone_device_unregister(pci_info->tzone);
+-err_ret_mmio:
+-      proc_thermal_mmio_remove(pdev, proc_priv);
+-err_ret_thermal:
++err_del_legacy:
+       if (!pci_info->no_legacy)
+               proc_thermal_remove(proc_priv);
++      proc_thermal_mmio_remove(pdev, proc_priv);
+       pci_disable_device(pdev);
+ 
+       return ret;
+-- 
+2.43.0
+
diff --git a/queue-5.15/thermal-intel-int340x-processor-fix-warning-during-m.patch b/queue-5.15/thermal-intel-int340x-processor-fix-warning-during-m.patch

new file mode 100644 (file)

index 0000000..fa1112f
--- /dev/null
+++ b/queue-5.15/thermal-intel-int340x-processor-fix-warning-during-m.patch
@@ -0,0 +1,82 @@
+From 236aa930a4d4efdf6160aa6fe86d654f1b2f0cd9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 30 Sep 2024 16:17:57 +0800
+Subject: thermal: intel: int340x: processor: Fix warning during module unload
+
+From: Zhang Rui <rui.zhang@intel.com>
+
+[ Upstream commit 99ca0b57e49fb73624eede1c4396d9e3d10ccf14 ]
+
+The processor_thermal driver uses pcim_enable_device() to enable a PCI
+device, which means the device will be automatically disabled on driver
+detach.  Thus there is no need to call pci_disable_device() again on it.
+
+With recent PCI device resource management improvements, e.g. commit
+f748a07a0b64 ("PCI: Remove legacy pcim_release()"), this problem is
+exposed and triggers the warning below.
+
+ [  224.010735] proc_thermal_pci 0000:00:04.0: disabling already-disabled device
+ [  224.010747] WARNING: CPU: 8 PID: 4442 at drivers/pci/pci.c:2250 pci_disable_device+0xe5/0x100
+ ...
+ [  224.010844] Call Trace:
+ [  224.010845]  <TASK>
+ [  224.010847]  ? show_regs+0x6d/0x80
+ [  224.010851]  ? __warn+0x8c/0x140
+ [  224.010854]  ? pci_disable_device+0xe5/0x100
+ [  224.010856]  ? report_bug+0x1c9/0x1e0
+ [  224.010859]  ? handle_bug+0x46/0x80
+ [  224.010862]  ? exc_invalid_op+0x1d/0x80
+ [  224.010863]  ? asm_exc_invalid_op+0x1f/0x30
+ [  224.010867]  ? pci_disable_device+0xe5/0x100
+ [  224.010869]  ? pci_disable_device+0xe5/0x100
+ [  224.010871]  ? kfree+0x21a/0x2b0
+ [  224.010873]  pcim_disable_device+0x20/0x30
+ [  224.010875]  devm_action_release+0x16/0x20
+ [  224.010878]  release_nodes+0x47/0xc0
+ [  224.010880]  devres_release_all+0x9f/0xe0
+ [  224.010883]  device_unbind_cleanup+0x12/0x80
+ [  224.010885]  device_release_driver_internal+0x1ca/0x210
+ [  224.010887]  driver_detach+0x4e/0xa0
+ [  224.010889]  bus_remove_driver+0x6f/0xf0
+ [  224.010890]  driver_unregister+0x35/0x60
+ [  224.010892]  pci_unregister_driver+0x44/0x90
+ [  224.010894]  proc_thermal_pci_driver_exit+0x14/0x5f0 [processor_thermal_device_pci]
+ ...
+ [  224.010921] ---[ end trace 0000000000000000 ]---
+
+Remove the excess pci_disable_device() calls.
+
+Fixes: acd65d5d1cf4 ("thermal/drivers/int340x/processor_thermal: Add PCI MMIO based thermal driver")
+Signed-off-by: Zhang Rui <rui.zhang@intel.com>
+Reviewed-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Link: https://patch.msgid.link/20240930081801.28502-3-rui.zhang@intel.com
+[ rjw: Subject and changelog edits ]
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../intel/int340x_thermal/processor_thermal_device_pci.c        | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
+index 921ed55c30f06..f1c65eae8c10a 100644
+--- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
++++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
+@@ -287,7 +287,6 @@ static int proc_thermal_pci_probe(struct pci_dev *pdev, const struct pci_device_
+       if (!pci_info->no_legacy)
+               proc_thermal_remove(proc_priv);
+       proc_thermal_mmio_remove(pdev, proc_priv);
+-      pci_disable_device(pdev);
+ 
+       return ret;
+ }
+@@ -309,7 +308,6 @@ static void proc_thermal_pci_remove(struct pci_dev *pdev)
+       proc_thermal_mmio_remove(pdev, pci_info->proc_priv);
+       if (!pci_info->no_legacy)
+               proc_thermal_remove(proc_priv);
+-      pci_disable_device(pdev);
+ }
+ 
+ #ifdef CONFIG_PM_SLEEP
+-- 
+2.43.0
+
author	Sasha Levin <sashal@kernel.org>
	Sun, 13 Oct 2024 02:48:14 +0000 (22:48 -0400)
committer	Sasha Levin <sashal@kernel.org>
	Sun, 13 Oct 2024 02:48:14 +0000 (22:48 -0400)
queue-5.15/bluetooth-rfcomm-fix-possible-deadlock-in-rfcomm_sk_.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/gpio-aspeed-add-the-flush-write-to-ensure-the-write-.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/gpio-aspeed-use-devm_clk-api-to-manage-clock-source.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/i40e-fix-macvlan-leak-by-synchronizing-access-to-mac.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/ice-fix-netif_is_ice-in-safe-mode.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/ice-fix-vlan-replay-after-reset.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/igb-do-not-bring-the-device-up-after-non-fatal-error.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/mctp-handle-error-of-rtnl_register_module.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/net-add-l3mdev-index-to-flow-struct-and-avoid-oif-re.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/net-dsa-b53-allow-lower-mtus-on-bcm5325-5365.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/net-dsa-b53-fix-jumbo-frame-mtu-check.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/net-dsa-b53-fix-jumbo-frames-on-10-100-ports.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/net-dsa-b53-fix-max-mtu-for-1g-switches.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/net-dsa-b53-fix-max-mtu-for-bcm5325-bcm5365.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/net-ibm-emac-mal-fix-wrong-goto.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/net-phy-bcm84881-fix-some-error-handling-paths.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/net-phy-dp83869-fix-memory-corruption-when-enabling-.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/net-rtnetlink-add-msg-kind-names.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/net-sched-accept-tca_stab-only-for-root-qdisc.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/netfilter-br_netfilter-fix-panic-with-metadata_dst-s.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/netfilter-fib-check-correct-rtable-in-vrf-setups.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/netfilter-rpfilter-fib-populate-flowic_l3mdev-field.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/netfilter-rpfilter-fib-set-flowic_uid-correctly-for-.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/netfilter-xtables-avoid-nfproto_unspec-where-needed.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/nfsd-mark-filecache-down-if-init-fails.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/nfsv4-prevent-null-pointer-dereference-in-nfs42_comp.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/ppp-fix-ppp_async_encode-illegal-access.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/rtnetlink-add-bulk-registration-helpers-for-rtnetlin.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/sctp-ensure-sk_state-is-set-to-closed-if-hashing-fai.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/series		patch \| blob \| blame \| history
queue-5.15/slip-make-slhc_remember-more-robust-against-maliciou.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/sunrpc-fix-integer-overflow-in-decode_rc_list.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/tcp-fix-tcp_enter_recovery-to-zero-retrans_stamp-whe.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/tcp-fix-to-allow-timestamp-undo-if-no-retransmits-we.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/thermal-int340x-processor_thermal-set-feature-mask-b.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/thermal-intel-int340x-processor-fix-warning-during-m.patch	[new file with mode: 0644]	patch \| blob