From: Sasha Levin Date: Sun, 3 Mar 2024 15:56:43 +0000 (-0500) Subject: Fixes for 5.15 X-Git-Tag: v4.19.309~92^2~3 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=c4d28416588fe331bc46a60b96e641b418a65795;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 5.15 Signed-off-by: Sasha Levin --- diff --git a/queue-5.15/afs-fix-endless-loop-in-directory-parsing.patch b/queue-5.15/afs-fix-endless-loop-in-directory-parsing.patch new file mode 100644 index 00000000000..bde637e8c7b --- /dev/null +++ b/queue-5.15/afs-fix-endless-loop-in-directory-parsing.patch @@ -0,0 +1,68 @@ +From 38166116c770d034a08477438037a48df4b8bf94 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 23 Feb 2024 13:15:02 +0000 +Subject: afs: Fix endless loop in directory parsing + +From: David Howells + +[ Upstream commit 5f7a07646655fb4108da527565dcdc80124b14c4 ] + +If a directory has a block with only ".__afsXXXX" files in it (from +uncompleted silly-rename), these .__afsXXXX files are skipped but without +advancing the file position in the dir_context. This leads to +afs_dir_iterate() repeating the block again and again. + +Fix this by making the code that skips the .__afsXXXX file also manually +advance the file position. + +The symptoms are a soft lockup: + + watchdog: BUG: soft lockup - CPU#3 stuck for 52s! [check:5737] + ... + RIP: 0010:afs_dir_iterate_block+0x39/0x1fd + ... + ? watchdog_timer_fn+0x1a6/0x213 + ... + ? asm_sysvec_apic_timer_interrupt+0x16/0x20 + ? 
afs_dir_iterate_block+0x39/0x1fd + afs_dir_iterate+0x10a/0x148 + afs_readdir+0x30/0x4a + iterate_dir+0x93/0xd3 + __do_sys_getdents64+0x6b/0xd4 + +This is almost certainly the actual fix for: + + https://bugzilla.kernel.org/show_bug.cgi?id=218496 + +Fixes: 57e9d49c5452 ("afs: Hide silly-rename files from userspace") +Signed-off-by: David Howells +Link: https://lore.kernel.org/r/786185.1708694102@warthog.procyon.org.uk +Reviewed-by: Marc Dionne +cc: Marc Dionne +cc: Markus Suvanto +cc: linux-afs@lists.infradead.org +Signed-off-by: Christian Brauner +Signed-off-by: Sasha Levin +--- + fs/afs/dir.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/fs/afs/dir.c b/fs/afs/dir.c +index 106426de50279..c4e22e9f7a666 100644 +--- a/fs/afs/dir.c ++++ b/fs/afs/dir.c +@@ -497,8 +497,10 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode, + dire->u.name[0] == '.' && + ctx->actor != afs_lookup_filldir && + ctx->actor != afs_lookup_one_filldir && +- memcmp(dire->u.name, ".__afs", 6) == 0) ++ memcmp(dire->u.name, ".__afs", 6) == 0) { ++ ctx->pos = blkoff + next * sizeof(union afs_xdr_dirent); + continue; ++ } + + /* found the next entry */ + if (!dir_emit(ctx, dire->u.name, nlen, +-- +2.43.0 + diff --git a/queue-5.15/alsa-drop-leftover-snd-rtctimer-stuff-from-makefile.patch b/queue-5.15/alsa-drop-leftover-snd-rtctimer-stuff-from-makefile.patch new file mode 100644 index 00000000000..51e8de5c467 --- /dev/null +++ b/queue-5.15/alsa-drop-leftover-snd-rtctimer-stuff-from-makefile.patch @@ -0,0 +1,35 @@ +From 6cacd78d3d0d7f306351ea57e1693c9ebcf06c13 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 21 Feb 2024 10:21:56 +0100 +Subject: ALSA: Drop leftover snd-rtctimer stuff from Makefile + +From: Takashi Iwai + +[ Upstream commit 4df49712eb54141be00a9312547436d55677f092 ] + +We forgot to remove the line for snd-rtctimer from Makefile while +dropping the functionality. Get rid of the stale line. 
+ +Fixes: 34ce71a96dcb ("ALSA: timer: remove legacy rtctimer") +Link: https://lore.kernel.org/r/20240221092156.28695-1-tiwai@suse.de +Signed-off-by: Takashi Iwai +Signed-off-by: Sasha Levin +--- + sound/core/Makefile | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/sound/core/Makefile b/sound/core/Makefile +index 79e1407cd0de7..7da92e0383e1c 100644 +--- a/sound/core/Makefile ++++ b/sound/core/Makefile +@@ -33,7 +33,6 @@ snd-ctl-led-objs := control_led.o + snd-rawmidi-objs := rawmidi.o + snd-timer-objs := timer.o + snd-hrtimer-objs := hrtimer.o +-snd-rtctimer-objs := rtctimer.o + snd-hwdep-objs := hwdep.o + snd-seq-device-objs := seq_device.o + +-- +2.43.0 + diff --git a/queue-5.15/bluetooth-avoid-potential-use-after-free-in-hci_erro.patch b/queue-5.15/bluetooth-avoid-potential-use-after-free-in-hci_erro.patch new file mode 100644 index 00000000000..0d65df31e82 --- /dev/null +++ b/queue-5.15/bluetooth-avoid-potential-use-after-free-in-hci_erro.patch @@ -0,0 +1,66 @@ +From 3cd502c7c9bd2a1f6501604c1bf1203a1befa44e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 4 Jan 2024 11:56:32 +0000 +Subject: Bluetooth: Avoid potential use-after-free in hci_error_reset + +From: Ying Hsu + +[ Upstream commit 2449007d3f73b2842c9734f45f0aadb522daf592 ] + +While handling the HCI_EV_HARDWARE_ERROR event, if the underlying +BT controller is not responding, the GPIO reset mechanism would +free the hci_dev and lead to a use-after-free in hci_error_reset. + +Here's the call trace observed on a ChromeOS device with Intel AX201: + queue_work_on+0x3e/0x6c + __hci_cmd_sync_sk+0x2ee/0x4c0 [bluetooth ] + ? init_wait_entry+0x31/0x31 + __hci_cmd_sync+0x16/0x20 [bluetooth ] + hci_error_reset+0x4f/0xa4 [bluetooth ] + process_one_work+0x1d8/0x33f + worker_thread+0x21b/0x373 + kthread+0x13a/0x152 + ? pr_cont_work+0x54/0x54 + ? 
kthread_blkcg+0x31/0x31 + ret_from_fork+0x1f/0x30 + +This patch holds the reference count on the hci_dev while processing +a HCI_EV_HARDWARE_ERROR event to avoid potential crash. + +Fixes: c7741d16a57c ("Bluetooth: Perform a power cycle when receiving hardware error event") +Signed-off-by: Ying Hsu +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Sasha Levin +--- + net/bluetooth/hci_core.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c +index b3b597960c562..a8854b24f4cfb 100644 +--- a/net/bluetooth/hci_core.c ++++ b/net/bluetooth/hci_core.c +@@ -2330,6 +2330,7 @@ static void hci_error_reset(struct work_struct *work) + { + struct hci_dev *hdev = container_of(work, struct hci_dev, error_reset); + ++ hci_dev_hold(hdev); + BT_DBG("%s", hdev->name); + + if (hdev->hw_error) +@@ -2337,10 +2338,10 @@ static void hci_error_reset(struct work_struct *work) + else + bt_dev_err(hdev, "hardware error 0x%2.2x", hdev->hw_error_code); + +- if (hci_dev_do_close(hdev)) +- return; ++ if (!hci_dev_do_close(hdev)) ++ hci_dev_do_open(hdev); + +- hci_dev_do_open(hdev); ++ hci_dev_put(hdev); + } + + void hci_uuids_clear(struct hci_dev *hdev) +-- +2.43.0 + diff --git a/queue-5.15/bluetooth-enforce-validation-on-max-value-of-connect.patch b/queue-5.15/bluetooth-enforce-validation-on-max-value-of-connect.patch new file mode 100644 index 00000000000..4af382f6ea9 --- /dev/null +++ b/queue-5.15/bluetooth-enforce-validation-on-max-value-of-connect.patch @@ -0,0 +1,68 @@ +From d5a12daef1accfe1f0be72b6de584739d96c11c3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 25 Jan 2024 14:50:28 +0800 +Subject: Bluetooth: Enforce validation on max value of connection interval + +From: Kai-Heng Feng + +[ Upstream commit e4b019515f950b4e6e5b74b2e1bb03a90cb33039 ] + +Right now Linux BT stack cannot pass test case "GAP/CONN/CPUP/BV-05-C +'Connection Parameter Update Procedure Invalid Parameters Central +Responder'" 
in Bluetooth Test Suite revision GAP.TS.p44. [0] + +That was resolved by commit c49a8682fc5d ("Bluetooth: validate BLE +connection interval updates"), but later got reverted because devices +like keyboards and mice may require a low connection interval. + +So only validate the max value connection interval to pass the Test +Suite, and let devices request a low connection interval if needed. + +[0] https://www.bluetooth.org/docman/handlers/DownloadDoc.ashx?doc_id=229869 + +Fixes: 68d19d7d9957 ("Revert "Bluetooth: validate BLE connection interval updates"") +Signed-off-by: Kai-Heng Feng +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Sasha Levin +--- + net/bluetooth/hci_event.c | 4 ++++ + net/bluetooth/l2cap_core.c | 8 +++++++- + 2 files changed, 11 insertions(+), 1 deletion(-) + +diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c +index 0bfd856d079d5..ba7242729a8fb 100644 +--- a/net/bluetooth/hci_event.c ++++ b/net/bluetooth/hci_event.c +@@ -6058,6 +6058,10 @@ static void hci_le_remote_conn_param_req_evt(struct hci_dev *hdev, + return send_conn_param_neg_reply(hdev, handle, + HCI_ERROR_UNKNOWN_CONN_ID); + ++ if (max > hcon->le_conn_max_interval) ++ return send_conn_param_neg_reply(hdev, handle, ++ HCI_ERROR_INVALID_LL_PARAMS); ++ + if (hci_check_conn_params(min, max, latency, timeout)) + return send_conn_param_neg_reply(hdev, handle, + HCI_ERROR_INVALID_LL_PARAMS); +diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c +index 850b6aab73779..11bfc8737e6ce 100644 +--- a/net/bluetooth/l2cap_core.c ++++ b/net/bluetooth/l2cap_core.c +@@ -5614,7 +5614,13 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn, + + memset(&rsp, 0, sizeof(rsp)); + +- err = hci_check_conn_params(min, max, latency, to_multiplier); ++ if (max > hcon->le_conn_max_interval) { ++ BT_DBG("requested connection interval exceeds current bounds."); ++ err = -EINVAL; ++ } else { ++ err = hci_check_conn_params(min, max, latency, to_multiplier); ++ } ++ 
+ if (err) + rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_REJECTED); + else +-- +2.43.0 + diff --git a/queue-5.15/bluetooth-hci_event-fix-handling-of-hci_ev_io_capa_r.patch b/queue-5.15/bluetooth-hci_event-fix-handling-of-hci_ev_io_capa_r.patch new file mode 100644 index 00000000000..3aa7d6528e2 --- /dev/null +++ b/queue-5.15/bluetooth-hci_event-fix-handling-of-hci_ev_io_capa_r.patch @@ -0,0 +1,42 @@ +From 9bb5f007e7a4af2a8e7284a87875684be129592f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 22 Jan 2024 09:02:47 -0500 +Subject: Bluetooth: hci_event: Fix handling of HCI_EV_IO_CAPA_REQUEST + +From: Luiz Augusto von Dentz + +[ Upstream commit 7e74aa53a68bf60f6019bd5d9a9a1406ec4d4865 ] + +If we received HCI_EV_IO_CAPA_REQUEST while +HCI_OP_READ_REMOTE_EXT_FEATURES is yet to be responded assume the remote +does support SSP since otherwise this event shouldn't be generated. + +Link: https://lore.kernel.org/linux-bluetooth/CABBYNZ+9UdG1cMZVmdtN3U2aS16AKMCyTARZZyFX7xTEDWcMOw@mail.gmail.com/T/#t +Fixes: c7f59461f5a7 ("Bluetooth: Fix a refcnt underflow problem for hci_conn") +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Sasha Levin +--- + net/bluetooth/hci_event.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c +index c4a35d4612b05..0bfd856d079d5 100644 +--- a/net/bluetooth/hci_event.c ++++ b/net/bluetooth/hci_event.c +@@ -4720,9 +4720,12 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb) + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); +- if (!conn || !hci_conn_ssp_enabled(conn)) ++ if (!conn || !hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) + goto unlock; + ++ /* Assume remote supports SSP since it has triggered this event */ ++ set_bit(HCI_CONN_SSP_ENABLED, &conn->flags); ++ + hci_conn_hold(conn); + + if (!hci_dev_test_flag(hdev, HCI_MGMT)) +-- +2.43.0 + diff --git 
a/queue-5.15/bluetooth-hci_event-fix-wrongly-recorded-wakeup-bd_a.patch b/queue-5.15/bluetooth-hci_event-fix-wrongly-recorded-wakeup-bd_a.patch new file mode 100644 index 00000000000..4ba58c1dbfb --- /dev/null +++ b/queue-5.15/bluetooth-hci_event-fix-wrongly-recorded-wakeup-bd_a.patch @@ -0,0 +1,42 @@ +From bbf6131045fd6659821e6ea7c4f14301cc924232 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 9 Jan 2024 19:03:23 +0800 +Subject: Bluetooth: hci_event: Fix wrongly recorded wakeup BD_ADDR + +From: Zijun Hu + +[ Upstream commit 61a5ab72edea7ebc3ad2c6beea29d966f528ebfb ] + +hci_store_wake_reason() wrongly parses event HCI_Connection_Request +as HCI_Connection_Complete and HCI_Connection_Complete as +HCI_Connection_Request, so causes recording wakeup BD_ADDR error and +potential stability issue, fix it by using the correct field. + +Fixes: 2f20216c1d6f ("Bluetooth: Emit controller suspend and resume events") +Signed-off-by: Zijun Hu +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Sasha Levin +--- + net/bluetooth/hci_event.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c +index 2ad2f4647847c..c4a35d4612b05 100644 +--- a/net/bluetooth/hci_event.c ++++ b/net/bluetooth/hci_event.c +@@ -6272,10 +6272,10 @@ static void hci_store_wake_reason(struct hci_dev *hdev, u8 event, + * keep track of the bdaddr of the connection event that woke us up. 
+ */ + if (event == HCI_EV_CONN_REQUEST) { +- bacpy(&hdev->wake_addr, &conn_complete->bdaddr); ++ bacpy(&hdev->wake_addr, &conn_request->bdaddr); + hdev->wake_addr_type = BDADDR_BREDR; + } else if (event == HCI_EV_CONN_COMPLETE) { +- bacpy(&hdev->wake_addr, &conn_request->bdaddr); ++ bacpy(&hdev->wake_addr, &conn_complete->bdaddr); + hdev->wake_addr_type = BDADDR_BREDR; + } else if (event == HCI_EV_LE_META) { + struct hci_ev_le_meta *le_ev = (void *)skb->data; +-- +2.43.0 + diff --git a/queue-5.15/cpufreq-intel_pstate-fix-pstate-limits-enforcement-f.patch b/queue-5.15/cpufreq-intel_pstate-fix-pstate-limits-enforcement-f.patch new file mode 100644 index 00000000000..ddb31557b54 --- /dev/null +++ b/queue-5.15/cpufreq-intel_pstate-fix-pstate-limits-enforcement-f.patch @@ -0,0 +1,42 @@ +From 4c864a7c9d27772a6b65133248c2d758de049e75 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 17 Feb 2024 13:30:10 -0800 +Subject: cpufreq: intel_pstate: fix pstate limits enforcement for adjust_perf + call back + +From: Doug Smythies + +[ Upstream commit f0a0fc10abb062d122db5ac4ed42f6d1ca342649 ] + +There is a loophole in pstate limit clamping for the intel_cpufreq CPU +frequency scaling driver (intel_pstate in passive mode), schedutil CPU +frequency scaling governor, HWP (HardWare Pstate) control enabled, when +the adjust_perf call back path is used. + +Fix it. + +Fixes: a365ab6b9dfb cpufreq: intel_pstate: Implement the ->adjust_perf() callback +Signed-off-by: Doug Smythies +Signed-off-by: Rafael J. 
Wysocki +Signed-off-by: Sasha Levin +--- + drivers/cpufreq/intel_pstate.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c +index dd5f4eee9ffb6..4de71e772f514 100644 +--- a/drivers/cpufreq/intel_pstate.c ++++ b/drivers/cpufreq/intel_pstate.c +@@ -2787,6 +2787,9 @@ static void intel_cpufreq_adjust_perf(unsigned int cpunum, + if (min_pstate < cpu->min_perf_ratio) + min_pstate = cpu->min_perf_ratio; + ++ if (min_pstate > cpu->max_perf_ratio) ++ min_pstate = cpu->max_perf_ratio; ++ + max_pstate = min(cap_pstate, cpu->max_perf_ratio); + if (max_pstate < min_pstate) + max_pstate = min_pstate; +-- +2.43.0 + diff --git a/queue-5.15/efi-capsule-loader-fix-incorrect-allocation-size.patch b/queue-5.15/efi-capsule-loader-fix-incorrect-allocation-size.patch new file mode 100644 index 00000000000..939e316da46 --- /dev/null +++ b/queue-5.15/efi-capsule-loader-fix-incorrect-allocation-size.patch @@ -0,0 +1,43 @@ +From d2836ae85f8c0e574fbd4404d0ca6cf69a6cc476 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 12 Feb 2024 12:24:40 +0100 +Subject: efi/capsule-loader: fix incorrect allocation size + +From: Arnd Bergmann + +[ Upstream commit fccfa646ef3628097d59f7d9c1a3e84d4b6bb45e ] + +gcc-14 notices that the allocation with sizeof(void *) on 32-bit architectures +is not enough for a 64-bit phys_addr_t: + +drivers/firmware/efi/capsule-loader.c: In function 'efi_capsule_open': +drivers/firmware/efi/capsule-loader.c:295:24: error: allocation of insufficient size '4' for type 'phys_addr_t' {aka 'long long unsigned int'} with size '8' [-Werror=alloc-size] + 295 | cap_info->phys = kzalloc(sizeof(void *), GFP_KERNEL); + | ^ + +Use the correct type instead here. 
+ +Fixes: f24c4d478013 ("efi/capsule-loader: Reinstate virtual capsule mapping") +Signed-off-by: Arnd Bergmann +Signed-off-by: Ard Biesheuvel +Signed-off-by: Sasha Levin +--- + drivers/firmware/efi/capsule-loader.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c +index 3e8d4b51a8140..97bafb5f70389 100644 +--- a/drivers/firmware/efi/capsule-loader.c ++++ b/drivers/firmware/efi/capsule-loader.c +@@ -292,7 +292,7 @@ static int efi_capsule_open(struct inode *inode, struct file *file) + return -ENOMEM; + } + +- cap_info->phys = kzalloc(sizeof(void *), GFP_KERNEL); ++ cap_info->phys = kzalloc(sizeof(phys_addr_t), GFP_KERNEL); + if (!cap_info->phys) { + kfree(cap_info->pages); + kfree(cap_info); +-- +2.43.0 + diff --git a/queue-5.15/fbcon-always-restore-the-old-font-data-in-fbcon_do_s.patch b/queue-5.15/fbcon-always-restore-the-old-font-data-in-fbcon_do_s.patch new file mode 100644 index 00000000000..766c321368c --- /dev/null +++ b/queue-5.15/fbcon-always-restore-the-old-font-data-in-fbcon_do_s.patch @@ -0,0 +1,95 @@ +From ea962eef9ca6461736867cb86a60da28aaa572cb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 8 Feb 2024 12:44:11 +0100 +Subject: fbcon: always restore the old font data in fbcon_do_set_font() + +From: Jiri Slaby (SUSE) + +[ Upstream commit 00d6a284fcf3fad1b7e1b5bc3cd87cbfb60ce03f ] + +Commit a5a923038d70 (fbdev: fbcon: Properly revert changes when +vc_resize() failed) started restoring old font data upon failure (of +vc_resize()). But it performs so only for user fonts. It means that the +"system"/internal fonts are not restored at all. So in result, the very +first call to fbcon_do_set_font() performs no restore at all upon +failing vc_resize(). + +This can be reproduced by Syzkaller to crash the system on the next +invocation of font_get(). It's rather hard to hit the allocation failure +in vc_resize() on the first font_set(), but not impossible. 
Esp. if +fault injection is used to aid the execution/failure. It was +demonstrated by Sirius: + BUG: unable to handle page fault for address: fffffffffffffff8 + #PF: supervisor read access in kernel mode + #PF: error_code(0x0000) - not-present page + PGD cb7b067 P4D cb7b067 PUD cb7d067 PMD 0 + Oops: 0000 [#1] PREEMPT SMP KASAN + CPU: 1 PID: 8007 Comm: poc Not tainted 6.7.0-g9d1694dc91ce #20 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 + RIP: 0010:fbcon_get_font+0x229/0x800 drivers/video/fbdev/core/fbcon.c:2286 + Call Trace: + + con_font_get drivers/tty/vt/vt.c:4558 [inline] + con_font_op+0x1fc/0xf20 drivers/tty/vt/vt.c:4673 + vt_k_ioctl drivers/tty/vt/vt_ioctl.c:474 [inline] + vt_ioctl+0x632/0x2ec0 drivers/tty/vt/vt_ioctl.c:752 + tty_ioctl+0x6f8/0x1570 drivers/tty/tty_io.c:2803 + vfs_ioctl fs/ioctl.c:51 [inline] + ... + +So restore the font data in any case, not only for user fonts. Note the +later 'if' is now protected by 'old_userfont' and not 'old_data' as the +latter is always set now. (And it is supposed to be non-NULL. Otherwise +we would see the bug above again.) 
+ +Signed-off-by: Jiri Slaby (SUSE) +Fixes: a5a923038d70 ("fbdev: fbcon: Properly revert changes when vc_resize() failed") +Reported-and-tested-by: Ubisectech Sirius +Cc: Ubisectech Sirius +Cc: Daniel Vetter +Cc: Helge Deller +Cc: linux-fbdev@vger.kernel.org +Cc: dri-devel@lists.freedesktop.org +Signed-off-by: Daniel Vetter +Link: https://patchwork.freedesktop.org/patch/msgid/20240208114411.14604-1-jirislaby@kernel.org +Signed-off-by: Sasha Levin +--- + drivers/video/fbdev/core/fbcon.c | 8 +++----- + 1 file changed, 3 insertions(+), 5 deletions(-) + +diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c +index b6712655ec1f0..b163b54b868e6 100644 +--- a/drivers/video/fbdev/core/fbcon.c ++++ b/drivers/video/fbdev/core/fbcon.c +@@ -2409,11 +2409,9 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount, + struct fbcon_ops *ops = info->fbcon_par; + struct fbcon_display *p = &fb_display[vc->vc_num]; + int resize, ret, old_userfont, old_width, old_height, old_charcount; +- char *old_data = NULL; ++ u8 *old_data = vc->vc_font.data; + + resize = (w != vc->vc_font.width) || (h != vc->vc_font.height); +- if (p->userfont) +- old_data = vc->vc_font.data; + vc->vc_font.data = (void *)(p->fontdata = data); + old_userfont = p->userfont; + if ((p->userfont = userfont)) +@@ -2447,13 +2445,13 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount, + update_screen(vc); + } + +- if (old_data && (--REFCOUNT(old_data) == 0)) ++ if (old_userfont && (--REFCOUNT(old_data) == 0)) + kfree(old_data - FONT_EXTRA_WORDS * sizeof(int)); + return 0; + + err_out: + p->fontdata = old_data; +- vc->vc_font.data = (void *)old_data; ++ vc->vc_font.data = old_data; + + if (userfont) { + p->userfont = old_userfont; +-- +2.43.0 + diff --git a/queue-5.15/igb-extend-ptp-timestamp-adjustments-to-i211.patch b/queue-5.15/igb-extend-ptp-timestamp-adjustments-to-i211.patch new file mode 100644 index 00000000000..235c994c259 --- /dev/null 
+++ b/queue-5.15/igb-extend-ptp-timestamp-adjustments-to-i211.patch @@ -0,0 +1,62 @@ +From 8c38f5c11f86a0c30a105012d3c28ba9cc7d94f9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 27 Feb 2024 10:49:41 -0800 +Subject: igb: extend PTP timestamp adjustments to i211 + +From: Oleksij Rempel + +[ Upstream commit 0bb7b09392eb74b152719ae87b1ba5e4bf910ef0 ] + +The i211 requires the same PTP timestamp adjustments as the i210, +according to its datasheet. To ensure consistent timestamping across +different platforms, this change extends the existing adjustments to +include the i211. + +The adjustment result are tested and comparable for i210 and i211 based +systems. + +Fixes: 3f544d2a4d5c ("igb: adjust PTP timestamps for Tx/Rx latency") +Signed-off-by: Oleksij Rempel +Reviewed-by: Jacob Keller +Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) +Signed-off-by: Tony Nguyen +Link: https://lore.kernel.org/r/20240227184942.362710-1-anthony.l.nguyen@intel.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/igb/igb_ptp.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c +index 9cdb7a856ab6c..1a1575e8577af 100644 +--- a/drivers/net/ethernet/intel/igb/igb_ptp.c ++++ b/drivers/net/ethernet/intel/igb/igb_ptp.c +@@ -826,7 +826,7 @@ static void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter) + + igb_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval); + /* adjust timestamp for the TX latency based on link speed */ +- if (adapter->hw.mac.type == e1000_i210) { ++ if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211) { + switch (adapter->link_speed) { + case SPEED_10: + adjust = IGB_I210_TX_LATENCY_10; +@@ -872,6 +872,7 @@ int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va, + ktime_t *timestamp) + { + struct igb_adapter *adapter = q_vector->adapter; ++ struct e1000_hw *hw = &adapter->hw; + 
struct skb_shared_hwtstamps ts; + __le64 *regval = (__le64 *)va; + int adjust = 0; +@@ -891,7 +892,7 @@ int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va, + igb_ptp_systim_to_hwtstamp(adapter, &ts, le64_to_cpu(regval[1])); + + /* adjust timestamp for the RX latency based on link speed */ +- if (adapter->hw.mac.type == e1000_i210) { ++ if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211) { + switch (adapter->link_speed) { + case SPEED_10: + adjust = IGB_I210_RX_LATENCY_10; +-- +2.43.0 + diff --git a/queue-5.15/ipv6-fix-potential-struct-net-leak-in-inet6_rtm_geta.patch b/queue-5.15/ipv6-fix-potential-struct-net-leak-in-inet6_rtm_geta.patch new file mode 100644 index 00000000000..6ab963de3f2 --- /dev/null +++ b/queue-5.15/ipv6-fix-potential-struct-net-leak-in-inet6_rtm_geta.patch @@ -0,0 +1,45 @@ +From 7b448de1f86cc50dbade510a3cafd85b321d4869 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 22 Feb 2024 12:17:47 +0000 +Subject: ipv6: fix potential "struct net" leak in inet6_rtm_getaddr() + +From: Eric Dumazet + +[ Upstream commit 10bfd453da64a057bcfd1a49fb6b271c48653cdb ] + +It seems that if userspace provides a correct IFA_TARGET_NETNSID value +but no IFA_ADDRESS and IFA_LOCAL attributes, inet6_rtm_getaddr() +returns -EINVAL with an elevated "struct net" refcount. + +Fixes: 6ecf4c37eb3e ("ipv6: enable IFA_TARGET_NETNSID for RTM_GETADDR") +Signed-off-by: Eric Dumazet +Cc: Christian Brauner +Cc: David Ahern +Reviewed-by: David Ahern +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/ipv6/addrconf.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c +index c52317184e3e2..968ca078191cd 100644 +--- a/net/ipv6/addrconf.c ++++ b/net/ipv6/addrconf.c +@@ -5463,9 +5463,10 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, + } + + addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer); +- if (!addr) +- return -EINVAL; +- ++ if (!addr) { ++ err = -EINVAL; ++ goto errout; ++ } + ifm = nlmsg_data(nlh); + if (ifm->ifa_index) + dev = dev_get_by_index(tgt_net, ifm->ifa_index); +-- +2.43.0 + diff --git a/queue-5.15/lan78xx-enable-auto-speed-configuration-for-lan7850-.patch b/queue-5.15/lan78xx-enable-auto-speed-configuration-for-lan7850-.patch new file mode 100644 index 00000000000..3790f60d0fb --- /dev/null +++ b/queue-5.15/lan78xx-enable-auto-speed-configuration-for-lan7850-.patch @@ -0,0 +1,45 @@ +From 1aaed7c27b7dc092c5dcbda64bc0bd79e8703ecd Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 22 Feb 2024 13:38:38 +0100 +Subject: lan78xx: enable auto speed configuration for LAN7850 if no EEPROM is + detected + +From: Oleksij Rempel + +[ Upstream commit 0e67899abfbfdea0c3c0ed3fd263ffc601c5c157 ] + +Same as LAN7800, LAN7850 can be used without EEPROM. If EEPROM is not +present or not flashed, LAN7850 will fail to sync the speed detected by the PHY +with the MAC. In case link speed is 100Mbit, it will accidentally work, +otherwise no data can be transferred. + +A better way would be to implement a link_up callback, or set auto speed +configuration unconditionally. But these changes would be more intrusive. +So, for now, set it only if no EEPROM is found. 
+ +Fixes: e69647a19c87 ("lan78xx: Set ASD in MAC_CR when EEE is enabled.") +Signed-off-by: Oleksij Rempel +Link: https://lore.kernel.org/r/20240222123839.2816561-1-o.rempel@pengutronix.de +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/usb/lan78xx.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c +index 5700c9d20a3e2..c8b42892655a1 100644 +--- a/drivers/net/usb/lan78xx.c ++++ b/drivers/net/usb/lan78xx.c +@@ -2862,7 +2862,8 @@ static int lan78xx_reset(struct lan78xx_net *dev) + if (dev->chipid == ID_REV_CHIP_ID_7801_) + buf &= ~MAC_CR_GMII_EN_; + +- if (dev->chipid == ID_REV_CHIP_ID_7800_) { ++ if (dev->chipid == ID_REV_CHIP_ID_7800_ || ++ dev->chipid == ID_REV_CHIP_ID_7850_) { + ret = lan78xx_read_raw_eeprom(dev, 0, 1, &sig); + if (!ret && sig != EEPROM_INDICATOR) { + /* Implies there is no external eeprom. Set mac speed */ +-- +2.43.0 + diff --git a/queue-5.15/mtd-spinand-gigadevice-fix-the-get-ecc-status-issue.patch b/queue-5.15/mtd-spinand-gigadevice-fix-the-get-ecc-status-issue.patch new file mode 100644 index 00000000000..e9c868ba8ec --- /dev/null +++ b/queue-5.15/mtd-spinand-gigadevice-fix-the-get-ecc-status-issue.patch @@ -0,0 +1,63 @@ +From 4a0367bb181569b5cc86ae4e1018b971b3e769ca Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 8 Nov 2023 09:07:01 -0600 +Subject: mtd: spinand: gigadevice: Fix the get ecc status issue + +From: Han Xu + +[ Upstream commit 59950610c0c00c7a06d8a75d2ee5d73dba4274cf ] + +Some GigaDevice ecc_get_status functions use on-stack buffer for +spi_mem_op causes spi_mem_check_op failing, fix the issue by using +spinand scratchbuf. 
+ +Fixes: c40c7a990a46 ("mtd: spinand: Add support for GigaDevice GD5F1GQ4UExxG") +Signed-off-by: Han Xu +Signed-off-by: Miquel Raynal +Link: https://lore.kernel.org/linux-mtd/20231108150701.593912-1-han.xu@nxp.com +Signed-off-by: Sasha Levin +--- + drivers/mtd/nand/spi/gigadevice.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/drivers/mtd/nand/spi/gigadevice.c b/drivers/mtd/nand/spi/gigadevice.c +index da77ab20296ea..56d1b56615f97 100644 +--- a/drivers/mtd/nand/spi/gigadevice.c ++++ b/drivers/mtd/nand/spi/gigadevice.c +@@ -178,7 +178,7 @@ static int gd5fxgq4uexxg_ecc_get_status(struct spinand_device *spinand, + { + u8 status2; + struct spi_mem_op op = SPINAND_GET_FEATURE_OP(GD5FXGQXXEXXG_REG_STATUS2, +- &status2); ++ spinand->scratchbuf); + int ret; + + switch (status & STATUS_ECC_MASK) { +@@ -199,6 +199,7 @@ static int gd5fxgq4uexxg_ecc_get_status(struct spinand_device *spinand, + * report the maximum of 4 in this case + */ + /* bits sorted this way (3...0): ECCS1,ECCS0,ECCSE1,ECCSE0 */ ++ status2 = *(spinand->scratchbuf); + return ((status & STATUS_ECC_MASK) >> 2) | + ((status2 & STATUS_ECC_MASK) >> 4); + +@@ -220,7 +221,7 @@ static int gd5fxgq5xexxg_ecc_get_status(struct spinand_device *spinand, + { + u8 status2; + struct spi_mem_op op = SPINAND_GET_FEATURE_OP(GD5FXGQXXEXXG_REG_STATUS2, +- &status2); ++ spinand->scratchbuf); + int ret; + + switch (status & STATUS_ECC_MASK) { +@@ -240,6 +241,7 @@ static int gd5fxgq5xexxg_ecc_get_status(struct spinand_device *spinand, + * 1 ... 
4 bits are flipped (and corrected) + */ + /* bits sorted this way (1...0): ECCSE1, ECCSE0 */ ++ status2 = *(spinand->scratchbuf); + return ((status2 & STATUS_ECC_MASK) >> 4) + 1; + + case STATUS_ECC_UNCOR_ERROR: +-- +2.43.0 + diff --git a/queue-5.15/net-enable-memcg-accounting-for-veth-queues.patch b/queue-5.15/net-enable-memcg-accounting-for-veth-queues.patch new file mode 100644 index 00000000000..ec6c89b2b15 --- /dev/null +++ b/queue-5.15/net-enable-memcg-accounting-for-veth-queues.patch @@ -0,0 +1,37 @@ +From c9e4dec79cee5595362c0b72b84e612297ad973b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 29 Apr 2022 08:17:35 +0300 +Subject: net: enable memcg accounting for veth queues + +From: Vasily Averin + +[ Upstream commit 961c6136359eef38a8c023d02028fdcd123f02a6 ] + +veth netdevice defines own rx queues and allocates array containing +up to 4095 ~750-bytes-long 'struct veth_rq' elements. Such allocation +is quite huge and should be accounted to memcg. + +Signed-off-by: Vasily Averin +Signed-off-by: David S. 
Miller +Stable-dep-of: 1ce7d306ea63 ("veth: try harder when allocating queue memory") +Signed-off-by: Sasha Levin +--- + drivers/net/veth.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/veth.c b/drivers/net/veth.c +index 85c3e12f83627..87cee614618ca 100644 +--- a/drivers/net/veth.c ++++ b/drivers/net/veth.c +@@ -1303,7 +1303,7 @@ static int veth_alloc_queues(struct net_device *dev) + struct veth_priv *priv = netdev_priv(dev); + int i; + +- priv->rq = kcalloc(dev->num_rx_queues, sizeof(*priv->rq), GFP_KERNEL); ++ priv->rq = kcalloc(dev->num_rx_queues, sizeof(*priv->rq), GFP_KERNEL_ACCOUNT); + if (!priv->rq) + return -ENOMEM; + +-- +2.43.0 + diff --git a/queue-5.15/net-ip_tunnel-prevent-perpetual-headroom-growth.patch b/queue-5.15/net-ip_tunnel-prevent-perpetual-headroom-growth.patch new file mode 100644 index 00000000000..3c57484d317 --- /dev/null +++ b/queue-5.15/net-ip_tunnel-prevent-perpetual-headroom-growth.patch @@ -0,0 +1,181 @@ +From bada6523094d175dcf46b7499d3a14131760a80a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 20 Feb 2024 14:56:02 +0100 +Subject: net: ip_tunnel: prevent perpetual headroom growth + +From: Florian Westphal + +[ Upstream commit 5ae1e9922bbdbaeb9cfbe91085ab75927488ac0f ] + +syzkaller triggered following kasan splat: +BUG: KASAN: use-after-free in __skb_flow_dissect+0x19d1/0x7a50 net/core/flow_dissector.c:1170 +Read of size 1 at addr ffff88812fb4000e by task syz-executor183/5191 +[..] 
+ kasan_report+0xda/0x110 mm/kasan/report.c:588 + __skb_flow_dissect+0x19d1/0x7a50 net/core/flow_dissector.c:1170 + skb_flow_dissect_flow_keys include/linux/skbuff.h:1514 [inline] + ___skb_get_hash net/core/flow_dissector.c:1791 [inline] + __skb_get_hash+0xc7/0x540 net/core/flow_dissector.c:1856 + skb_get_hash include/linux/skbuff.h:1556 [inline] + ip_tunnel_xmit+0x1855/0x33c0 net/ipv4/ip_tunnel.c:748 + ipip_tunnel_xmit+0x3cc/0x4e0 net/ipv4/ipip.c:308 + __netdev_start_xmit include/linux/netdevice.h:4940 [inline] + netdev_start_xmit include/linux/netdevice.h:4954 [inline] + xmit_one net/core/dev.c:3548 [inline] + dev_hard_start_xmit+0x13d/0x6d0 net/core/dev.c:3564 + __dev_queue_xmit+0x7c1/0x3d60 net/core/dev.c:4349 + dev_queue_xmit include/linux/netdevice.h:3134 [inline] + neigh_connected_output+0x42c/0x5d0 net/core/neighbour.c:1592 + ... + ip_finish_output2+0x833/0x2550 net/ipv4/ip_output.c:235 + ip_finish_output+0x31/0x310 net/ipv4/ip_output.c:323 + .. + iptunnel_xmit+0x5b4/0x9b0 net/ipv4/ip_tunnel_core.c:82 + ip_tunnel_xmit+0x1dbc/0x33c0 net/ipv4/ip_tunnel.c:831 + ipgre_xmit+0x4a1/0x980 net/ipv4/ip_gre.c:665 + __netdev_start_xmit include/linux/netdevice.h:4940 [inline] + netdev_start_xmit include/linux/netdevice.h:4954 [inline] + xmit_one net/core/dev.c:3548 [inline] + dev_hard_start_xmit+0x13d/0x6d0 net/core/dev.c:3564 + ... + +The splat occurs because skb->data points past skb->head allocated area. +This is because neigh layer does: + __skb_pull(skb, skb_network_offset(skb)); + +... but skb_network_offset() returns a negative offset and __skb_pull() +arg is unsigned. IOW, skb->data gets "adjusted" by a huge value. + +The negative value is returned because skb->head and skb->data distance is +more than 64k and skb->network_header (u16) has wrapped around. + +The bug is in the ip_tunnel infrastructure, which can cause +dev->needed_headroom to increment ad infinitum.
+ +The syzkaller reproducer consists of packets getting routed via a gre +tunnel, and route of gre encapsulated packets pointing at another (ipip) +tunnel. The ipip encapsulation finds gre0 as next output device. + +This results in the following pattern: + +1). First packet is to be sent out via gre0. +Route lookup found an output device, ipip0. + +2). +ip_tunnel_xmit for gre0 bumps gre0->needed_headroom based on the future +output device, rt.dev->needed_headroom (ipip0). + +3). +ip output / start_xmit moves skb on to ipip0, which runs the same +code path again (xmit recursion). + +4). +Routing step for the post-gre0-encap packet finds gre0 as output device +to use for ipip0 encapsulated packet. + +tunl0->needed_headroom is then incremented based on the (already bumped) +gre0 device headroom. + +This repeats for every future packet: + +gre0->needed_headroom gets inflated because previous packets' ipip0 step +incremented rt->dev (gre0) headroom, and ipip0 incremented because gre0 +needed_headroom was increased. + +For each subsequent packet, gre/ipip0->needed_headroom grows until +post-expand-head reallocations result in a skb->head/data distance of +more than 64k. + +Once that happens, skb->network_header (u16) wraps around when +pskb_expand_head tries to make sure that skb_network_offset() is unchanged +after the headroom expansion/reallocation. + +After this skb_network_offset(skb) returns a different (and negative) +result post headroom expansion. + +The next trip to neigh layer (or anything else that would __skb_pull the +network header) makes skb->data point to a memory location outside +skb->head area. + +v2: Cap the needed_headroom update to an arbitrarily chosen upper limit to +prevent perpetual increase instead of dropping the headroom increment +completely.
+ +Reported-and-tested-by: syzbot+bfde3bef047a81b8fde6@syzkaller.appspotmail.com +Closes: https://groups.google.com/g/syzkaller-bugs/c/fL9G6GtWskY/m/VKk_PR5FBAAJ +Fixes: 243aad830e8a ("ip_gre: include route header_len in max_headroom calculation") +Signed-off-by: Florian Westphal +Reviewed-by: Simon Horman +Link: https://lore.kernel.org/r/20240220135606.4939-1-fw@strlen.de +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/ip_tunnel.c | 28 +++++++++++++++++++++------- + 1 file changed, 21 insertions(+), 7 deletions(-) + +diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c +index 426dc910aaf87..96b7cd3049a33 100644 +--- a/net/ipv4/ip_tunnel.c ++++ b/net/ipv4/ip_tunnel.c +@@ -540,6 +540,20 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, + return 0; + } + ++static void ip_tunnel_adj_headroom(struct net_device *dev, unsigned int headroom) ++{ ++ /* we must cap headroom to some upperlimit, else pskb_expand_head ++ * will overflow header offsets in skb_headers_offset_update(). 
++ */ ++ static const unsigned int max_allowed = 512; ++ ++ if (headroom > max_allowed) ++ headroom = max_allowed; ++ ++ if (headroom > READ_ONCE(dev->needed_headroom)) ++ WRITE_ONCE(dev->needed_headroom, headroom); ++} ++ + void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, + u8 proto, int tunnel_hlen) + { +@@ -613,13 +627,13 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, + } + + headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len; +- if (headroom > READ_ONCE(dev->needed_headroom)) +- WRITE_ONCE(dev->needed_headroom, headroom); +- +- if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) { ++ if (skb_cow_head(skb, headroom)) { + ip_rt_put(rt); + goto tx_dropped; + } ++ ++ ip_tunnel_adj_headroom(dev, headroom); ++ + iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl, + df, !net_eq(tunnel->net, dev_net(dev))); + return; +@@ -797,16 +811,16 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, + + max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr) + + rt->dst.header_len + ip_encap_hlen(&tunnel->encap); +- if (max_headroom > READ_ONCE(dev->needed_headroom)) +- WRITE_ONCE(dev->needed_headroom, max_headroom); + +- if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) { ++ if (skb_cow_head(skb, max_headroom)) { + ip_rt_put(rt); + dev->stats.tx_dropped++; + kfree_skb(skb); + return; + } + ++ ip_tunnel_adj_headroom(dev, max_headroom); ++ + iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl, + df, !net_eq(tunnel->net, dev_net(dev))); + return; +-- +2.43.0 + diff --git a/queue-5.15/net-usb-dm9601-fix-wrong-return-value-in-dm9601_mdio.patch b/queue-5.15/net-usb-dm9601-fix-wrong-return-value-in-dm9601_mdio.patch new file mode 100644 index 00000000000..7af1359c81d --- /dev/null +++ b/queue-5.15/net-usb-dm9601-fix-wrong-return-value-in-dm9601_mdio.patch @@ -0,0 +1,49 @@ +From 07a2d624ff2c54511c92e6d2a8440348a4b97921 Mon Sep 17 00:00:00 2001 +From: Sasha 
Levin +Date: Sun, 25 Feb 2024 00:20:06 +0100 +Subject: net: usb: dm9601: fix wrong return value in dm9601_mdio_read + +From: Javier Carrasco + +[ Upstream commit c68b2c9eba38ec3f60f4894b189090febf4d8d22 ] + +The MII code does not check the return value of mdio_read (among +others), and therefore no error code should be sent. A previous fix to +the use of an uninitialized variable propagates negative error codes, +that might lead to wrong operations by the MII library. + +An example of such issues is the use of mii_nway_restart by the dm9601 +driver. The mii_nway_restart function does not check the value returned +by mdio_read, which in this case might be a negative number which could +contain the exact bit the function checks (BMCR_ANENABLE = 0x1000). + +Return zero in case of error, as it is common practice in users of +mdio_read to avoid wrong uses of the return value. + +Fixes: 8f8abb863fa5 ("net: usb: dm9601: fix uninitialized variable use in dm9601_mdio_read") +Signed-off-by: Javier Carrasco +Reviewed-by: Simon Horman +Reviewed-by: Peter Korsgaard +Link: https://lore.kernel.org/r/20240225-dm9601_ret_err-v1-1-02c1d959ea59@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/usb/dm9601.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c +index 1959e12a3ff8a..f7357d884d6aa 100644 +--- a/drivers/net/usb/dm9601.c ++++ b/drivers/net/usb/dm9601.c +@@ -232,7 +232,7 @@ static int dm9601_mdio_read(struct net_device *netdev, int phy_id, int loc) + err = dm_read_shared_word(dev, 1, loc, &res); + if (err < 0) { + netdev_err(dev->net, "MDIO read error: %d\n", err); +- return err; ++ return 0; + } + + netdev_dbg(dev->net, +-- +2.43.0 + diff --git a/queue-5.15/net-veth-clear-gro-when-clearing-xdp-even-when-down.patch b/queue-5.15/net-veth-clear-gro-when-clearing-xdp-even-when-down.patch new file mode 100644 index 00000000000..9ee755657f5 --- /dev/null +++ 
b/queue-5.15/net-veth-clear-gro-when-clearing-xdp-even-when-down.patch @@ -0,0 +1,121 @@ +From 8efa248bc406923275a53b52bf40ba3122ff59d8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 21 Feb 2024 15:12:10 -0800 +Subject: net: veth: clear GRO when clearing XDP even when down +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Jakub Kicinski + +[ Upstream commit fe9f801355f0b47668419f30f1fac1cf4539e736 ] + +veth sets NETIF_F_GRO automatically when XDP is enabled, +because both features use the same NAPI machinery. + +The logic to clear NETIF_F_GRO sits in veth_disable_xdp() which +is called both on ndo_stop and when XDP is turned off. +To avoid the flag from being cleared when the device is brought +down, the clearing is skipped when IFF_UP is not set. +Bringing the device down should indeed not modify its features. + +Unfortunately, this means that clearing is also skipped when +XDP is disabled _while_ the device is down. And there's nothing +on the open path to bring the device features back into sync. +IOW if user enables XDP, disables it and then brings the device +up we'll end up with a stray GRO flag set but no NAPI instances. + +We don't depend on the GRO flag on the datapath, so the datapath +won't crash. We will crash (or hang), however, next time features +are sync'ed (either by user via ethtool or peer changing its config). +The GRO flag will go away, and veth will try to disable the NAPIs. +But the open path never created them since XDP was off, the GRO flag +was a stray. If NAPI was initialized before we'll hang in napi_disable(). +If it never was we'll crash trying to stop uninitialized hrtimer. + +Move the GRO flag updates to the XDP enable / disable paths, +instead of mixing them with the ndo_open / ndo_close paths. 
+ +Fixes: d3256efd8e8b ("veth: allow enabling NAPI even without XDP") +Reported-by: Thomas Gleixner +Reported-by: syzbot+039399a9b96297ddedca@syzkaller.appspotmail.com +Signed-off-by: Jakub Kicinski +Reviewed-by: Toke Høiland-Jørgensen +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/veth.c | 35 +++++++++++++++++------------------ + 1 file changed, 17 insertions(+), 18 deletions(-) + +diff --git a/drivers/net/veth.c b/drivers/net/veth.c +index 984a153804096..85c3e12f83627 100644 +--- a/drivers/net/veth.c ++++ b/drivers/net/veth.c +@@ -1079,14 +1079,6 @@ static int veth_enable_xdp(struct net_device *dev) + veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, true); + return err; + } +- +- if (!veth_gro_requested(dev)) { +- /* user-space did not require GRO, but adding XDP +- * is supposed to get GRO working +- */ +- dev->features |= NETIF_F_GRO; +- netdev_features_change(dev); +- } + } + } + +@@ -1106,18 +1098,9 @@ static void veth_disable_xdp(struct net_device *dev) + for (i = 0; i < dev->real_num_rx_queues; i++) + rcu_assign_pointer(priv->rq[i].xdp_prog, NULL); + +- if (!netif_running(dev) || !veth_gro_requested(dev)) { ++ if (!netif_running(dev) || !veth_gro_requested(dev)) + veth_napi_del(dev); + +- /* if user-space did not require GRO, since adding XDP +- * enabled it, clear it now +- */ +- if (!veth_gro_requested(dev) && netif_running(dev)) { +- dev->features &= ~NETIF_F_GRO; +- netdev_features_change(dev); +- } +- } +- + veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, false); + } + +@@ -1497,6 +1480,14 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog, + } + + if (!old_prog) { ++ if (!veth_gro_requested(dev)) { ++ /* user-space did not require GRO, but adding ++ * XDP is supposed to get GRO working ++ */ ++ dev->features |= NETIF_F_GRO; ++ netdev_features_change(dev); ++ } ++ + peer->hw_features &= ~NETIF_F_GSO_SOFTWARE; + peer->max_mtu = max_mtu; + } +@@ -1507,6 +1498,14 @@ static int 
veth_xdp_set(struct net_device *dev, struct bpf_prog *prog, + if (dev->flags & IFF_UP) + veth_disable_xdp(dev); + ++ /* if user-space did not require GRO, since adding XDP ++ * enabled it, clear it now ++ */ ++ if (!veth_gro_requested(dev)) { ++ dev->features &= ~NETIF_F_GRO; ++ netdev_features_change(dev); ++ } ++ + if (peer) { + peer->hw_features |= NETIF_F_GSO_SOFTWARE; + peer->max_mtu = ETH_MAX_MTU; +-- +2.43.0 + diff --git a/queue-5.15/netfilter-bridge-confirm-multicast-packets-before-pa.patch b/queue-5.15/netfilter-bridge-confirm-multicast-packets-before-pa.patch new file mode 100644 index 00000000000..1333ded6a20 --- /dev/null +++ b/queue-5.15/netfilter-bridge-confirm-multicast-packets-before-pa.patch @@ -0,0 +1,282 @@ +From d193e211fa7b4fbc137c4378057862b37c14f657 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 27 Feb 2024 16:17:51 +0100 +Subject: netfilter: bridge: confirm multicast packets before passing them up + the stack + +From: Florian Westphal + +[ Upstream commit 62e7151ae3eb465e0ab52a20c941ff33bb6332e9 ] + +conntrack nf_confirm logic cannot handle cloned skbs referencing +the same nf_conn entry, which will happen for multicast (broadcast) +frames on bridges. + + Example: + macvlan0 + | + br0 + / \ + ethX ethY + + ethX (or Y) receives a L2 multicast or broadcast packet containing + an IP packet, flow is not yet in conntrack table. + + 1. skb passes through bridge and fake-ip (br_netfilter)Prerouting. + -> skb->_nfct now references a unconfirmed entry + 2. skb is broad/mcast packet. bridge now passes clones out on each bridge + interface. + 3. skb gets passed up the stack. + 4. In macvlan case, macvlan driver retains clone(s) of the mcast skb + and schedules a work queue to send them out on the lower devices. + + The clone skb->_nfct is not a copy, it is the same entry as the + original skb. The macvlan rx handler then returns RX_HANDLER_PASS. + 5. Normal conntrack hooks (in NF_INET_LOCAL_IN) confirm the orig skb. 
+ +The Macvlan broadcast worker and normal confirm path will race. + +This race will not happen if step 2 already confirmed a clone. In that +case later steps perform skb_clone() with skb->_nfct already confirmed (in +hash table). This works fine. + +But such confirmation won't happen when eb/ip/nftables rules dropped the +packets before they reached the nf_confirm step in postrouting. + +Pablo points out that nf_conntrack_bridge doesn't allow use of stateful +nat, so we can safely discard the nf_conn entry and let inet call +conntrack again. + +This doesn't work for bridge netfilter: skb could have a nat +transformation. Also bridge nf prevents re-invocation of inet prerouting +via 'sabotage_in' hook. + +Work around this problem by explicit confirmation of the entry at LOCAL_IN +time, before upper layer has a chance to clone the unconfirmed entry. + +The downside is that this disables NAT and conntrack helpers. + +Alternative fix would be to add locking to all code parts that deal with +unconfirmed packets, but even if that could be done in a sane way this +opens up other problems, for example: + +-m physdev --physdev-out eth0 -j SNAT --snat-to 1.2.3.4 +-m physdev --physdev-out eth1 -j SNAT --snat-to 1.2.3.5 + +For multicast case, only one of such conflicting mappings will be +created, conntrack only handles 1:1 NAT mappings. + +Users should create a setup that explicitly marks such traffic +NOTRACK (conntrack bypass) to avoid this, but we cannot auto-bypass +them, ruleset might have accept rules for untracked traffic already, +so user-visible behaviour would change.
+ +Suggested-by: Pablo Neira Ayuso +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217777 +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + include/linux/netfilter.h | 1 + + net/bridge/br_netfilter_hooks.c | 96 ++++++++++++++++++++++ + net/bridge/netfilter/nf_conntrack_bridge.c | 30 +++++++ + net/netfilter/nf_conntrack_core.c | 1 + + 4 files changed, 128 insertions(+) + +diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h +index c92bb1580f419..c69cbd64b5b46 100644 +--- a/include/linux/netfilter.h ++++ b/include/linux/netfilter.h +@@ -461,6 +461,7 @@ struct nf_ct_hook { + const struct sk_buff *); + void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb); + void (*set_closing)(struct nf_conntrack *nfct); ++ int (*confirm)(struct sk_buff *skb); + }; + extern const struct nf_ct_hook __rcu *nf_ct_hook; + +diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c +index f14beb9a62edb..8a114a5000466 100644 +--- a/net/bridge/br_netfilter_hooks.c ++++ b/net/bridge/br_netfilter_hooks.c +@@ -43,6 +43,10 @@ + #include + #endif + ++#if IS_ENABLED(CONFIG_NF_CONNTRACK) ++#include ++#endif ++ + static unsigned int brnf_net_id __read_mostly; + + struct brnf_net { +@@ -537,6 +541,90 @@ static unsigned int br_nf_pre_routing(void *priv, + return NF_STOLEN; + } + ++#if IS_ENABLED(CONFIG_NF_CONNTRACK) ++/* conntracks' nf_confirm logic cannot handle cloned skbs referencing ++ * the same nf_conn entry, which will happen for multicast (broadcast) ++ * Frames on bridges. ++ * ++ * Example: ++ * macvlan0 ++ * br0 ++ * ethX ethY ++ * ++ * ethX (or Y) receives multicast or broadcast packet containing ++ * an IP packet, not yet in conntrack table. ++ * ++ * 1. skb passes through bridge and fake-ip (br_netfilter)Prerouting. ++ * -> skb->_nfct now references a unconfirmed entry ++ * 2. skb is broad/mcast packet. 
bridge now passes clones out on each bridge ++ * interface. ++ * 3. skb gets passed up the stack. ++ * 4. In macvlan case, macvlan driver retains clone(s) of the mcast skb ++ * and schedules a work queue to send them out on the lower devices. ++ * ++ * The clone skb->_nfct is not a copy, it is the same entry as the ++ * original skb. The macvlan rx handler then returns RX_HANDLER_PASS. ++ * 5. Normal conntrack hooks (in NF_INET_LOCAL_IN) confirm the orig skb. ++ * ++ * The Macvlan broadcast worker and normal confirm path will race. ++ * ++ * This race will not happen if step 2 already confirmed a clone. In that ++ * case later steps perform skb_clone() with skb->_nfct already confirmed (in ++ * hash table). This works fine. ++ * ++ * But such confirmation won't happen when eb/ip/nftables rules dropped the ++ * packets before they reached the nf_confirm step in postrouting. ++ * ++ * Work around this problem by explicit confirmation of the entry at ++ * LOCAL_IN time, before upper layer has a chance to clone the unconfirmed ++ * entry. ++ * ++ */ ++static unsigned int br_nf_local_in(void *priv, ++ struct sk_buff *skb, ++ const struct nf_hook_state *state) ++{ ++ struct nf_conntrack *nfct = skb_nfct(skb); ++ const struct nf_ct_hook *ct_hook; ++ struct nf_conn *ct; ++ int ret; ++ ++ if (!nfct || skb->pkt_type == PACKET_HOST) ++ return NF_ACCEPT; ++ ++ ct = container_of(nfct, struct nf_conn, ct_general); ++ if (likely(nf_ct_is_confirmed(ct))) ++ return NF_ACCEPT; ++ ++ WARN_ON_ONCE(skb_shared(skb)); ++ WARN_ON_ONCE(refcount_read(&nfct->use) != 1); ++ ++ /* We can't call nf_confirm here, it would create a dependency ++ * on nf_conntrack module. 
++ */ ++ ct_hook = rcu_dereference(nf_ct_hook); ++ if (!ct_hook) { ++ skb->_nfct = 0ul; ++ nf_conntrack_put(nfct); ++ return NF_ACCEPT; ++ } ++ ++ nf_bridge_pull_encap_header(skb); ++ ret = ct_hook->confirm(skb); ++ switch (ret & NF_VERDICT_MASK) { ++ case NF_STOLEN: ++ return NF_STOLEN; ++ default: ++ nf_bridge_push_encap_header(skb); ++ break; ++ } ++ ++ ct = container_of(nfct, struct nf_conn, ct_general); ++ WARN_ON_ONCE(!nf_ct_is_confirmed(ct)); ++ ++ return ret; ++} ++#endif + + /* PF_BRIDGE/FORWARD *************************************************/ + static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) +@@ -935,6 +1023,14 @@ static const struct nf_hook_ops br_nf_ops[] = { + .hooknum = NF_BR_PRE_ROUTING, + .priority = NF_BR_PRI_BRNF, + }, ++#if IS_ENABLED(CONFIG_NF_CONNTRACK) ++ { ++ .hook = br_nf_local_in, ++ .pf = NFPROTO_BRIDGE, ++ .hooknum = NF_BR_LOCAL_IN, ++ .priority = NF_BR_PRI_LAST, ++ }, ++#endif + { + .hook = br_nf_forward_ip, + .pf = NFPROTO_BRIDGE, +diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c +index d14b2dbbd1dfb..83743e95939b1 100644 +--- a/net/bridge/netfilter/nf_conntrack_bridge.c ++++ b/net/bridge/netfilter/nf_conntrack_bridge.c +@@ -290,6 +290,30 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb, + return nf_conntrack_in(skb, &bridge_state); + } + ++static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb, ++ const struct nf_hook_state *state) ++{ ++ enum ip_conntrack_info ctinfo; ++ struct nf_conn *ct; ++ ++ if (skb->pkt_type == PACKET_HOST) ++ return NF_ACCEPT; ++ ++ /* nf_conntrack_confirm() cannot handle concurrent clones, ++ * this happens for broad/multicast frames with e.g. macvlan on top ++ * of the bridge device. 
++ */ ++ ct = nf_ct_get(skb, &ctinfo); ++ if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct)) ++ return NF_ACCEPT; ++ ++ /* let inet prerouting call conntrack again */ ++ skb->_nfct = 0; ++ nf_ct_put(ct); ++ ++ return NF_ACCEPT; ++} ++ + static void nf_ct_bridge_frag_save(struct sk_buff *skb, + struct nf_bridge_frag_data *data) + { +@@ -414,6 +438,12 @@ static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = { + .hooknum = NF_BR_PRE_ROUTING, + .priority = NF_IP_PRI_CONNTRACK, + }, ++ { ++ .hook = nf_ct_bridge_in, ++ .pf = NFPROTO_BRIDGE, ++ .hooknum = NF_BR_LOCAL_IN, ++ .priority = NF_IP_PRI_CONNTRACK_CONFIRM, ++ }, + { + .hook = nf_ct_bridge_post, + .pf = NFPROTO_BRIDGE, +diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c +index e0f4f76439d3d..be6031886f942 100644 +--- a/net/netfilter/nf_conntrack_core.c ++++ b/net/netfilter/nf_conntrack_core.c +@@ -2850,6 +2850,7 @@ static const struct nf_ct_hook nf_conntrack_hook = { + .get_tuple_skb = nf_conntrack_get_tuple_skb, + .attach = nf_conntrack_attach, + .set_closing = nf_conntrack_set_closing, ++ .confirm = __nf_conntrack_confirm, + }; + + void nf_conntrack_init_end(void) +-- +2.43.0 + diff --git a/queue-5.15/netfilter-core-move-ip_ct_attach-indirection-to-stru.patch b/queue-5.15/netfilter-core-move-ip_ct_attach-indirection-to-stru.patch new file mode 100644 index 00000000000..70c0eed7eab --- /dev/null +++ b/queue-5.15/netfilter-core-move-ip_ct_attach-indirection-to-stru.patch @@ -0,0 +1,109 @@ +From 2bf15b346e8c5bbdbf2bf246a13d8a784adc60c5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 7 Jan 2022 05:03:23 +0100 +Subject: netfilter: core: move ip_ct_attach indirection to struct nf_ct_hook + +From: Florian Westphal + +[ Upstream commit 3fce16493dc1aa2c9af3d7e7bd360dfe203a3e6a ] + +ip_ct_attach predates struct nf_ct_hook, we can place it there and +remove the exported symbol. 
+ +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Stable-dep-of: 62e7151ae3eb ("netfilter: bridge: confirm multicast packets before passing them up the stack") +Signed-off-by: Sasha Levin +--- + include/linux/netfilter.h | 2 +- + net/netfilter/core.c | 19 ++++++++----------- + net/netfilter/nf_conntrack_core.c | 4 +--- + 3 files changed, 10 insertions(+), 15 deletions(-) + +diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h +index e20c2db0f2c16..64acdf22eb4fa 100644 +--- a/include/linux/netfilter.h ++++ b/include/linux/netfilter.h +@@ -435,7 +435,6 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) + #if IS_ENABLED(CONFIG_NF_CONNTRACK) + #include + +-extern void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *) __rcu; + void nf_ct_attach(struct sk_buff *, const struct sk_buff *); + struct nf_conntrack_tuple; + bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, +@@ -458,6 +457,7 @@ struct nf_ct_hook { + void (*destroy)(struct nf_conntrack *); + bool (*get_tuple_skb)(struct nf_conntrack_tuple *, + const struct sk_buff *); ++ void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb); + }; + extern struct nf_ct_hook __rcu *nf_ct_hook; + +diff --git a/net/netfilter/core.c b/net/netfilter/core.c +index ffa84cafb746b..5396d27ba6a71 100644 +--- a/net/netfilter/core.c ++++ b/net/netfilter/core.c +@@ -639,25 +639,22 @@ struct nf_ct_hook __rcu *nf_ct_hook __read_mostly; + EXPORT_SYMBOL_GPL(nf_ct_hook); + + #if IS_ENABLED(CONFIG_NF_CONNTRACK) +-/* This does not belong here, but locally generated errors need it if connection +- tracking in use: without this, connection may not be in hash table, and hence +- manufactured ICMP or RST packets will not be associated with it. 
*/ +-void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *) +- __rcu __read_mostly; +-EXPORT_SYMBOL(ip_ct_attach); +- + struct nf_nat_hook __rcu *nf_nat_hook __read_mostly; + EXPORT_SYMBOL_GPL(nf_nat_hook); + ++/* This does not belong here, but locally generated errors need it if connection ++ * tracking in use: without this, connection may not be in hash table, and hence ++ * manufactured ICMP or RST packets will not be associated with it. ++ */ + void nf_ct_attach(struct sk_buff *new, const struct sk_buff *skb) + { +- void (*attach)(struct sk_buff *, const struct sk_buff *); ++ const struct nf_ct_hook *ct_hook; + + if (skb->_nfct) { + rcu_read_lock(); +- attach = rcu_dereference(ip_ct_attach); +- if (attach) +- attach(new, skb); ++ ct_hook = rcu_dereference(nf_ct_hook); ++ if (ct_hook) ++ ct_hook->attach(new, skb); + rcu_read_unlock(); + } + } +diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c +index 10622760f894a..779e41d1afdce 100644 +--- a/net/netfilter/nf_conntrack_core.c ++++ b/net/netfilter/nf_conntrack_core.c +@@ -2518,7 +2518,6 @@ static int kill_all(struct nf_conn *i, void *data) + void nf_conntrack_cleanup_start(void) + { + conntrack_gc_work.exiting = true; +- RCU_INIT_POINTER(ip_ct_attach, NULL); + } + + void nf_conntrack_cleanup_end(void) +@@ -2838,12 +2837,11 @@ static struct nf_ct_hook nf_conntrack_hook = { + .update = nf_conntrack_update, + .destroy = nf_ct_destroy, + .get_tuple_skb = nf_conntrack_get_tuple_skb, ++ .attach = nf_conntrack_attach, + }; + + void nf_conntrack_init_end(void) + { +- /* For use by REJECT target */ +- RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach); + RCU_INIT_POINTER(nf_ct_hook, &nf_conntrack_hook); + } + +-- +2.43.0 + diff --git a/queue-5.15/netfilter-let-reset-rules-clean-out-conntrack-entrie.patch b/queue-5.15/netfilter-let-reset-rules-clean-out-conntrack-entrie.patch new file mode 100644 index 00000000000..6b39f3d847a --- /dev/null +++ 
b/queue-5.15/netfilter-let-reset-rules-clean-out-conntrack-entrie.patch @@ -0,0 +1,229 @@ +From 66e009926a2ed09a54351ac4e2b0c6ccb7423bdf Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 1 Feb 2023 14:45:22 +0100 +Subject: netfilter: let reset rules clean out conntrack entries + +From: Florian Westphal + +[ Upstream commit 2954fe60e33da0f4de4d81a4c95c7dddb517d00c ] + +iptables/nftables support responding to tcp packets with tcp resets. + +The generated tcp reset packet passes through both output and postrouting +netfilter hooks, but conntrack will never see them because the generated +skb has its ->nfct pointer copied over from the packet that triggered the +reset rule. + +If the reset rule is used for established connections, this +may result in the conntrack entry staying around for a very long +time (default timeout is 5 days). + +One way to avoid this would be to not copy the nf_conn pointer +so that the reset packet passes through conntrack too. + +Problem is that output rules might not have the same conntrack +zone setup as the prerouting ones, so it's possible that the +reset skb won't find the correct entry. Generating a template +entry for the skb seems error prone as well. + +Add an explicit "closing" function that switches a confirmed +conntrack entry to closed state and wire this up for tcp. + +If the entry isn't confirmed, no action is needed because +the conntrack entry will never be committed to the table.
+ +Reported-by: Russel King +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Stable-dep-of: 62e7151ae3eb ("netfilter: bridge: confirm multicast packets before passing them up the stack") +Signed-off-by: Sasha Levin +--- + include/linux/netfilter.h | 3 +++ + include/net/netfilter/nf_conntrack.h | 8 ++++++ + net/ipv4/netfilter/nf_reject_ipv4.c | 1 + + net/ipv6/netfilter/nf_reject_ipv6.c | 1 + + net/netfilter/core.c | 16 ++++++++++++ + net/netfilter/nf_conntrack_core.c | 12 +++++++++ + net/netfilter/nf_conntrack_proto_tcp.c | 35 ++++++++++++++++++++++++++ + 7 files changed, 76 insertions(+) + +diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h +index 5a665034c30be..c92bb1580f419 100644 +--- a/include/linux/netfilter.h ++++ b/include/linux/netfilter.h +@@ -436,11 +436,13 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) + #include + + void nf_ct_attach(struct sk_buff *, const struct sk_buff *); ++void nf_ct_set_closing(struct nf_conntrack *nfct); + struct nf_conntrack_tuple; + bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, + const struct sk_buff *skb); + #else + static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} ++static inline void nf_ct_set_closing(struct nf_conntrack *nfct) {} + struct nf_conntrack_tuple; + static inline bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, + const struct sk_buff *skb) +@@ -458,6 +460,7 @@ struct nf_ct_hook { + bool (*get_tuple_skb)(struct nf_conntrack_tuple *, + const struct sk_buff *); + void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb); ++ void (*set_closing)(struct nf_conntrack *nfct); + }; + extern const struct nf_ct_hook __rcu *nf_ct_hook; + +diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h +index 34c266502a50e..39541ab912a16 100644 +--- a/include/net/netfilter/nf_conntrack.h ++++ b/include/net/netfilter/nf_conntrack.h +@@ -123,6 +123,12 @@ struct nf_conn { + 
union nf_conntrack_proto proto; + }; + ++static inline struct nf_conn * ++nf_ct_to_nf_conn(const struct nf_conntrack *nfct) ++{ ++ return container_of(nfct, struct nf_conn, ct_general); ++} ++ + static inline struct nf_conn * + nf_ct_tuplehash_to_ctrack(const struct nf_conntrack_tuple_hash *hash) + { +@@ -173,6 +179,8 @@ nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo) + + void nf_ct_destroy(struct nf_conntrack *nfct); + ++void nf_conntrack_tcp_set_closing(struct nf_conn *ct); ++ + /* decrement reference count on a conntrack */ + static inline void nf_ct_put(struct nf_conn *ct) + { +diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c +index f2edb40c0db00..350aaca126181 100644 +--- a/net/ipv4/netfilter/nf_reject_ipv4.c ++++ b/net/ipv4/netfilter/nf_reject_ipv4.c +@@ -278,6 +278,7 @@ void nf_send_reset(struct net *net, struct sock *sk, struct sk_buff *oldskb, + goto free_nskb; + + nf_ct_attach(nskb, oldskb); ++ nf_ct_set_closing(skb_nfct(oldskb)); + + #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + /* If we use ip_local_out for bridged traffic, the MAC source on +diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c +index dffeaaaadcded..c0057edd84cfc 100644 +--- a/net/ipv6/netfilter/nf_reject_ipv6.c ++++ b/net/ipv6/netfilter/nf_reject_ipv6.c +@@ -345,6 +345,7 @@ void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb, + nf_reject_ip6_tcphdr_put(nskb, oldskb, otcph, otcplen); + + nf_ct_attach(nskb, oldskb); ++ nf_ct_set_closing(skb_nfct(oldskb)); + + #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + /* If we use ip6_local_out for bridged traffic, the MAC source on +diff --git a/net/netfilter/core.c b/net/netfilter/core.c +index aa3f7d3228fda..fe81824799d95 100644 +--- a/net/netfilter/core.c ++++ b/net/netfilter/core.c +@@ -674,6 +674,22 @@ void nf_conntrack_destroy(struct nf_conntrack *nfct) + } + EXPORT_SYMBOL(nf_conntrack_destroy); + ++void nf_ct_set_closing(struct 
nf_conntrack *nfct) ++{ ++ const struct nf_ct_hook *ct_hook; ++ ++ if (!nfct) ++ return; ++ ++ rcu_read_lock(); ++ ct_hook = rcu_dereference(nf_ct_hook); ++ if (ct_hook) ++ ct_hook->set_closing(nfct); ++ ++ rcu_read_unlock(); ++} ++EXPORT_SYMBOL_GPL(nf_ct_set_closing); ++ + bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, + const struct sk_buff *skb) + { +diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c +index 2a4222eefc894..e0f4f76439d3d 100644 +--- a/net/netfilter/nf_conntrack_core.c ++++ b/net/netfilter/nf_conntrack_core.c +@@ -2833,11 +2833,23 @@ int nf_conntrack_init_start(void) + return ret; + } + ++static void nf_conntrack_set_closing(struct nf_conntrack *nfct) ++{ ++ struct nf_conn *ct = nf_ct_to_nf_conn(nfct); ++ ++ switch (nf_ct_protonum(ct)) { ++ case IPPROTO_TCP: ++ nf_conntrack_tcp_set_closing(ct); ++ break; ++ } ++} ++ + static const struct nf_ct_hook nf_conntrack_hook = { + .update = nf_conntrack_update, + .destroy = nf_ct_destroy, + .get_tuple_skb = nf_conntrack_get_tuple_skb, + .attach = nf_conntrack_attach, ++ .set_closing = nf_conntrack_set_closing, + }; + + void nf_conntrack_init_end(void) +diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c +index 1ecfdc4f23be8..f33e6aea7f4da 100644 +--- a/net/netfilter/nf_conntrack_proto_tcp.c ++++ b/net/netfilter/nf_conntrack_proto_tcp.c +@@ -870,6 +870,41 @@ static bool tcp_can_early_drop(const struct nf_conn *ct) + return false; + } + ++void nf_conntrack_tcp_set_closing(struct nf_conn *ct) ++{ ++ enum tcp_conntrack old_state; ++ const unsigned int *timeouts; ++ u32 timeout; ++ ++ if (!nf_ct_is_confirmed(ct)) ++ return; ++ ++ spin_lock_bh(&ct->lock); ++ old_state = ct->proto.tcp.state; ++ ct->proto.tcp.state = TCP_CONNTRACK_CLOSE; ++ ++ if (old_state == TCP_CONNTRACK_CLOSE || ++ test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) { ++ spin_unlock_bh(&ct->lock); ++ return; ++ } ++ ++ timeouts = nf_ct_timeout_lookup(ct); ++ if 
(!timeouts) { ++ const struct nf_tcp_net *tn; ++ ++ tn = nf_tcp_pernet(nf_ct_net(ct)); ++ timeouts = tn->timeouts; ++ } ++ ++ timeout = timeouts[TCP_CONNTRACK_CLOSE]; ++ WRITE_ONCE(ct->timeout, timeout + nfct_time_stamp); ++ ++ spin_unlock_bh(&ct->lock); ++ ++ nf_conntrack_event_cache(IPCT_PROTOINFO, ct); ++} ++ + static void nf_ct_tcp_state_reset(struct ip_ct_tcp_state *state) + { + state->td_end = 0; +-- +2.43.0 + diff --git a/queue-5.15/netfilter-make-function-op-structures-const.patch b/queue-5.15/netfilter-make-function-op-structures-const.patch new file mode 100644 index 00000000000..439a1930870 --- /dev/null +++ b/queue-5.15/netfilter-make-function-op-structures-const.patch @@ -0,0 +1,208 @@ +From 3ce961662e11bb6747c87f0831db00013039dbc6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 7 Jan 2022 05:03:24 +0100 +Subject: netfilter: make function op structures const + +From: Florian Westphal + +[ Upstream commit 285c8a7a58158cb1805c97ff03875df2ba2ea1fe ] + +No functional changes, these structures should be const. 
+ +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Stable-dep-of: 62e7151ae3eb ("netfilter: bridge: confirm multicast packets before passing them up the stack") +Signed-off-by: Sasha Levin +--- + include/linux/netfilter.h | 8 ++++---- + net/netfilter/core.c | 10 +++++----- + net/netfilter/nf_conntrack_core.c | 4 ++-- + net/netfilter/nf_conntrack_netlink.c | 4 ++-- + net/netfilter/nf_nat_core.c | 2 +- + net/netfilter/nfnetlink_queue.c | 8 ++++---- + 6 files changed, 18 insertions(+), 18 deletions(-) + +diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h +index 64acdf22eb4fa..5a665034c30be 100644 +--- a/include/linux/netfilter.h ++++ b/include/linux/netfilter.h +@@ -376,13 +376,13 @@ struct nf_nat_hook { + enum ip_conntrack_dir dir); + }; + +-extern struct nf_nat_hook __rcu *nf_nat_hook; ++extern const struct nf_nat_hook __rcu *nf_nat_hook; + + static inline void + nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) + { + #if IS_ENABLED(CONFIG_NF_NAT) +- struct nf_nat_hook *nat_hook; ++ const struct nf_nat_hook *nat_hook; + + rcu_read_lock(); + nat_hook = rcu_dereference(nf_nat_hook); +@@ -459,7 +459,7 @@ struct nf_ct_hook { + const struct sk_buff *); + void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb); + }; +-extern struct nf_ct_hook __rcu *nf_ct_hook; ++extern const struct nf_ct_hook __rcu *nf_ct_hook; + + struct nlattr; + +@@ -474,7 +474,7 @@ struct nfnl_ct_hook { + void (*seq_adjust)(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, s32 off); + }; +-extern struct nfnl_ct_hook __rcu *nfnl_ct_hook; ++extern const struct nfnl_ct_hook __rcu *nfnl_ct_hook; + + /** + * nf_skb_duplicated - TEE target has sent a packet +diff --git a/net/netfilter/core.c b/net/netfilter/core.c +index 5396d27ba6a71..aa3f7d3228fda 100644 +--- a/net/netfilter/core.c ++++ b/net/netfilter/core.c +@@ -632,14 +632,14 @@ EXPORT_SYMBOL(nf_hook_slow_list); + /* This needs to be compiled in any case to 
avoid dependencies between the + * nfnetlink_queue code and nf_conntrack. + */ +-struct nfnl_ct_hook __rcu *nfnl_ct_hook __read_mostly; ++const struct nfnl_ct_hook __rcu *nfnl_ct_hook __read_mostly; + EXPORT_SYMBOL_GPL(nfnl_ct_hook); + +-struct nf_ct_hook __rcu *nf_ct_hook __read_mostly; ++const struct nf_ct_hook __rcu *nf_ct_hook __read_mostly; + EXPORT_SYMBOL_GPL(nf_ct_hook); + + #if IS_ENABLED(CONFIG_NF_CONNTRACK) +-struct nf_nat_hook __rcu *nf_nat_hook __read_mostly; ++const struct nf_nat_hook __rcu *nf_nat_hook __read_mostly; + EXPORT_SYMBOL_GPL(nf_nat_hook); + + /* This does not belong here, but locally generated errors need it if connection +@@ -662,7 +662,7 @@ EXPORT_SYMBOL(nf_ct_attach); + + void nf_conntrack_destroy(struct nf_conntrack *nfct) + { +- struct nf_ct_hook *ct_hook; ++ const struct nf_ct_hook *ct_hook; + + rcu_read_lock(); + ct_hook = rcu_dereference(nf_ct_hook); +@@ -677,7 +677,7 @@ EXPORT_SYMBOL(nf_conntrack_destroy); + bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, + const struct sk_buff *skb) + { +- struct nf_ct_hook *ct_hook; ++ const struct nf_ct_hook *ct_hook; + bool ret = false; + + rcu_read_lock(); +diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c +index 779e41d1afdce..2a4222eefc894 100644 +--- a/net/netfilter/nf_conntrack_core.c ++++ b/net/netfilter/nf_conntrack_core.c +@@ -2145,9 +2145,9 @@ static int __nf_conntrack_update(struct net *net, struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo) + { ++ const struct nf_nat_hook *nat_hook; + struct nf_conntrack_tuple_hash *h; + struct nf_conntrack_tuple tuple; +- struct nf_nat_hook *nat_hook; + unsigned int status; + int dataoff; + u16 l3num; +@@ -2833,7 +2833,7 @@ int nf_conntrack_init_start(void) + return ret; + } + +-static struct nf_ct_hook nf_conntrack_hook = { ++static const struct nf_ct_hook nf_conntrack_hook = { + .update = nf_conntrack_update, + .destroy = nf_ct_destroy, + .get_tuple_skb = 
nf_conntrack_get_tuple_skb, +diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c +index c427f7625a3b5..1466015bc56dc 100644 +--- a/net/netfilter/nf_conntrack_netlink.c ++++ b/net/netfilter/nf_conntrack_netlink.c +@@ -1816,7 +1816,7 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct, + const struct nlattr *attr) + __must_hold(RCU) + { +- struct nf_nat_hook *nat_hook; ++ const struct nf_nat_hook *nat_hook; + int err; + + nat_hook = rcu_dereference(nf_nat_hook); +@@ -2922,7 +2922,7 @@ static void ctnetlink_glue_seqadj(struct sk_buff *skb, struct nf_conn *ct, + nf_ct_tcp_seqadj_set(skb, ct, ctinfo, diff); + } + +-static struct nfnl_ct_hook ctnetlink_glue_hook = { ++static const struct nfnl_ct_hook ctnetlink_glue_hook = { + .build_size = ctnetlink_glue_build_size, + .build = ctnetlink_glue_build, + .parse = ctnetlink_glue_parse, +diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c +index 2731176839228..b776b3af78ca2 100644 +--- a/net/netfilter/nf_nat_core.c ++++ b/net/netfilter/nf_nat_core.c +@@ -1120,7 +1120,7 @@ static struct pernet_operations nat_net_ops = { + .size = sizeof(struct nat_net), + }; + +-static struct nf_nat_hook nat_hook = { ++static const struct nf_nat_hook nat_hook = { + .parse_nat_setup = nfnetlink_parse_nat_setup, + #ifdef CONFIG_XFRM + .decode_session = __nf_nat_decode_session, +diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c +index f4468ef3d0a94..8c96e01f6a023 100644 +--- a/net/netfilter/nfnetlink_queue.c ++++ b/net/netfilter/nfnetlink_queue.c +@@ -225,7 +225,7 @@ find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id) + + static void nfqnl_reinject(struct nf_queue_entry *entry, unsigned int verdict) + { +- struct nf_ct_hook *ct_hook; ++ const struct nf_ct_hook *ct_hook; + int err; + + if (verdict == NF_ACCEPT || +@@ -388,7 +388,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, + struct net_device *outdev; + struct nf_conn *ct 
= NULL; + enum ip_conntrack_info ctinfo = 0; +- struct nfnl_ct_hook *nfnl_ct; ++ const struct nfnl_ct_hook *nfnl_ct; + bool csum_verify; + char *secdata = NULL; + u32 seclen = 0; +@@ -1115,7 +1115,7 @@ static int nfqnl_recv_verdict_batch(struct sk_buff *skb, + return 0; + } + +-static struct nf_conn *nfqnl_ct_parse(struct nfnl_ct_hook *nfnl_ct, ++static struct nf_conn *nfqnl_ct_parse(const struct nfnl_ct_hook *nfnl_ct, + const struct nlmsghdr *nlh, + const struct nlattr * const nfqa[], + struct nf_queue_entry *entry, +@@ -1182,11 +1182,11 @@ static int nfqnl_recv_verdict(struct sk_buff *skb, const struct nfnl_info *info, + { + struct nfnl_queue_net *q = nfnl_queue_pernet(info->net); + u_int16_t queue_num = ntohs(info->nfmsg->res_id); ++ const struct nfnl_ct_hook *nfnl_ct; + struct nfqnl_msg_verdict_hdr *vhdr; + enum ip_conntrack_info ctinfo; + struct nfqnl_instance *queue; + struct nf_queue_entry *entry; +- struct nfnl_ct_hook *nfnl_ct; + struct nf_conn *ct = NULL; + unsigned int verdict; + int err; +-- +2.43.0 + diff --git a/queue-5.15/netfilter-nf_tables-allow-nfproto_inet-in-nft_-match.patch b/queue-5.15/netfilter-nf_tables-allow-nfproto_inet-in-nft_-match.patch new file mode 100644 index 00000000000..637880977a4 --- /dev/null +++ b/queue-5.15/netfilter-nf_tables-allow-nfproto_inet-in-nft_-match.patch @@ -0,0 +1,108 @@ +From 35cc9aa0daa8cad7022e1c2a6d76448d4d912e79 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 22 Feb 2024 10:33:08 +0000 +Subject: netfilter: nf_tables: allow NFPROTO_INET in + nft_(match/target)_validate() + +From: Ignat Korchagin + +[ Upstream commit 7e0f122c65912740327e4c54472acaa5f85868cb ] + +Commit d0009effa886 ("netfilter: nf_tables: validate NFPROTO_* family") added +some validation of NFPROTO_* families in the nft_compat module, but it broke +the ability to use legacy iptables modules in dual-stack nftables. 
+ +While with legacy iptables one had to independently manage IPv4 and IPv6 +tables, with nftables it is possible to have dual-stack tables sharing the +rules. Moreover, it was possible to use rules based on legacy iptables +match/target modules in dual-stack nftables. + +As an example, the program from [2] creates an INET dual-stack family table +using an xt_bpf based rule, which looks like the following (the actual output +was generated with a patched nft tool as the current nft tool does not parse +dual stack tables with legacy match rules, so consider it for illustrative +purposes only): + +table inet testfw { + chain input { + type filter hook prerouting priority filter; policy accept; + bytecode counter packets 0 bytes 0 accept + } +} + +After d0009effa886 ("netfilter: nf_tables: validate NFPROTO_* family") we get +EOPNOTSUPP for the above program. + +Fix this by allowing NFPROTO_INET for nft_(match/target)_validate(), but also +restrict the functions to classic iptables hooks. + +Changes in v3: + * clarify that upstream nft will not display such configuration properly and + that the output was generated with a patched nft tool + * remove example program from commit description and link to it instead + * no code changes otherwise + +Changes in v2: + * restrict nft_(match/target)_validate() to classic iptables hooks + * rewrite example program to use unmodified libnftnl + +Fixes: d0009effa886 ("netfilter: nf_tables: validate NFPROTO_* family") +Link: https://lore.kernel.org/all/Zc1PfoWN38UuFJRI@calendula/T/#mc947262582c90fec044c7a3398cc92fac7afea72 [1] +Link: https://lore.kernel.org/all/20240220145509.53357-1-ignat@cloudflare.com/ [2] +Reported-by: Jordan Griege +Signed-off-by: Ignat Korchagin +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/nft_compat.c | 20 ++++++++++++++++++++ + 1 file changed, 20 insertions(+) + +diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c +index 64a2a5f195896..aee046e00bfaf 100644 
+--- a/net/netfilter/nft_compat.c ++++ b/net/netfilter/nft_compat.c +@@ -358,10 +358,20 @@ static int nft_target_validate(const struct nft_ctx *ctx, + + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && ++ ctx->family != NFPROTO_INET && + ctx->family != NFPROTO_BRIDGE && + ctx->family != NFPROTO_ARP) + return -EOPNOTSUPP; + ++ ret = nft_chain_validate_hooks(ctx->chain, ++ (1 << NF_INET_PRE_ROUTING) | ++ (1 << NF_INET_LOCAL_IN) | ++ (1 << NF_INET_FORWARD) | ++ (1 << NF_INET_LOCAL_OUT) | ++ (1 << NF_INET_POST_ROUTING)); ++ if (ret) ++ return ret; ++ + if (nft_is_base_chain(ctx->chain)) { + const struct nft_base_chain *basechain = + nft_base_chain(ctx->chain); +@@ -607,10 +617,20 @@ static int nft_match_validate(const struct nft_ctx *ctx, + + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && ++ ctx->family != NFPROTO_INET && + ctx->family != NFPROTO_BRIDGE && + ctx->family != NFPROTO_ARP) + return -EOPNOTSUPP; + ++ ret = nft_chain_validate_hooks(ctx->chain, ++ (1 << NF_INET_PRE_ROUTING) | ++ (1 << NF_INET_LOCAL_IN) | ++ (1 << NF_INET_FORWARD) | ++ (1 << NF_INET_LOCAL_OUT) | ++ (1 << NF_INET_POST_ROUTING)); ++ if (ret) ++ return ret; ++ + if (nft_is_base_chain(ctx->chain)) { + const struct nft_base_chain *basechain = + nft_base_chain(ctx->chain); +-- +2.43.0 + diff --git a/queue-5.15/netfilter-nfnetlink_queue-silence-bogus-compiler-war.patch b/queue-5.15/netfilter-nfnetlink_queue-silence-bogus-compiler-war.patch new file mode 100644 index 00000000000..cf15e5d9b12 --- /dev/null +++ b/queue-5.15/netfilter-nfnetlink_queue-silence-bogus-compiler-war.patch @@ -0,0 +1,40 @@ +From c2690174b5a63513a27ed277b2ffae498151a2db Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 26 Nov 2021 13:04:03 +0100 +Subject: netfilter: nfnetlink_queue: silence bogus compiler warning + +From: Florian Westphal + +[ Upstream commit b43c2793f5e9910862e8fe07846b74e45b104501 ] + +net/netfilter/nfnetlink_queue.c:601:36: warning: variable 'ctinfo' is 
+uninitialized when used here [-Wuninitialized] + if (ct && nfnl_ct->build(skb, ct, ctinfo, NFQA_CT, NFQA_CT_INFO) < 0) + +ctinfo is only uninitialized if ct == NULL. Init it to 0 to silence this. + +Reported-by: kernel test robot +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Stable-dep-of: 62e7151ae3eb ("netfilter: bridge: confirm multicast packets before passing them up the stack") +Signed-off-by: Sasha Levin +--- + net/netfilter/nfnetlink_queue.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c +index 5329ebf19a18b..f4468ef3d0a94 100644 +--- a/net/netfilter/nfnetlink_queue.c ++++ b/net/netfilter/nfnetlink_queue.c +@@ -387,7 +387,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, + struct net_device *indev; + struct net_device *outdev; + struct nf_conn *ct = NULL; +- enum ip_conntrack_info ctinfo; ++ enum ip_conntrack_info ctinfo = 0; + struct nfnl_ct_hook *nfnl_ct; + bool csum_verify; + char *secdata = NULL; +-- +2.43.0 + diff --git a/queue-5.15/netlink-fix-kernel-infoleak-after-free-in-__skb_data.patch b/queue-5.15/netlink-fix-kernel-infoleak-after-free-in-__skb_data.patch new file mode 100644 index 00000000000..e790b76a219 --- /dev/null +++ b/queue-5.15/netlink-fix-kernel-infoleak-after-free-in-__skb_data.patch @@ -0,0 +1,134 @@ +From 237b2b023ee1bef92716db824b94220a99b18a0a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 21 Feb 2024 16:40:48 +0900 +Subject: netlink: Fix kernel-infoleak-after-free in __skb_datagram_iter + +From: Ryosuke Yasuoka + +[ Upstream commit 661779e1fcafe1b74b3f3fe8e980c1e207fea1fd ] + +syzbot reported the following uninit-value access issue [1]: + +netlink_to_full_skb() creates a new `skb` and puts the `skb->data` +passed as a 1st arg of netlink_to_full_skb() onto new `skb`. The data +size is specified as `len` and passed to skb_put_data(). 
This `len` +is based on `skb->end` that is not data offset but buffer offset. The +`skb->end` contains data and tailroom. Since the tailroom is not +initialized when the new `skb` is created, KMSAN detects uninitialized +memory area when copying the data. + +This patch resolves this issue by correcting the len from `skb->end` to +`skb->len`, which is the actual data offset. + +BUG: KMSAN: kernel-infoleak-after-free in instrument_copy_to_user include/linux/instrumented.h:114 [inline] +BUG: KMSAN: kernel-infoleak-after-free in copy_to_user_iter lib/iov_iter.c:24 [inline] +BUG: KMSAN: kernel-infoleak-after-free in iterate_ubuf include/linux/iov_iter.h:29 [inline] +BUG: KMSAN: kernel-infoleak-after-free in iterate_and_advance2 include/linux/iov_iter.h:245 [inline] +BUG: KMSAN: kernel-infoleak-after-free in iterate_and_advance include/linux/iov_iter.h:271 [inline] +BUG: KMSAN: kernel-infoleak-after-free in _copy_to_iter+0x364/0x2520 lib/iov_iter.c:186 + instrument_copy_to_user include/linux/instrumented.h:114 [inline] + copy_to_user_iter lib/iov_iter.c:24 [inline] + iterate_ubuf include/linux/iov_iter.h:29 [inline] + iterate_and_advance2 include/linux/iov_iter.h:245 [inline] + iterate_and_advance include/linux/iov_iter.h:271 [inline] + _copy_to_iter+0x364/0x2520 lib/iov_iter.c:186 + copy_to_iter include/linux/uio.h:197 [inline] + simple_copy_to_iter+0x68/0xa0 net/core/datagram.c:532 + __skb_datagram_iter+0x123/0xdc0 net/core/datagram.c:420 + skb_copy_datagram_iter+0x5c/0x200 net/core/datagram.c:546 + skb_copy_datagram_msg include/linux/skbuff.h:3960 [inline] + packet_recvmsg+0xd9c/0x2000 net/packet/af_packet.c:3482 + sock_recvmsg_nosec net/socket.c:1044 [inline] + sock_recvmsg net/socket.c:1066 [inline] + sock_read_iter+0x467/0x580 net/socket.c:1136 + call_read_iter include/linux/fs.h:2014 [inline] + new_sync_read fs/read_write.c:389 [inline] + vfs_read+0x8f6/0xe00 fs/read_write.c:470 + ksys_read+0x20f/0x4c0 fs/read_write.c:613 + __do_sys_read fs/read_write.c:623 [inline] + 
__se_sys_read fs/read_write.c:621 [inline] + __x64_sys_read+0x93/0xd0 fs/read_write.c:621 + do_syscall_x64 arch/x86/entry/common.c:52 [inline] + do_syscall_64+0x44/0x110 arch/x86/entry/common.c:83 + entry_SYSCALL_64_after_hwframe+0x63/0x6b + +Uninit was stored to memory at: + skb_put_data include/linux/skbuff.h:2622 [inline] + netlink_to_full_skb net/netlink/af_netlink.c:181 [inline] + __netlink_deliver_tap_skb net/netlink/af_netlink.c:298 [inline] + __netlink_deliver_tap+0x5be/0xc90 net/netlink/af_netlink.c:325 + netlink_deliver_tap net/netlink/af_netlink.c:338 [inline] + netlink_deliver_tap_kernel net/netlink/af_netlink.c:347 [inline] + netlink_unicast_kernel net/netlink/af_netlink.c:1341 [inline] + netlink_unicast+0x10f1/0x1250 net/netlink/af_netlink.c:1368 + netlink_sendmsg+0x1238/0x13d0 net/netlink/af_netlink.c:1910 + sock_sendmsg_nosec net/socket.c:730 [inline] + __sock_sendmsg net/socket.c:745 [inline] + ____sys_sendmsg+0x9c2/0xd60 net/socket.c:2584 + ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2638 + __sys_sendmsg net/socket.c:2667 [inline] + __do_sys_sendmsg net/socket.c:2676 [inline] + __se_sys_sendmsg net/socket.c:2674 [inline] + __x64_sys_sendmsg+0x307/0x490 net/socket.c:2674 + do_syscall_x64 arch/x86/entry/common.c:52 [inline] + do_syscall_64+0x44/0x110 arch/x86/entry/common.c:83 + entry_SYSCALL_64_after_hwframe+0x63/0x6b + +Uninit was created at: + free_pages_prepare mm/page_alloc.c:1087 [inline] + free_unref_page_prepare+0xb0/0xa40 mm/page_alloc.c:2347 + free_unref_page_list+0xeb/0x1100 mm/page_alloc.c:2533 + release_pages+0x23d3/0x2410 mm/swap.c:1042 + free_pages_and_swap_cache+0xd9/0xf0 mm/swap_state.c:316 + tlb_batch_pages_flush mm/mmu_gather.c:98 [inline] + tlb_flush_mmu_free mm/mmu_gather.c:293 [inline] + tlb_flush_mmu+0x6f5/0x980 mm/mmu_gather.c:300 + tlb_finish_mmu+0x101/0x260 mm/mmu_gather.c:392 + exit_mmap+0x49e/0xd30 mm/mmap.c:3321 + __mmput+0x13f/0x530 kernel/fork.c:1349 + mmput+0x8a/0xa0 kernel/fork.c:1371 + exit_mm+0x1b8/0x360 
kernel/exit.c:567 + do_exit+0xd57/0x4080 kernel/exit.c:858 + do_group_exit+0x2fd/0x390 kernel/exit.c:1021 + __do_sys_exit_group kernel/exit.c:1032 [inline] + __se_sys_exit_group kernel/exit.c:1030 [inline] + __x64_sys_exit_group+0x3c/0x50 kernel/exit.c:1030 + do_syscall_x64 arch/x86/entry/common.c:52 [inline] + do_syscall_64+0x44/0x110 arch/x86/entry/common.c:83 + entry_SYSCALL_64_after_hwframe+0x63/0x6b + +Bytes 3852-3903 of 3904 are uninitialized +Memory access of size 3904 starts at ffff88812ea1e000 +Data copied to user address 0000000020003280 + +CPU: 1 PID: 5043 Comm: syz-executor297 Not tainted 6.7.0-rc5-syzkaller-00047-g5bd7ef53ffe5 #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/10/2023 + +Fixes: 1853c9496460 ("netlink, mmap: transform mmap skb into full skb on taps") +Reported-and-tested-by: syzbot+34ad5fab48f7bf510349@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=34ad5fab48f7bf510349 [1] +Signed-off-by: Ryosuke Yasuoka +Reviewed-by: Eric Dumazet +Link: https://lore.kernel.org/r/20240221074053.1794118-1-ryasuoka@redhat.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/netlink/af_netlink.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c +index 2169a9c3da1c3..82df02695bbdd 100644 +--- a/net/netlink/af_netlink.c ++++ b/net/netlink/af_netlink.c +@@ -165,7 +165,7 @@ static inline u32 netlink_group_mask(u32 group) + static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb, + gfp_t gfp_mask) + { +- unsigned int len = skb_end_offset(skb); ++ unsigned int len = skb->len; + struct sk_buff *new; + + new = alloc_skb(len, gfp_mask); +-- +2.43.0 + diff --git a/queue-5.15/power-supply-bq27xxx-i2c-do-not-free-non-existing-ir.patch b/queue-5.15/power-supply-bq27xxx-i2c-do-not-free-non-existing-ir.patch new file mode 100644 index 00000000000..c76a7b767dd --- /dev/null +++ 
b/queue-5.15/power-supply-bq27xxx-i2c-do-not-free-non-existing-ir.patch @@ -0,0 +1,51 @@ +From 1fa93963ef55c9a02940494d60aa163c7e3b573e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 15 Feb 2024 16:51:33 +0100 +Subject: power: supply: bq27xxx-i2c: Do not free non existing IRQ + +From: Hans de Goede + +[ Upstream commit 2df70149e73e79783bcbc7db4fa51ecef0e2022c ] + +The bq27xxx i2c-client may not have an IRQ, in which case +client->irq will be 0. bq27xxx_battery_i2c_probe() already has +an if (client->irq) check wrapping the request_threaded_irq(). + +But bq27xxx_battery_i2c_remove() unconditionally calls +free_irq(client->irq) leading to: + +[ 190.310742] ------------[ cut here ]------------ +[ 190.310843] Trying to free already-free IRQ 0 +[ 190.310861] WARNING: CPU: 2 PID: 1304 at kernel/irq/manage.c:1893 free_irq+0x1b8/0x310 + +Followed by a backtrace when unbinding the driver. Add +an if (client->irq) to bq27xxx_battery_i2c_remove() mirroring +probe() to fix this. + +Fixes: 444ff00734f3 ("power: supply: bq27xxx: Fix I2C IRQ race on remove") +Signed-off-by: Hans de Goede +Link: https://lore.kernel.org/r/20240215155133.70537-1-hdegoede@redhat.com +Signed-off-by: Sebastian Reichel +Signed-off-by: Sasha Levin +--- + drivers/power/supply/bq27xxx_battery_i2c.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/power/supply/bq27xxx_battery_i2c.c b/drivers/power/supply/bq27xxx_battery_i2c.c +index b722ee2d7e142..4e5d773b3bf8d 100644 +--- a/drivers/power/supply/bq27xxx_battery_i2c.c ++++ b/drivers/power/supply/bq27xxx_battery_i2c.c +@@ -209,7 +209,9 @@ static int bq27xxx_battery_i2c_remove(struct i2c_client *client) + { + struct bq27xxx_device_info *di = i2c_get_clientdata(client); + +- free_irq(client->irq, di); ++ if (client->irq) ++ free_irq(client->irq, di); ++ + bq27xxx_battery_teardown(di); + + mutex_lock(&battery_mutex); +-- +2.43.0 + diff --git a/queue-5.15/riscv-sparse-memory-vmemmap-out-of-bounds-fix.patch 
b/queue-5.15/riscv-sparse-memory-vmemmap-out-of-bounds-fix.patch new file mode 100644 index 00000000000..c388a238627 --- /dev/null +++ b/queue-5.15/riscv-sparse-memory-vmemmap-out-of-bounds-fix.patch @@ -0,0 +1,50 @@ +From 24732c67b0a17642fdcd5bd6aa3dd8e80c03f671 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 29 Feb 2024 21:17:23 +0200 +Subject: riscv: Sparse-Memory/vmemmap out-of-bounds fix +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Dimitris Vlachos + +[ Upstream commit a11dd49dcb9376776193e15641f84fcc1e5980c9 ] + +Offset vmemmap so that the first page of vmemmap will be mapped +to the first page of physical memory in order to ensure that +vmemmap’s bounds will be respected during +pfn_to_page()/page_to_pfn() operations. +The conversion macros will produce correct SV39/48/57 addresses +for every possible/valid DRAM_BASE inside the physical memory limits. + +v2:Address Alex's comments + +Suggested-by: Alexandre Ghiti +Signed-off-by: Dimitris Vlachos +Reported-by: Dimitris Vlachos +Closes: https://lore.kernel.org/linux-riscv/20240202135030.42265-1-csd4492@csd.uoc.gr +Fixes: d95f1a542c3d ("RISC-V: Implement sparsemem") +Reviewed-by: Alexandre Ghiti +Link: https://lore.kernel.org/r/20240229191723.32779-1-dvlachos@ics.forth.gr +Signed-off-by: Palmer Dabbelt +Signed-off-by: Sasha Levin +--- + arch/riscv/include/asm/pgtable.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h +index 397cb945b16eb..9a3d9b68f2ff4 100644 +--- a/arch/riscv/include/asm/pgtable.h ++++ b/arch/riscv/include/asm/pgtable.h +@@ -58,7 +58,7 @@ + * Define vmemmap for pfn_to_page & page_to_pfn calls. Needed if kernel + * is configured with CONFIG_SPARSEMEM_VMEMMAP enabled. 
+ */ +-#define vmemmap ((struct page *)VMEMMAP_START) ++#define vmemmap ((struct page *)VMEMMAP_START - (phys_ram_base >> PAGE_SHIFT)) + + #define PCI_IO_SIZE SZ_16M + #define PCI_IO_END VMEMMAP_START +-- +2.43.0 + diff --git a/queue-5.15/rtnetlink-fix-error-logic-of-ifla_bridge_flags-writi.patch b/queue-5.15/rtnetlink-fix-error-logic-of-ifla_bridge_flags-writi.patch new file mode 100644 index 00000000000..b0ffc1881eb --- /dev/null +++ b/queue-5.15/rtnetlink-fix-error-logic-of-ifla_bridge_flags-writi.patch @@ -0,0 +1,83 @@ +From ac5cea766854656b623a23a5d1370b9ea54d0fce Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 27 Feb 2024 20:11:28 +0800 +Subject: rtnetlink: fix error logic of IFLA_BRIDGE_FLAGS writing back + +From: Lin Ma + +[ Upstream commit 743ad091fb46e622f1b690385bb15e3cd3daf874 ] + +In the commit d73ef2d69c0d ("rtnetlink: let rtnl_bridge_setlink checks +IFLA_BRIDGE_MODE length"), an adjustment was made to the old loop logic +in the function `rtnl_bridge_setlink` to enable the loop to also check +the length of the IFLA_BRIDGE_MODE attribute. However, this adjustment +removed the `break` statement and led to an error logic of the flags +writing back at the end of this function. + +if (have_flags) + memcpy(nla_data(attr), &flags, sizeof(flags)); + // attr should point to IFLA_BRIDGE_FLAGS NLA !!! + +Before the mentioned commit, the `attr` is guaranteed to be IFLA_BRIDGE_FLAGS. +However, this is not necessarily true for now as the updated loop will let +the attr point to the last NLA, even an invalid NLA which could cause +overflow writes. + +This patch introduces a new variable `br_flags_attr` to save the NLA pointer +that points to IFLA_BRIDGE_FLAGS and uses it to resolve the mentioned +error logic. 
+ +Fixes: d73ef2d69c0d ("rtnetlink: let rtnl_bridge_setlink checks IFLA_BRIDGE_MODE length") +Signed-off-by: Lin Ma +Acked-by: Nikolay Aleksandrov +Link: https://lore.kernel.org/r/20240227121128.608110-1-linma@zju.edu.cn +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/core/rtnetlink.c | 11 +++++------ + 1 file changed, 5 insertions(+), 6 deletions(-) + +diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c +index 1b71e5c582bbc..ef218e290dfba 100644 +--- a/net/core/rtnetlink.c ++++ b/net/core/rtnetlink.c +@@ -4925,10 +4925,9 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, + struct net *net = sock_net(skb->sk); + struct ifinfomsg *ifm; + struct net_device *dev; +- struct nlattr *br_spec, *attr = NULL; ++ struct nlattr *br_spec, *attr, *br_flags_attr = NULL; + int rem, err = -EOPNOTSUPP; + u16 flags = 0; +- bool have_flags = false; + + if (nlmsg_len(nlh) < sizeof(*ifm)) + return -EINVAL; +@@ -4946,11 +4945,11 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, + br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (br_spec) { + nla_for_each_nested(attr, br_spec, rem) { +- if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !have_flags) { ++ if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !br_flags_attr) { + if (nla_len(attr) < sizeof(flags)) + return -EINVAL; + +- have_flags = true; ++ br_flags_attr = attr; + flags = nla_get_u16(attr); + } + +@@ -4994,8 +4993,8 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, + } + } + +- if (have_flags) +- memcpy(nla_data(attr), &flags, sizeof(flags)); ++ if (br_flags_attr) ++ memcpy(nla_data(br_flags_attr), &flags, sizeof(flags)); + out: + return err; + } +-- +2.43.0 + diff --git a/queue-5.15/series b/queue-5.15/series index 42d7784f547..85fd4674e45 100644 --- a/queue-5.15/series +++ b/queue-5.15/series @@ -1 +1,46 @@ netfilter-nf_tables-disallow-timeout-for-anonymous-sets.patch 
+mtd-spinand-gigadevice-fix-the-get-ecc-status-issue.patch +netlink-fix-kernel-infoleak-after-free-in-__skb_data.patch +net-ip_tunnel-prevent-perpetual-headroom-growth.patch +tun-fix-xdp_rxq_info-s-queue_index-when-detaching.patch +cpufreq-intel_pstate-fix-pstate-limits-enforcement-f.patch +net-veth-clear-gro-when-clearing-xdp-even-when-down.patch +ipv6-fix-potential-struct-net-leak-in-inet6_rtm_geta.patch +lan78xx-enable-auto-speed-configuration-for-lan7850-.patch +net-enable-memcg-accounting-for-veth-queues.patch +veth-try-harder-when-allocating-queue-memory.patch +net-usb-dm9601-fix-wrong-return-value-in-dm9601_mdio.patch +uapi-in6-replace-temporary-label-with-rfc9486.patch +stmmac-clear-variable-when-destroying-workqueue.patch +bluetooth-avoid-potential-use-after-free-in-hci_erro.patch +bluetooth-hci_event-fix-wrongly-recorded-wakeup-bd_a.patch +bluetooth-hci_event-fix-handling-of-hci_ev_io_capa_r.patch +bluetooth-enforce-validation-on-max-value-of-connect.patch +netfilter-nf_tables-allow-nfproto_inet-in-nft_-match.patch +netfilter-nfnetlink_queue-silence-bogus-compiler-war.patch +netfilter-core-move-ip_ct_attach-indirection-to-stru.patch +netfilter-make-function-op-structures-const.patch +netfilter-let-reset-rules-clean-out-conntrack-entrie.patch +netfilter-bridge-confirm-multicast-packets-before-pa.patch +rtnetlink-fix-error-logic-of-ifla_bridge_flags-writi.patch +igb-extend-ptp-timestamp-adjustments-to-i211.patch +tls-rx-don-t-store-the-record-type-in-socket-context.patch +tls-rx-don-t-store-the-decryption-status-in-socket-c.patch +tls-rx-don-t-issue-wake-ups-when-data-is-decrypted.patch +tls-rx-refactor-decrypt_skb_update.patch +tls-hw-rx-use-return-value-of-tls_device_decrypted-t.patch +tls-rx-drop-unnecessary-arguments-from-tls_setup_fro.patch +tls-rx-don-t-report-text-length-from-the-bowels-of-d.patch +tls-rx-wrap-decryption-arguments-in-a-structure.patch +tls-rx-factor-out-writing-contenttype-to-cmsg.patch +tls-rx-don-t-track-the-async-count.patch 
+tls-rx-move-counting-tlsdecrypterrors-for-sync.patch +tls-rx-assume-crypto-always-calls-our-callback.patch +tls-rx-use-async-as-an-in-out-argument.patch +tls-decrement-decrypt_pending-if-no-async-completion.patch +efi-capsule-loader-fix-incorrect-allocation-size.patch +power-supply-bq27xxx-i2c-do-not-free-non-existing-ir.patch +alsa-drop-leftover-snd-rtctimer-stuff-from-makefile.patch +fbcon-always-restore-the-old-font-data-in-fbcon_do_s.patch +afs-fix-endless-loop-in-directory-parsing.patch +riscv-sparse-memory-vmemmap-out-of-bounds-fix.patch diff --git a/queue-5.15/stmmac-clear-variable-when-destroying-workqueue.patch b/queue-5.15/stmmac-clear-variable-when-destroying-workqueue.patch new file mode 100644 index 00000000000..d96658f87cc --- /dev/null +++ b/queue-5.15/stmmac-clear-variable-when-destroying-workqueue.patch @@ -0,0 +1,83 @@ +From dea96a3b376af58a1727fb971f53406a7c2d1e67 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 26 Feb 2024 17:42:32 +0100 +Subject: stmmac: Clear variable when destroying workqueue + +From: Jakub Raczynski + +[ Upstream commit 8af411bbba1f457c33734795f024d0ef26d0963f ] + +Currently when suspending driver and stopping workqueue it is checked whether +workqueue is not NULL and if so, it is destroyed. +Function destroy_workqueue() does drain queue and does clear variable, but +it does not set workqueue variable to NULL. This can cause kernel/module +panic if code attempts to clear workqueue that was not initialized. + +This scenario is possible when resuming suspended driver in stmmac_resume(), +because there is no handling for failed stmmac_hw_setup(), +which can fail and return if DMA engine has failed to initialize, +and workqueue is initialized after DMA engine. +Should DMA engine fail to initialize, resume will proceed normally, +but interface won't work and TX queue will eventually timeout, +causing 'Reset adapter' error. +This then does destroy workqueue during reset process. 
+And since workqueue is initialized after DMA engine and can be skipped, +it will cause kernel/module panic. + +To secure against this possible crash, set workqueue variable to NULL when +destroying workqueue. + +Log/backtrace from crash goes as follows: +[88.031977]------------[ cut here ]------------ +[88.031985]NETDEV WATCHDOG: eth0 (sxgmac): transmit queue 1 timed out +[88.032017]WARNING: CPU: 0 PID: 0 at net/sched/sch_generic.c:477 dev_watchdog+0x390/0x398 + +[88.032251]---[ end trace e70de432e4d5c2c0 ]--- +[88.032282]sxgmac 16d88000.ethernet eth0: Reset adapter. +[88.036359]------------[ cut here ]------------ +[88.036519]Call trace: +[88.036523] flush_workqueue+0x3e4/0x430 +[88.036528] drain_workqueue+0xc4/0x160 +[88.036533] destroy_workqueue+0x40/0x270 +[88.036537] stmmac_fpe_stop_wq+0x4c/0x70 +[88.036541] stmmac_release+0x278/0x280 +[88.036546] __dev_close_many+0xcc/0x158 +[88.036551] dev_close_many+0xbc/0x190 +[88.036555] dev_close.part.0+0x70/0xc0 +[88.036560] dev_close+0x24/0x30 +[88.036564] stmmac_service_task+0x110/0x140 +[88.036569] process_one_work+0x1d8/0x4a0 +[88.036573] worker_thread+0x54/0x408 +[88.036578] kthread+0x164/0x170 +[88.036583] ret_from_fork+0x10/0x20 +[88.036588]---[ end trace e70de432e4d5c2c1 ]--- +[88.036597]Unable to handle kernel NULL pointer dereference at virtual address 0000000000000004 + +Fixes: 5a5586112b929 ("net: stmmac: support FPE link partner hand-shaking procedure") +Signed-off-by: Jakub Raczynski +Reviewed-by: Jiri Pirko +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +index a1c1e353ca072..b0ab8f6986f8b 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c ++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +@@ -3825,8 +3825,10 @@ static void stmmac_fpe_stop_wq(struct stmmac_priv *priv) + { + set_bit(__FPE_REMOVING, &priv->fpe_task_state); + +- if (priv->fpe_wq) ++ if (priv->fpe_wq) { + destroy_workqueue(priv->fpe_wq); ++ priv->fpe_wq = NULL; ++ } + + netdev_info(priv->dev, "FPE workqueue stop"); + } +-- +2.43.0 + diff --git a/queue-5.15/tls-decrement-decrypt_pending-if-no-async-completion.patch b/queue-5.15/tls-decrement-decrypt_pending-if-no-async-completion.patch new file mode 100644 index 00000000000..fdd395f73b7 --- /dev/null +++ b/queue-5.15/tls-decrement-decrypt_pending-if-no-async-completion.patch @@ -0,0 +1,42 @@ +From 1ff365db9e76d0b1fa3372803386e06c4e2e34bf Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 28 Feb 2024 23:43:57 +0100 +Subject: tls: decrement decrypt_pending if no async completion will be called + +From: Sabrina Dubroca + +[ Upstream commit f7fa16d49837f947ee59492958f9e6f0e51d9a78 ] + +With mixed sync/async decryption, or failures of crypto_aead_decrypt, +we increment decrypt_pending but we never do the corresponding +decrement since tls_decrypt_done will not be called. In this case, we +should decrement decrypt_pending immediately to avoid getting stuck. + +For example, the prequeue prequeue test gets stuck with mixed +modes (one async decrypt + one sync decrypt). 
+ +Fixes: 94524d8fc965 ("net/tls: Add support for async decryption of tls records") +Signed-off-by: Sabrina Dubroca +Link: https://lore.kernel.org/r/c56d5fc35543891d5319f834f25622360e1bfbec.1709132643.git.sd@queasysnail.net +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/tls/tls_sw.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c +index a1a99f9f093b1..83319a3b8bdd1 100644 +--- a/net/tls/tls_sw.c ++++ b/net/tls/tls_sw.c +@@ -273,6 +273,8 @@ static int tls_do_decryption(struct sock *sk, + return 0; + + ret = crypto_wait_req(ret, &ctx->async_wait); ++ } else if (darg->async) { ++ atomic_dec(&ctx->decrypt_pending); + } + darg->async = false; + +-- +2.43.0 + diff --git a/queue-5.15/tls-hw-rx-use-return-value-of-tls_device_decrypted-t.patch b/queue-5.15/tls-hw-rx-use-return-value-of-tls_device_decrypted-t.patch new file mode 100644 index 00000000000..0da194263d5 --- /dev/null +++ b/queue-5.15/tls-hw-rx-use-return-value-of-tls_device_decrypted-t.patch @@ -0,0 +1,76 @@ +From ee9dec9a54e9e842b9958aa991a0e679e73f8e98 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 7 Apr 2022 20:38:23 -0700 +Subject: tls: hw: rx: use return value of tls_device_decrypted() to carry + status + +From: Jakub Kicinski + +[ Upstream commit 71471ca32505afa7c3f7f6a8268716e1ddb81cd4 ] + +Instead of tls_device poking into internals of the message +return 1 from tls_device_decrypted() if the device handled +the decryption. + +Signed-off-by: Jakub Kicinski +Signed-off-by: David S. 
Miller +Stable-dep-of: f7fa16d49837 ("tls: decrement decrypt_pending if no async completion will be called") +Signed-off-by: Sasha Levin +--- + net/tls/tls_device.c | 7 ++----- + net/tls/tls_sw.c | 5 ++--- + 2 files changed, 4 insertions(+), 8 deletions(-) + +diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c +index f23d18e666284..e7c361807590d 100644 +--- a/net/tls/tls_device.c ++++ b/net/tls/tls_device.c +@@ -936,7 +936,6 @@ int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx, + struct sk_buff *skb, struct strp_msg *rxm) + { + struct tls_offload_context_rx *ctx = tls_offload_ctx_rx(tls_ctx); +- struct tls_msg *tlm = tls_msg(skb); + int is_decrypted = skb->decrypted; + int is_encrypted = !is_decrypted; + struct sk_buff *skb_iter; +@@ -951,11 +950,9 @@ int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx, + tls_ctx->rx.rec_seq, rxm->full_len, + is_encrypted, is_decrypted); + +- tlm->decrypted |= is_decrypted; +- + if (unlikely(test_bit(TLS_RX_DEV_DEGRADED, &tls_ctx->flags))) { + if (likely(is_encrypted || is_decrypted)) +- return 0; ++ return is_decrypted; + + /* After tls_device_down disables the offload, the next SKB will + * likely have initial fragments decrypted, and final ones not +@@ -970,7 +967,7 @@ int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx, + */ + if (is_decrypted) { + ctx->resync_nh_reset = 1; +- return 0; ++ return is_decrypted; + } + if (is_encrypted) { + tls_device_core_ctrl_rx_resync(tls_ctx, ctx, sk, skb); +diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c +index 7da17dd7c38b9..eed32ef3ca4a0 100644 +--- a/net/tls/tls_sw.c ++++ b/net/tls/tls_sw.c +@@ -1571,9 +1571,8 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, + err = tls_device_decrypted(sk, tls_ctx, skb, rxm); + if (err < 0) + return err; +- +- /* skip SW decryption if NIC handled it already */ +- if (tlm->decrypted) { ++ if (err > 0) { ++ tlm->decrypted = 1; + *zc = false; + goto decrypt_done; + } +-- 
+2.43.0 + diff --git a/queue-5.15/tls-rx-assume-crypto-always-calls-our-callback.patch b/queue-5.15/tls-rx-assume-crypto-always-calls-our-callback.patch new file mode 100644 index 00000000000..8581b303500 --- /dev/null +++ b/queue-5.15/tls-rx-assume-crypto-always-calls-our-callback.patch @@ -0,0 +1,38 @@ +From 4d8900bf4b8d44dab517788ebb440b614a6a719c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 11 Apr 2022 12:19:12 -0700 +Subject: tls: rx: assume crypto always calls our callback + +From: Jakub Kicinski + +[ Upstream commit 1c699ffa48a15710746989c36a82cbfb07e8d17f ] + +If crypto didn't always invoke our callback for async +we'd not be clearing skb->sk and would crash in the +skb core when freeing it. This if must be dead code. + +Signed-off-by: Jakub Kicinski +Signed-off-by: David S. Miller +Stable-dep-of: f7fa16d49837 ("tls: decrement decrypt_pending if no async completion will be called") +Signed-off-by: Sasha Levin +--- + net/tls/tls_sw.c | 3 --- + 1 file changed, 3 deletions(-) + +diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c +index 85fa49170b4e5..27ac27daec868 100644 +--- a/net/tls/tls_sw.c ++++ b/net/tls/tls_sw.c +@@ -277,9 +277,6 @@ static int tls_do_decryption(struct sock *sk, + if (ret == -EBADMSG) + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSDECRYPTERROR); + +- if (async) +- atomic_dec(&ctx->decrypt_pending); +- + return ret; + } + +-- +2.43.0 + diff --git a/queue-5.15/tls-rx-don-t-issue-wake-ups-when-data-is-decrypted.patch b/queue-5.15/tls-rx-don-t-issue-wake-ups-when-data-is-decrypted.patch new file mode 100644 index 00000000000..52b00bd5f5b --- /dev/null +++ b/queue-5.15/tls-rx-don-t-issue-wake-ups-when-data-is-decrypted.patch @@ -0,0 +1,47 @@ +From b89682750d6bee89bbcf232970f6d5770424ad76 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 7 Apr 2022 20:38:21 -0700 +Subject: tls: rx: don't issue wake ups when data is decrypted + +From: Jakub Kicinski + +[ Upstream commit 5dbda02d322db7762f1a0348117cde913fb46c13 ] + +We inform the 
applications that data is available when +the record is received. Decryption happens inline inside +recvmsg or splice call. Generating another wakeup inside +the decryption handler seems pointless as someone must +be actively reading the socket if we are executing this +code. + +Signed-off-by: Jakub Kicinski +Signed-off-by: David S. Miller +Stable-dep-of: f7fa16d49837 ("tls: decrement decrypt_pending if no async completion will be called") +Signed-off-by: Sasha Levin +--- + net/tls/tls_sw.c | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c +index 0a6630bbef53e..5fdc4f5193ee5 100644 +--- a/net/tls/tls_sw.c ++++ b/net/tls/tls_sw.c +@@ -1557,7 +1557,6 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, + bool async) + { + struct tls_context *tls_ctx = tls_get_ctx(sk); +- struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); + struct tls_prot_info *prot = &tls_ctx->prot_info; + struct strp_msg *rxm = strp_msg(skb); + struct tls_msg *tlm = tls_msg(skb); +@@ -1596,7 +1595,6 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, + rxm->full_len -= prot->overhead_size; + tls_advance_record_sn(sk, prot, &tls_ctx->rx); + tlm->decrypted = 1; +- ctx->saved_data_ready(sk); + } else { + *zc = false; + } +-- +2.43.0 + diff --git a/queue-5.15/tls-rx-don-t-report-text-length-from-the-bowels-of-d.patch b/queue-5.15/tls-rx-don-t-report-text-length-from-the-bowels-of-d.patch new file mode 100644 index 00000000000..d44221f1e19 --- /dev/null +++ b/queue-5.15/tls-rx-don-t-report-text-length-from-the-bowels-of-d.patch @@ -0,0 +1,177 @@ +From c4bd2ea6944d8b91a240bb02187ea60feec48ead Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 8 Apr 2022 11:31:25 -0700 +Subject: tls: rx: don't report text length from the bowels of decrypt + +From: Jakub Kicinski + +[ Upstream commit 9bdf75ccffa690237cd0b472cd598cf6d22873dc ] + +We plumb pointer to chunk all the way to the decryption method. 
+It's set to the length of the text when decrypt_skb_update() +returns. + +I think the code is written this way because original TLS +implementation passed &chunk to zerocopy_from_iter() and this +was carried forward as the code got more complex, without +any refactoring. + +The fix for peek() introduced a new variable - to_decrypt +which for all practical purposes is what chunk is going to +get set to. Spare ourselves the pointer passing, use to_decrypt. + +Use this opportunity to clean things up a little further. + +Note that chunk / to_decrypt was mostly needed for the async +path, since the sync path would access rxm->full_len (decryption +transforms full_len from record size to text size). Use the +right source of truth more explicitly. + +We have three cases: + - async - it's TLS 1.2 only, so chunk == to_decrypt, but we + need the min() because to_decrypt is a whole record + and we don't want to underflow len. Note that we can't + handle partial record by falling back to sync as it + would introduce reordering against records in flight. + - zc - again, TLS 1.2 only for now, so chunk == to_decrypt, + we don't do zc if len < to_decrypt, no need to check again. + - normal - it already handles chunk > len, we can factor out the + assignment to rxm->full_len and share it with zc. + +Signed-off-by: Jakub Kicinski +Signed-off-by: David S. 
Miller +Stable-dep-of: f7fa16d49837 ("tls: decrement decrypt_pending if no async completion will be called") +Signed-off-by: Sasha Levin +--- + net/tls/tls_sw.c | 33 ++++++++++++++------------------- + 1 file changed, 14 insertions(+), 19 deletions(-) + +diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c +index cf09f147f5a09..fc1fa98d21937 100644 +--- a/net/tls/tls_sw.c ++++ b/net/tls/tls_sw.c +@@ -1415,7 +1415,7 @@ static int tls_setup_from_iter(struct iov_iter *from, + static int decrypt_internal(struct sock *sk, struct sk_buff *skb, + struct iov_iter *out_iov, + struct scatterlist *out_sg, +- int *chunk, bool *zc, bool async) ++ bool *zc, bool async) + { + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); +@@ -1522,7 +1522,6 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, + (n_sgout - 1)); + if (err < 0) + goto fallback_to_reg_recv; +- *chunk = data_len; + } else if (out_sg) { + memcpy(sgout, out_sg, n_sgout * sizeof(*sgout)); + } else { +@@ -1532,7 +1531,6 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, + fallback_to_reg_recv: + sgout = sgin; + pages = 0; +- *chunk = data_len; + *zc = false; + } + +@@ -1551,8 +1549,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, + } + + static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, +- struct iov_iter *dest, int *chunk, bool *zc, +- bool async) ++ struct iov_iter *dest, bool *zc, bool async) + { + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_prot_info *prot = &tls_ctx->prot_info; +@@ -1576,7 +1573,7 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, + } + } + +- err = decrypt_internal(sk, skb, dest, NULL, chunk, zc, async); ++ err = decrypt_internal(sk, skb, dest, NULL, zc, async); + if (err < 0) { + if (err == -EINPROGRESS) + tls_advance_record_sn(sk, prot, &tls_ctx->rx); +@@ -1603,9 +1600,8 @@ int decrypt_skb(struct sock *sk, struct sk_buff *skb, + struct 
scatterlist *sgout) + { + bool zc = true; +- int chunk; + +- return decrypt_internal(sk, skb, NULL, sgout, &chunk, &zc, false); ++ return decrypt_internal(sk, skb, NULL, sgout, &zc, false); + } + + static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb, +@@ -1795,9 +1791,8 @@ int tls_sw_recvmsg(struct sock *sk, + num_async = 0; + while (len && (decrypted + copied < target || ctx->recv_pkt)) { + bool retain_skb = false; ++ int to_decrypt, chunk; + bool zc = false; +- int to_decrypt; +- int chunk = 0; + bool async_capable; + bool async = false; + +@@ -1834,7 +1829,7 @@ int tls_sw_recvmsg(struct sock *sk, + async_capable = false; + + err = decrypt_skb_update(sk, skb, &msg->msg_iter, +- &chunk, &zc, async_capable); ++ &zc, async_capable); + if (err < 0 && err != -EINPROGRESS) { + tls_err_abort(sk, -EBADMSG); + goto recv_end; +@@ -1872,8 +1867,13 @@ int tls_sw_recvmsg(struct sock *sk, + } + } + +- if (async) ++ if (async) { ++ /* TLS 1.2-only, to_decrypt must be text length */ ++ chunk = min_t(int, to_decrypt, len); + goto pick_next_record; ++ } ++ /* TLS 1.3 may have updated the length by more than overhead */ ++ chunk = rxm->full_len; + + if (!zc) { + if (bpf_strp_enabled) { +@@ -1889,11 +1889,9 @@ int tls_sw_recvmsg(struct sock *sk, + } + } + +- if (rxm->full_len > len) { ++ if (chunk > len) { + retain_skb = true; + chunk = len; +- } else { +- chunk = rxm->full_len; + } + + err = skb_copy_datagram_msg(skb, rxm->offset, +@@ -1908,9 +1906,6 @@ int tls_sw_recvmsg(struct sock *sk, + } + + pick_next_record: +- if (chunk > len) +- chunk = len; +- + decrypted += chunk; + len -= chunk; + +@@ -2011,7 +2006,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, + if (!skb) + goto splice_read_end; + +- err = decrypt_skb_update(sk, skb, NULL, &chunk, &zc, false); ++ err = decrypt_skb_update(sk, skb, NULL, &zc, false); + if (err < 0) { + tls_err_abort(sk, -EBADMSG); + goto splice_read_end; +-- +2.43.0 + diff --git 
a/queue-5.15/tls-rx-don-t-store-the-decryption-status-in-socket-c.patch b/queue-5.15/tls-rx-don-t-store-the-decryption-status-in-socket-c.patch new file mode 100644 index 00000000000..6c57115c3c3 --- /dev/null +++ b/queue-5.15/tls-rx-don-t-store-the-decryption-status-in-socket-c.patch @@ -0,0 +1,116 @@ +From 6262e20920b50d8cc830964df259ff18380046d2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 7 Apr 2022 20:38:17 -0700 +Subject: tls: rx: don't store the decryption status in socket context + +From: Jakub Kicinski + +[ Upstream commit 7dc59c33d62c4520a119051d4486c214ef5caa23 ] + +Similar justification to previous change, the information +about decryption status belongs in the skb. + +Signed-off-by: Jakub Kicinski +Signed-off-by: David S. Miller +Stable-dep-of: f7fa16d49837 ("tls: decrement decrypt_pending if no async completion will be called") +Signed-off-by: Sasha Levin +--- + include/net/strparser.h | 1 + + include/net/tls.h | 1 - + net/tls/tls_device.c | 3 ++- + net/tls/tls_sw.c | 10 ++++++---- + 4 files changed, 9 insertions(+), 6 deletions(-) + +diff --git a/include/net/strparser.h b/include/net/strparser.h +index c271543076cf8..a191486eb1e4c 100644 +--- a/include/net/strparser.h ++++ b/include/net/strparser.h +@@ -72,6 +72,7 @@ struct sk_skb_cb { + u64 temp_reg; + struct tls_msg { + u8 control; ++ u8 decrypted; + } tls; + }; + +diff --git a/include/net/tls.h b/include/net/tls.h +index 24c1b718ceacc..ea0aeae26cf76 100644 +--- a/include/net/tls.h ++++ b/include/net/tls.h +@@ -147,7 +147,6 @@ struct tls_sw_context_rx { + + struct sk_buff *recv_pkt; + u8 async_capable:1; +- u8 decrypted:1; + atomic_t decrypt_pending; + /* protect crypto_wait with decrypt_pending*/ + spinlock_t decrypt_compl_lock; +diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c +index 88785196a8966..f23d18e666284 100644 +--- a/net/tls/tls_device.c ++++ b/net/tls/tls_device.c +@@ -936,6 +936,7 @@ int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx, + struct 
sk_buff *skb, struct strp_msg *rxm) + { + struct tls_offload_context_rx *ctx = tls_offload_ctx_rx(tls_ctx); ++ struct tls_msg *tlm = tls_msg(skb); + int is_decrypted = skb->decrypted; + int is_encrypted = !is_decrypted; + struct sk_buff *skb_iter; +@@ -950,7 +951,7 @@ int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx, + tls_ctx->rx.rec_seq, rxm->full_len, + is_encrypted, is_decrypted); + +- ctx->sw.decrypted |= is_decrypted; ++ tlm->decrypted |= is_decrypted; + + if (unlikely(test_bit(TLS_RX_DEV_DEGRADED, &tls_ctx->flags))) { + if (likely(is_encrypted || is_decrypted)) +diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c +index 82d7c9b036bc7..0a6630bbef53e 100644 +--- a/net/tls/tls_sw.c ++++ b/net/tls/tls_sw.c +@@ -1560,9 +1560,10 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, + struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); + struct tls_prot_info *prot = &tls_ctx->prot_info; + struct strp_msg *rxm = strp_msg(skb); ++ struct tls_msg *tlm = tls_msg(skb); + int pad, err = 0; + +- if (!ctx->decrypted) { ++ if (!tlm->decrypted) { + if (tls_ctx->rx_conf == TLS_HW) { + err = tls_device_decrypted(sk, tls_ctx, skb, rxm); + if (err < 0) +@@ -1570,7 +1571,7 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, + } + + /* Still not decrypted after tls_device */ +- if (!ctx->decrypted) { ++ if (!tlm->decrypted) { + err = decrypt_internal(sk, skb, dest, NULL, chunk, zc, + async); + if (err < 0) { +@@ -1594,7 +1595,7 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, + rxm->offset += prot->prepend_size; + rxm->full_len -= prot->overhead_size; + tls_advance_record_sn(sk, prot, &tls_ctx->rx); +- ctx->decrypted = 1; ++ tlm->decrypted = 1; + ctx->saved_data_ready(sk); + } else { + *zc = false; +@@ -2137,8 +2138,9 @@ static void tls_queue(struct strparser *strp, struct sk_buff *skb) + { + struct tls_context *tls_ctx = tls_get_ctx(strp->sk); + struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); 
++ struct tls_msg *tlm = tls_msg(skb); + +- ctx->decrypted = 0; ++ tlm->decrypted = 0; + + ctx->recv_pkt = skb; + strp_pause(strp); +-- +2.43.0 + diff --git a/queue-5.15/tls-rx-don-t-store-the-record-type-in-socket-context.patch b/queue-5.15/tls-rx-don-t-store-the-record-type-in-socket-context.patch new file mode 100644 index 00000000000..a6479979392 --- /dev/null +++ b/queue-5.15/tls-rx-don-t-store-the-record-type-in-socket-context.patch @@ -0,0 +1,241 @@ +From f550346a37e8a8e8b3dc78a2e78d0efb3b9cac0a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 7 Apr 2022 20:38:16 -0700 +Subject: tls: rx: don't store the record type in socket context + +From: Jakub Kicinski + +[ Upstream commit c3f6bb74137c68b515b7e2ff123a80611e801013 ] + +Original TLS implementation was handling one record at a time. +It stashed the type of the record inside tls context (per socket +structure) for convenience. When async crypto support was added +[1] the author had to use skb->cb to store the type per-message. + +The use of skb->cb overlaps with strparser, however, so a hybrid +approach was taken where type is stored in context while parsing +(since we parse a message at a time) but once parsed it's copied +to skb->cb. + +Recently a workaround for sockmaps [2] exposed the previously +private struct _strp_msg and started a trend of adding user +fields directly in strparser's header. This is cleaner than +storing information about an skb in the context. + +This change is not strictly necessary, but IMHO the ownership +of the context field is confusing. Information naturally +belongs to the skb. + +[1] commit 94524d8fc965 ("net/tls: Add support for async decryption of tls records") +[2] commit b2c4618162ec ("bpf, sockmap: sk_skb data_end access incorrect when src_reg = dst_reg") + +Signed-off-by: Jakub Kicinski +Signed-off-by: David S. 
Miller +Stable-dep-of: f7fa16d49837 ("tls: decrement decrypt_pending if no async completion will be called") +Signed-off-by: Sasha Levin +--- + include/net/strparser.h | 3 +++ + include/net/tls.h | 10 +++------- + net/tls/tls_sw.c | 38 +++++++++++++++++--------------------- + 3 files changed, 23 insertions(+), 28 deletions(-) + +diff --git a/include/net/strparser.h b/include/net/strparser.h +index 732b7097d78e4..c271543076cf8 100644 +--- a/include/net/strparser.h ++++ b/include/net/strparser.h +@@ -70,6 +70,9 @@ struct sk_skb_cb { + * when dst_reg == src_reg. + */ + u64 temp_reg; ++ struct tls_msg { ++ u8 control; ++ } tls; + }; + + static inline struct strp_msg *strp_msg(struct sk_buff *skb) +diff --git a/include/net/tls.h b/include/net/tls.h +index eda0015c5c592..24c1b718ceacc 100644 +--- a/include/net/tls.h ++++ b/include/net/tls.h +@@ -116,11 +116,6 @@ struct tls_rec { + u8 aead_req_ctx[]; + }; + +-struct tls_msg { +- struct strp_msg rxm; +- u8 control; +-}; +- + struct tx_work { + struct delayed_work work; + struct sock *sk; +@@ -151,7 +146,6 @@ struct tls_sw_context_rx { + void (*saved_data_ready)(struct sock *sk); + + struct sk_buff *recv_pkt; +- u8 control; + u8 async_capable:1; + u8 decrypted:1; + atomic_t decrypt_pending; +@@ -410,7 +404,9 @@ void tls_free_partial_record(struct sock *sk, struct tls_context *ctx); + + static inline struct tls_msg *tls_msg(struct sk_buff *skb) + { +- return (struct tls_msg *)strp_msg(skb); ++ struct sk_skb_cb *scb = (struct sk_skb_cb *)skb->cb; ++ ++ return &scb->tls; + } + + static inline bool tls_is_partially_sent_record(struct tls_context *ctx) +diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c +index e6f700f67c010..82d7c9b036bc7 100644 +--- a/net/tls/tls_sw.c ++++ b/net/tls/tls_sw.c +@@ -128,10 +128,10 @@ static int skb_nsg(struct sk_buff *skb, int offset, int len) + return __skb_nsg(skb, offset, len, 0); + } + +-static int padding_length(struct tls_sw_context_rx *ctx, +- struct tls_prot_info *prot, struct sk_buff *skb) 
++static int padding_length(struct tls_prot_info *prot, struct sk_buff *skb) + { + struct strp_msg *rxm = strp_msg(skb); ++ struct tls_msg *tlm = tls_msg(skb); + int sub = 0; + + /* Determine zero-padding length */ +@@ -153,7 +153,7 @@ static int padding_length(struct tls_sw_context_rx *ctx, + sub++; + back++; + } +- ctx->control = content_type; ++ tlm->control = content_type; + } + return sub; + } +@@ -187,7 +187,7 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err) + struct strp_msg *rxm = strp_msg(skb); + int pad; + +- pad = padding_length(ctx, prot, skb); ++ pad = padding_length(prot, skb); + if (pad < 0) { + ctx->async_wait.err = pad; + tls_err_abort(skb->sk, pad); +@@ -1423,6 +1423,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, + struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); + struct tls_prot_info *prot = &tls_ctx->prot_info; + struct strp_msg *rxm = strp_msg(skb); ++ struct tls_msg *tlm = tls_msg(skb); + int n_sgin, n_sgout, nsg, mem_size, aead_size, err, pages = 0; + struct aead_request *aead_req; + struct sk_buff *unused; +@@ -1500,7 +1501,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, + /* Prepare AAD */ + tls_make_aad(aad, rxm->full_len - prot->overhead_size + + prot->tail_size, +- tls_ctx->rx.rec_seq, ctx->control, prot); ++ tls_ctx->rx.rec_seq, tlm->control, prot); + + /* Prepare sgin */ + sg_init_table(sgin, n_sgin); +@@ -1585,7 +1586,7 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, + *zc = false; + } + +- pad = padding_length(ctx, prot, skb); ++ pad = padding_length(prot, skb); + if (pad < 0) + return pad; + +@@ -1817,26 +1818,21 @@ int tls_sw_recvmsg(struct sock *sk, + } + } + goto recv_end; +- } else { +- tlm = tls_msg(skb); +- if (prot->version == TLS_1_3_VERSION) +- tlm->control = 0; +- else +- tlm->control = ctx->control; + } + + rxm = strp_msg(skb); ++ tlm = tls_msg(skb); + + to_decrypt = rxm->full_len - prot->overhead_size; + + if 
(to_decrypt <= len && !is_kvec && !is_peek && +- ctx->control == TLS_RECORD_TYPE_DATA && ++ tlm->control == TLS_RECORD_TYPE_DATA && + prot->version != TLS_1_3_VERSION && + !bpf_strp_enabled) + zc = true; + + /* Do not use async mode if record is non-data */ +- if (ctx->control == TLS_RECORD_TYPE_DATA && !bpf_strp_enabled) ++ if (tlm->control == TLS_RECORD_TYPE_DATA && !bpf_strp_enabled) + async_capable = ctx->async_capable; + else + async_capable = false; +@@ -1851,8 +1847,6 @@ int tls_sw_recvmsg(struct sock *sk, + if (err == -EINPROGRESS) { + async = true; + num_async++; +- } else if (prot->version == TLS_1_3_VERSION) { +- tlm->control = ctx->control; + } + + /* If the type of records being processed is not known yet, +@@ -1999,6 +1993,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, + struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); + struct strp_msg *rxm = NULL; + struct sock *sk = sock->sk; ++ struct tls_msg *tlm; + struct sk_buff *skb; + ssize_t copied = 0; + bool from_queue; +@@ -2027,14 +2022,15 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, + } + } + ++ rxm = strp_msg(skb); ++ tlm = tls_msg(skb); ++ + /* splice does not support reading control messages */ +- if (ctx->control != TLS_RECORD_TYPE_DATA) { ++ if (tlm->control != TLS_RECORD_TYPE_DATA) { + err = -EINVAL; + goto splice_read_end; + } + +- rxm = strp_msg(skb); +- + chunk = min_t(unsigned int, rxm->full_len, len); + copied = skb_splice_bits(skb, sk, rxm->offset, pipe, chunk, flags); + if (copied < 0) +@@ -2077,10 +2073,10 @@ bool tls_sw_sock_is_readable(struct sock *sk) + static int tls_read_size(struct strparser *strp, struct sk_buff *skb) + { + struct tls_context *tls_ctx = tls_get_ctx(strp->sk); +- struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); + struct tls_prot_info *prot = &tls_ctx->prot_info; + char header[TLS_HEADER_SIZE + MAX_IV_SIZE]; + struct strp_msg *rxm = strp_msg(skb); ++ struct tls_msg *tlm = tls_msg(skb); + size_t cipher_overhead; + 
size_t data_len = 0; + int ret; +@@ -2101,7 +2097,7 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb) + if (ret < 0) + goto read_failure; + +- ctx->control = header[0]; ++ tlm->control = header[0]; + + data_len = ((header[4] & 0xFF) | (header[3] << 8)); + +-- +2.43.0 + diff --git a/queue-5.15/tls-rx-don-t-track-the-async-count.patch b/queue-5.15/tls-rx-don-t-track-the-async-count.patch new file mode 100644 index 00000000000..b338d8226bd --- /dev/null +++ b/queue-5.15/tls-rx-don-t-track-the-async-count.patch @@ -0,0 +1,84 @@ +From aaacfbe1c45047270c216374c84a8149b10d630a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 8 Apr 2022 11:31:30 -0700 +Subject: tls: rx: don't track the async count + +From: Jakub Kicinski + +[ Upstream commit 7da18bcc5e4cfd14ea520367546c5697e64ae592 ] + +We track both if the last record was handled by async crypto +and how many records were async. This is not necessary. We +implicitly assume once crypto goes async it will stay that +way, otherwise we'd reorder records. So just track if we're +in async mode, the exact number of records is not necessary. + +This change also forces us into "async" mode more consistently +in case crypto ever decided to interleave async and sync. + +Signed-off-by: Jakub Kicinski +Signed-off-by: David S. 
Miller +Stable-dep-of: f7fa16d49837 ("tls: decrement decrypt_pending if no async completion will be called") +Signed-off-by: Sasha Levin +--- + net/tls/tls_sw.c | 12 +++++------- + 1 file changed, 5 insertions(+), 7 deletions(-) + +diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c +index ca71a9f559b37..d3bbae9af9f41 100644 +--- a/net/tls/tls_sw.c ++++ b/net/tls/tls_sw.c +@@ -1753,13 +1753,13 @@ int tls_sw_recvmsg(struct sock *sk, + struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); + struct tls_prot_info *prot = &tls_ctx->prot_info; + struct sk_psock *psock; +- int num_async, pending; + unsigned char control = 0; + ssize_t decrypted = 0; + struct strp_msg *rxm; + struct tls_msg *tlm; + struct sk_buff *skb; + ssize_t copied = 0; ++ bool async = false; + int target, err = 0; + long timeo; + bool is_kvec = iov_iter_is_kvec(&msg->msg_iter); +@@ -1791,12 +1791,10 @@ int tls_sw_recvmsg(struct sock *sk, + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + + decrypted = 0; +- num_async = 0; + while (len && (decrypted + copied < target || ctx->recv_pkt)) { + struct tls_decrypt_arg darg = {}; + bool retain_skb = false; + int to_decrypt, chunk; +- bool async; + + skb = tls_wait_data(sk, psock, flags & MSG_DONTWAIT, timeo, &err); + if (!skb) { +@@ -1836,10 +1834,8 @@ int tls_sw_recvmsg(struct sock *sk, + goto recv_end; + } + +- if (err == -EINPROGRESS) { ++ if (err == -EINPROGRESS) + async = true; +- num_async++; +- } + + /* If the type of records being processed is not known yet, + * set it to record type just dequeued. 
If it is already known, +@@ -1914,7 +1910,9 @@ int tls_sw_recvmsg(struct sock *sk, + } + + recv_end: +- if (num_async) { ++ if (async) { ++ int pending; ++ + /* Wait for all previously submitted records to be decrypted */ + spin_lock_bh(&ctx->decrypt_compl_lock); + ctx->async_notify = true; +-- +2.43.0 + diff --git a/queue-5.15/tls-rx-drop-unnecessary-arguments-from-tls_setup_fro.patch b/queue-5.15/tls-rx-drop-unnecessary-arguments-from-tls_setup_fro.patch new file mode 100644 index 00000000000..19ba798c3ed --- /dev/null +++ b/queue-5.15/tls-rx-drop-unnecessary-arguments-from-tls_setup_fro.patch @@ -0,0 +1,73 @@ +From 1a773bacd9783962656ab5cba2346b81f34697cc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 8 Apr 2022 11:31:24 -0700 +Subject: tls: rx: drop unnecessary arguments from tls_setup_from_iter() + +From: Jakub Kicinski + +[ Upstream commit d4bd88e67666c73cfa9d75c282e708890d4f10a7 ] + +sk is unused, remove it to make it clear the function +doesn't poke at the socket. + +size_used is always 0 on input and @length on success. + +Signed-off-by: Jakub Kicinski +Signed-off-by: David S. 
Miller +Stable-dep-of: f7fa16d49837 ("tls: decrement decrypt_pending if no async completion will be called") +Signed-off-by: Sasha Levin +--- + net/tls/tls_sw.c | 14 ++++++-------- + 1 file changed, 6 insertions(+), 8 deletions(-) + +diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c +index eed32ef3ca4a0..cf09f147f5a09 100644 +--- a/net/tls/tls_sw.c ++++ b/net/tls/tls_sw.c +@@ -1348,15 +1348,14 @@ static struct sk_buff *tls_wait_data(struct sock *sk, struct sk_psock *psock, + return skb; + } + +-static int tls_setup_from_iter(struct sock *sk, struct iov_iter *from, ++static int tls_setup_from_iter(struct iov_iter *from, + int length, int *pages_used, +- unsigned int *size_used, + struct scatterlist *to, + int to_max_pages) + { + int rc = 0, i = 0, num_elem = *pages_used, maxpages; + struct page *pages[MAX_SKB_FRAGS]; +- unsigned int size = *size_used; ++ unsigned int size = 0; + ssize_t copied, use; + size_t offset; + +@@ -1399,8 +1398,7 @@ static int tls_setup_from_iter(struct sock *sk, struct iov_iter *from, + sg_mark_end(&to[num_elem - 1]); + out: + if (rc) +- iov_iter_revert(from, size - *size_used); +- *size_used = size; ++ iov_iter_revert(from, size); + *pages_used = num_elem; + + return rc; +@@ -1519,12 +1517,12 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, + sg_init_table(sgout, n_sgout); + sg_set_buf(&sgout[0], aad, prot->aad_size); + +- *chunk = 0; +- err = tls_setup_from_iter(sk, out_iov, data_len, +- &pages, chunk, &sgout[1], ++ err = tls_setup_from_iter(out_iov, data_len, ++ &pages, &sgout[1], + (n_sgout - 1)); + if (err < 0) + goto fallback_to_reg_recv; ++ *chunk = data_len; + } else if (out_sg) { + memcpy(sgout, out_sg, n_sgout * sizeof(*sgout)); + } else { +-- +2.43.0 + diff --git a/queue-5.15/tls-rx-factor-out-writing-contenttype-to-cmsg.patch b/queue-5.15/tls-rx-factor-out-writing-contenttype-to-cmsg.patch new file mode 100644 index 00000000000..f05695fe5dc --- /dev/null +++ 
b/queue-5.15/tls-rx-factor-out-writing-contenttype-to-cmsg.patch @@ -0,0 +1,196 @@ +From 0691e263e075592a7610cea28a3a235650592b15 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 8 Apr 2022 11:31:28 -0700 +Subject: tls: rx: factor out writing ContentType to cmsg + +From: Jakub Kicinski + +[ Upstream commit 06554f4ffc2595ae52ee80aec4a13bd77d22bed7 ] + +cmsg can be filled in during rx_list processing or normal +receive. Consolidate the code. + +We don't need to keep the boolean to track if the cmsg was +created. 0 is an invalid content type. + +Signed-off-by: Jakub Kicinski +Signed-off-by: David S. Miller +Stable-dep-of: f7fa16d49837 ("tls: decrement decrypt_pending if no async completion will be called") +Signed-off-by: Sasha Levin +--- + net/tls/tls_sw.c | 91 +++++++++++++++++++----------------------------- + 1 file changed, 36 insertions(+), 55 deletions(-) + +diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c +index c491cde30504e..ca71a9f559b37 100644 +--- a/net/tls/tls_sw.c ++++ b/net/tls/tls_sw.c +@@ -1634,6 +1634,29 @@ static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb, + return true; + } + ++static int tls_record_content_type(struct msghdr *msg, struct tls_msg *tlm, ++ u8 *control) ++{ ++ int err; ++ ++ if (!*control) { ++ *control = tlm->control; ++ if (!*control) ++ return -EBADMSG; ++ ++ err = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE, ++ sizeof(*control), control); ++ if (*control != TLS_RECORD_TYPE_DATA) { ++ if (err || msg->msg_flags & MSG_CTRUNC) ++ return -EIO; ++ } ++ } else if (*control != tlm->control) { ++ return 0; ++ } ++ ++ return 1; ++} ++ + /* This function traverses the rx_list in tls receive context to copies the + * decrypted records into the buffer provided by caller zero copy is not + * true. 
Further, the records are removed from the rx_list if it is not a peek +@@ -1642,31 +1665,23 @@ static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb, + static int process_rx_list(struct tls_sw_context_rx *ctx, + struct msghdr *msg, + u8 *control, +- bool *cmsg, + size_t skip, + size_t len, + bool zc, + bool is_peek) + { + struct sk_buff *skb = skb_peek(&ctx->rx_list); +- u8 ctrl = *control; +- u8 msgc = *cmsg; + struct tls_msg *tlm; + ssize_t copied = 0; +- +- /* Set the record type in 'control' if caller didn't pass it */ +- if (!ctrl && skb) { +- tlm = tls_msg(skb); +- ctrl = tlm->control; +- } ++ int err; + + while (skip && skb) { + struct strp_msg *rxm = strp_msg(skb); + tlm = tls_msg(skb); + +- /* Cannot process a record of different type */ +- if (ctrl != tlm->control) +- return 0; ++ err = tls_record_content_type(msg, tlm, control); ++ if (err <= 0) ++ return err; + + if (skip < rxm->full_len) + break; +@@ -1682,27 +1697,12 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, + + tlm = tls_msg(skb); + +- /* Cannot process a record of different type */ +- if (ctrl != tlm->control) +- return 0; +- +- /* Set record type if not already done. For a non-data record, +- * do not proceed if record type could not be copied. 
+- */ +- if (!msgc) { +- int cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE, +- sizeof(ctrl), &ctrl); +- msgc = true; +- if (ctrl != TLS_RECORD_TYPE_DATA) { +- if (cerr || msg->msg_flags & MSG_CTRUNC) +- return -EIO; +- +- *cmsg = msgc; +- } +- } ++ err = tls_record_content_type(msg, tlm, control); ++ if (err <= 0) ++ return err; + + if (!zc || (rxm->full_len - skip) > len) { +- int err = skb_copy_datagram_msg(skb, rxm->offset + skip, ++ err = skb_copy_datagram_msg(skb, rxm->offset + skip, + msg, chunk); + if (err < 0) + return err; +@@ -1739,7 +1739,6 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, + skb = next_skb; + } + +- *control = ctrl; + return copied; + } + +@@ -1761,7 +1760,6 @@ int tls_sw_recvmsg(struct sock *sk, + struct tls_msg *tlm; + struct sk_buff *skb; + ssize_t copied = 0; +- bool cmsg = false; + int target, err = 0; + long timeo; + bool is_kvec = iov_iter_is_kvec(&msg->msg_iter); +@@ -1778,8 +1776,7 @@ int tls_sw_recvmsg(struct sock *sk, + bpf_strp_enabled = sk_psock_strp_enabled(psock); + + /* Process pending decrypted records. It must be non-zero-copy */ +- err = process_rx_list(ctx, msg, &control, &cmsg, 0, len, false, +- is_peek); ++ err = process_rx_list(ctx, msg, &control, 0, len, false, is_peek); + if (err < 0) { + tls_err_abort(sk, err); + goto end; +@@ -1851,26 +1848,10 @@ int tls_sw_recvmsg(struct sock *sk, + * is known just after record is dequeued from stream parser. + * For tls1.3, we disable async. 
+ */ +- +- if (!control) +- control = tlm->control; +- else if (control != tlm->control) ++ err = tls_record_content_type(msg, tlm, &control); ++ if (err <= 0) + goto recv_end; + +- if (!cmsg) { +- int cerr; +- +- cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE, +- sizeof(control), &control); +- cmsg = true; +- if (control != TLS_RECORD_TYPE_DATA) { +- if (cerr || msg->msg_flags & MSG_CTRUNC) { +- err = -EIO; +- goto recv_end; +- } +- } +- } +- + if (async) { + /* TLS 1.2-only, to_decrypt must be text length */ + chunk = min_t(int, to_decrypt, len); +@@ -1959,10 +1940,10 @@ int tls_sw_recvmsg(struct sock *sk, + + /* Drain records from the rx_list & copy if required */ + if (is_peek || is_kvec) +- err = process_rx_list(ctx, msg, &control, &cmsg, copied, ++ err = process_rx_list(ctx, msg, &control, copied, + decrypted, false, is_peek); + else +- err = process_rx_list(ctx, msg, &control, &cmsg, 0, ++ err = process_rx_list(ctx, msg, &control, 0, + decrypted, true, is_peek); + if (err < 0) { + tls_err_abort(sk, err); +-- +2.43.0 + diff --git a/queue-5.15/tls-rx-move-counting-tlsdecrypterrors-for-sync.patch b/queue-5.15/tls-rx-move-counting-tlsdecrypterrors-for-sync.patch new file mode 100644 index 00000000000..38177daf30b --- /dev/null +++ b/queue-5.15/tls-rx-move-counting-tlsdecrypterrors-for-sync.patch @@ -0,0 +1,49 @@ +From c2fb73ea5bd49d8a9c033c57ad6b98af0a162093 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 11 Apr 2022 12:19:10 -0700 +Subject: tls: rx: move counting TlsDecryptErrors for sync + +From: Jakub Kicinski + +[ Upstream commit 284b4d93daee56dff3e10029ddf2e03227f50dbf ] + +Move counting TlsDecryptErrors to tls_do_decryption() +where differences between sync and async crypto are +reconciled. + +No functional changes, this code just always gave +me a pause. + +Signed-off-by: Jakub Kicinski +Signed-off-by: David S. 
Miller +Stable-dep-of: f7fa16d49837 ("tls: decrement decrypt_pending if no async completion will be called") +Signed-off-by: Sasha Levin +--- + net/tls/tls_sw.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c +index d3bbae9af9f41..85fa49170b4e5 100644 +--- a/net/tls/tls_sw.c ++++ b/net/tls/tls_sw.c +@@ -274,6 +274,8 @@ static int tls_do_decryption(struct sock *sk, + + ret = crypto_wait_req(ret, &ctx->async_wait); + } ++ if (ret == -EBADMSG) ++ TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSDECRYPTERROR); + + if (async) + atomic_dec(&ctx->decrypt_pending); +@@ -1583,8 +1585,6 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, + if (err < 0) { + if (err == -EINPROGRESS) + tls_advance_record_sn(sk, prot, &tls_ctx->rx); +- else if (err == -EBADMSG) +- TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSDECRYPTERROR); + return err; + } + +-- +2.43.0 + diff --git a/queue-5.15/tls-rx-refactor-decrypt_skb_update.patch b/queue-5.15/tls-rx-refactor-decrypt_skb_update.patch new file mode 100644 index 00000000000..1f4c24a1ec9 --- /dev/null +++ b/queue-5.15/tls-rx-refactor-decrypt_skb_update.patch @@ -0,0 +1,107 @@ +From a31e78e9ccb122c8276bfbc8343347a95e5e48af Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 7 Apr 2022 20:38:22 -0700 +Subject: tls: rx: refactor decrypt_skb_update() + +From: Jakub Kicinski + +[ Upstream commit 3764ae5ba6615095de86698a00e814513b9ad0d5 ] + +Use early return and a jump label to remove two indentation levels. +No functional changes. + +Signed-off-by: Jakub Kicinski +Signed-off-by: David S. 
Miller +Stable-dep-of: f7fa16d49837 ("tls: decrement decrypt_pending if no async completion will be called") +Signed-off-by: Sasha Levin +--- + net/tls/tls_sw.c | 66 ++++++++++++++++++++++++------------------------ + 1 file changed, 33 insertions(+), 33 deletions(-) + +diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c +index 5fdc4f5193ee5..7da17dd7c38b9 100644 +--- a/net/tls/tls_sw.c ++++ b/net/tls/tls_sw.c +@@ -1560,46 +1560,46 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, + struct tls_prot_info *prot = &tls_ctx->prot_info; + struct strp_msg *rxm = strp_msg(skb); + struct tls_msg *tlm = tls_msg(skb); +- int pad, err = 0; ++ int pad, err; + +- if (!tlm->decrypted) { +- if (tls_ctx->rx_conf == TLS_HW) { +- err = tls_device_decrypted(sk, tls_ctx, skb, rxm); +- if (err < 0) +- return err; +- } ++ if (tlm->decrypted) { ++ *zc = false; ++ return 0; ++ } + +- /* Still not decrypted after tls_device */ +- if (!tlm->decrypted) { +- err = decrypt_internal(sk, skb, dest, NULL, chunk, zc, +- async); +- if (err < 0) { +- if (err == -EINPROGRESS) +- tls_advance_record_sn(sk, prot, +- &tls_ctx->rx); +- else if (err == -EBADMSG) +- TLS_INC_STATS(sock_net(sk), +- LINUX_MIB_TLSDECRYPTERROR); +- return err; +- } +- } else { ++ if (tls_ctx->rx_conf == TLS_HW) { ++ err = tls_device_decrypted(sk, tls_ctx, skb, rxm); ++ if (err < 0) ++ return err; ++ ++ /* skip SW decryption if NIC handled it already */ ++ if (tlm->decrypted) { + *zc = false; ++ goto decrypt_done; + } ++ } + +- pad = padding_length(prot, skb); +- if (pad < 0) +- return pad; +- +- rxm->full_len -= pad; +- rxm->offset += prot->prepend_size; +- rxm->full_len -= prot->overhead_size; +- tls_advance_record_sn(sk, prot, &tls_ctx->rx); +- tlm->decrypted = 1; +- } else { +- *zc = false; ++ err = decrypt_internal(sk, skb, dest, NULL, chunk, zc, async); ++ if (err < 0) { ++ if (err == -EINPROGRESS) ++ tls_advance_record_sn(sk, prot, &tls_ctx->rx); ++ else if (err == -EBADMSG) ++ TLS_INC_STATS(sock_net(sk), 
LINUX_MIB_TLSDECRYPTERROR); ++ return err; + } + +- return err; ++decrypt_done: ++ pad = padding_length(prot, skb); ++ if (pad < 0) ++ return pad; ++ ++ rxm->full_len -= pad; ++ rxm->offset += prot->prepend_size; ++ rxm->full_len -= prot->overhead_size; ++ tls_advance_record_sn(sk, prot, &tls_ctx->rx); ++ tlm->decrypted = 1; ++ ++ return 0; + } + + int decrypt_skb(struct sock *sk, struct sk_buff *skb, +-- +2.43.0 + diff --git a/queue-5.15/tls-rx-use-async-as-an-in-out-argument.patch b/queue-5.15/tls-rx-use-async-as-an-in-out-argument.patch new file mode 100644 index 00000000000..c8d6033ca39 --- /dev/null +++ b/queue-5.15/tls-rx-use-async-as-an-in-out-argument.patch @@ -0,0 +1,118 @@ +From bda007d9121ca97630a5bd3ffede8021d4c75177 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 11 Apr 2022 12:19:15 -0700 +Subject: tls: rx: use async as an in-out argument + +From: Jakub Kicinski + +[ Upstream commit 3547a1f9d988d88ecff4fc365d2773037c849f49 ] + +Propagating EINPROGRESS thru multiple layers of functions is +error prone. Use darg->async as an in/out argument, like we +use darg->zc today. On input it tells the code if async is +allowed, on output if it took place. + +Signed-off-by: Jakub Kicinski +Signed-off-by: David S. 
Miller +Stable-dep-of: f7fa16d49837 ("tls: decrement decrypt_pending if no async completion will be called") +Signed-off-by: Sasha Levin +--- + net/tls/tls_sw.c | 31 ++++++++++++++++--------------- + 1 file changed, 16 insertions(+), 15 deletions(-) + +diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c +index 27ac27daec868..a1a99f9f093b1 100644 +--- a/net/tls/tls_sw.c ++++ b/net/tls/tls_sw.c +@@ -236,7 +236,7 @@ static int tls_do_decryption(struct sock *sk, + char *iv_recv, + size_t data_len, + struct aead_request *aead_req, +- bool async) ++ struct tls_decrypt_arg *darg) + { + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_prot_info *prot = &tls_ctx->prot_info; +@@ -249,7 +249,7 @@ static int tls_do_decryption(struct sock *sk, + data_len + prot->tag_size, + (u8 *)iv_recv); + +- if (async) { ++ if (darg->async) { + /* Using skb->sk to push sk through to crypto async callback + * handler. This allows propagating errors up to the socket + * if needed. It _must_ be cleared in the async handler +@@ -269,11 +269,13 @@ static int tls_do_decryption(struct sock *sk, + + ret = crypto_aead_decrypt(aead_req); + if (ret == -EINPROGRESS) { +- if (async) +- return ret; ++ if (darg->async) ++ return 0; + + ret = crypto_wait_req(ret, &ctx->async_wait); + } ++ darg->async = false; ++ + if (ret == -EBADMSG) + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSDECRYPTERROR); + +@@ -1540,9 +1542,9 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, + + /* Prepare and submit AEAD request */ + err = tls_do_decryption(sk, skb, sgin, sgout, iv, +- data_len, aead_req, darg->async); +- if (err == -EINPROGRESS) +- return err; ++ data_len, aead_req, darg); ++ if (darg->async) ++ return 0; + + /* Release the pages in case iov was mapped to pages */ + for (; pages > 0; pages--) +@@ -1579,11 +1581,10 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, + } + + err = decrypt_internal(sk, skb, dest, NULL, darg); +- if (err < 0) { +- if (err == -EINPROGRESS) +- 
tls_advance_record_sn(sk, prot, &tls_ctx->rx); ++ if (err < 0) + return err; +- } ++ if (darg->async) ++ goto decrypt_next; + + decrypt_done: + pad = padding_length(prot, skb); +@@ -1593,8 +1594,9 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, + rxm->full_len -= pad; + rxm->offset += prot->prepend_size; + rxm->full_len -= prot->overhead_size; +- tls_advance_record_sn(sk, prot, &tls_ctx->rx); + tlm->decrypted = 1; ++decrypt_next: ++ tls_advance_record_sn(sk, prot, &tls_ctx->rx); + + return 0; + } +@@ -1826,13 +1828,12 @@ int tls_sw_recvmsg(struct sock *sk, + darg.async = false; + + err = decrypt_skb_update(sk, skb, &msg->msg_iter, &darg); +- if (err < 0 && err != -EINPROGRESS) { ++ if (err < 0) { + tls_err_abort(sk, -EBADMSG); + goto recv_end; + } + +- if (err == -EINPROGRESS) +- async = true; ++ async |= darg.async; + + /* If the type of records being processed is not known yet, + * set it to record type just dequeued. If it is already known, +-- +2.43.0 + diff --git a/queue-5.15/tls-rx-wrap-decryption-arguments-in-a-structure.patch b/queue-5.15/tls-rx-wrap-decryption-arguments-in-a-structure.patch new file mode 100644 index 00000000000..3895d4464a3 --- /dev/null +++ b/queue-5.15/tls-rx-wrap-decryption-arguments-in-a-structure.patch @@ -0,0 +1,197 @@ +From 890569dace1deb9afd771b5fb88d07455426c5b3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 8 Apr 2022 11:31:26 -0700 +Subject: tls: rx: wrap decryption arguments in a structure + +From: Jakub Kicinski + +[ Upstream commit 4175eac37123a68ebee71f288826339fb89bfec7 ] + +We pass zc as a pointer to bool a few functions down as an in/out +argument. This is error prone since C will happily evaluate a pointer +as a boolean (IOW forgetting *zc and writing zc leads to loss of +developer time..). Wrap the arguments into a structure. + +Signed-off-by: Jakub Kicinski +Signed-off-by: David S.
Miller +Stable-dep-of: f7fa16d49837 ("tls: decrement decrypt_pending if no async completion will be called") +Signed-off-by: Sasha Levin +--- + net/tls/tls_sw.c | 49 ++++++++++++++++++++++++++---------------------- + 1 file changed, 27 insertions(+), 22 deletions(-) + +diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c +index fc1fa98d21937..c491cde30504e 100644 +--- a/net/tls/tls_sw.c ++++ b/net/tls/tls_sw.c +@@ -44,6 +44,11 @@ + #include + #include + ++struct tls_decrypt_arg { ++ bool zc; ++ bool async; ++}; ++ + noinline void tls_err_abort(struct sock *sk, int err) + { + WARN_ON_ONCE(err >= 0); +@@ -1415,7 +1420,7 @@ static int tls_setup_from_iter(struct iov_iter *from, + static int decrypt_internal(struct sock *sk, struct sk_buff *skb, + struct iov_iter *out_iov, + struct scatterlist *out_sg, +- bool *zc, bool async) ++ struct tls_decrypt_arg *darg) + { + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); +@@ -1432,7 +1437,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, + prot->tail_size; + int iv_offset = 0; + +- if (*zc && (out_iov || out_sg)) { ++ if (darg->zc && (out_iov || out_sg)) { + if (out_iov) + n_sgout = iov_iter_npages(out_iov, INT_MAX) + 1; + else +@@ -1441,7 +1446,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, + rxm->full_len - prot->prepend_size); + } else { + n_sgout = 0; +- *zc = false; ++ darg->zc = false; + n_sgin = skb_cow_data(skb, 0, &unused); + } + +@@ -1531,12 +1536,12 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, + fallback_to_reg_recv: + sgout = sgin; + pages = 0; +- *zc = false; ++ darg->zc = false; + } + + /* Prepare and submit AEAD request */ + err = tls_do_decryption(sk, skb, sgin, sgout, iv, +- data_len, aead_req, async); ++ data_len, aead_req, darg->async); + if (err == -EINPROGRESS) + return err; + +@@ -1549,7 +1554,8 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, + } + + static int 
decrypt_skb_update(struct sock *sk, struct sk_buff *skb, +- struct iov_iter *dest, bool *zc, bool async) ++ struct iov_iter *dest, ++ struct tls_decrypt_arg *darg) + { + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_prot_info *prot = &tls_ctx->prot_info; +@@ -1558,7 +1564,7 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, + int pad, err; + + if (tlm->decrypted) { +- *zc = false; ++ darg->zc = false; + return 0; + } + +@@ -1568,12 +1574,12 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, + return err; + if (err > 0) { + tlm->decrypted = 1; +- *zc = false; ++ darg->zc = false; + goto decrypt_done; + } + } + +- err = decrypt_internal(sk, skb, dest, NULL, zc, async); ++ err = decrypt_internal(sk, skb, dest, NULL, darg); + if (err < 0) { + if (err == -EINPROGRESS) + tls_advance_record_sn(sk, prot, &tls_ctx->rx); +@@ -1599,9 +1605,9 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, + int decrypt_skb(struct sock *sk, struct sk_buff *skb, + struct scatterlist *sgout) + { +- bool zc = true; ++ struct tls_decrypt_arg darg = { .zc = true, }; + +- return decrypt_internal(sk, skb, NULL, sgout, &zc, false); ++ return decrypt_internal(sk, skb, NULL, sgout, &darg); + } + + static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb, +@@ -1790,11 +1796,10 @@ int tls_sw_recvmsg(struct sock *sk, + decrypted = 0; + num_async = 0; + while (len && (decrypted + copied < target || ctx->recv_pkt)) { ++ struct tls_decrypt_arg darg = {}; + bool retain_skb = false; + int to_decrypt, chunk; +- bool zc = false; +- bool async_capable; +- bool async = false; ++ bool async; + + skb = tls_wait_data(sk, psock, flags & MSG_DONTWAIT, timeo, &err); + if (!skb) { +@@ -1820,16 +1825,15 @@ int tls_sw_recvmsg(struct sock *sk, + tlm->control == TLS_RECORD_TYPE_DATA && + prot->version != TLS_1_3_VERSION && + !bpf_strp_enabled) +- zc = true; ++ darg.zc = true; + + /* Do not use async mode if record is non-data */ + if 
(tlm->control == TLS_RECORD_TYPE_DATA && !bpf_strp_enabled) +- async_capable = ctx->async_capable; ++ darg.async = ctx->async_capable; + else +- async_capable = false; ++ darg.async = false; + +- err = decrypt_skb_update(sk, skb, &msg->msg_iter, +- &zc, async_capable); ++ err = decrypt_skb_update(sk, skb, &msg->msg_iter, &darg); + if (err < 0 && err != -EINPROGRESS) { + tls_err_abort(sk, -EBADMSG); + goto recv_end; +@@ -1875,7 +1879,7 @@ int tls_sw_recvmsg(struct sock *sk, + /* TLS 1.3 may have updated the length by more than overhead */ + chunk = rxm->full_len; + +- if (!zc) { ++ if (!darg.zc) { + if (bpf_strp_enabled) { + err = sk_psock_tls_strp_read(psock, skb); + if (err != __SK_PASS) { +@@ -1991,7 +1995,6 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, + int err = 0; + long timeo; + int chunk; +- bool zc = false; + + lock_sock(sk); + +@@ -2001,12 +2004,14 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, + if (from_queue) { + skb = __skb_dequeue(&ctx->rx_list); + } else { ++ struct tls_decrypt_arg darg = {}; ++ + skb = tls_wait_data(sk, NULL, flags & SPLICE_F_NONBLOCK, timeo, + &err); + if (!skb) + goto splice_read_end; + +- err = decrypt_skb_update(sk, skb, NULL, &zc, false); ++ err = decrypt_skb_update(sk, skb, NULL, &darg); + if (err < 0) { + tls_err_abort(sk, -EBADMSG); + goto splice_read_end; +-- +2.43.0 + diff --git a/queue-5.15/tun-fix-xdp_rxq_info-s-queue_index-when-detaching.patch b/queue-5.15/tun-fix-xdp_rxq_info-s-queue_index-when-detaching.patch new file mode 100644 index 00000000000..0feb97ac896 --- /dev/null +++ b/queue-5.15/tun-fix-xdp_rxq_info-s-queue_index-when-detaching.patch @@ -0,0 +1,36 @@ +From ae5b910252b82a29df0eb2f8a1196cd113446330 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 20 Feb 2024 11:12:07 +0800 +Subject: tun: Fix xdp_rxq_info's queue_index when detaching + +From: Yunjian Wang + +[ Upstream commit 2a770cdc4382b457ca3d43d03f0f0064f905a0d0 ] + +When a queue(tfile) is detached, we only 
update tfile's queue_index, +but do not update xdp_rxq_info's queue_index. This patch fixes it. + +Fixes: 8bf5c4ee1889 ("tun: setup xdp_rxq_info") +Signed-off-by: Yunjian Wang +Link: https://lore.kernel.org/r/1708398727-46308-1-git-send-email-wangyunjian@huawei.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/tun.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/net/tun.c b/drivers/net/tun.c +index 603530e6cd7b9..42bf0a3ec632e 100644 +--- a/drivers/net/tun.c ++++ b/drivers/net/tun.c +@@ -654,6 +654,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean) + tun->tfiles[tun->numqueues - 1]); + ntfile = rtnl_dereference(tun->tfiles[index]); + ntfile->queue_index = index; ++ ntfile->xdp_rxq.queue_index = index; + rcu_assign_pointer(tun->tfiles[tun->numqueues - 1], + NULL); + +-- +2.43.0 + diff --git a/queue-5.15/uapi-in6-replace-temporary-label-with-rfc9486.patch b/queue-5.15/uapi-in6-replace-temporary-label-with-rfc9486.patch new file mode 100644 index 00000000000..b17bbd4fe8c --- /dev/null +++ b/queue-5.15/uapi-in6-replace-temporary-label-with-rfc9486.patch @@ -0,0 +1,39 @@ +From 29b0929be27d9f67e2bba4ba7a07aa11a6e6cd64 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 26 Feb 2024 13:49:21 +0100 +Subject: uapi: in6: replace temporary label with rfc9486 + +From: Justin Iurman + +[ Upstream commit 6a2008641920a9c6fe1abbeb9acbec463215d505 ] + +Not really a fix per se, but IPV6_TLV_IOAM is still tagged as "TEMPORARY +IANA allocation for IOAM", while RFC 9486 is available for some time +now. Just update the reference. 
+ +Fixes: 9ee11f0fff20 ("ipv6: ioam: Data plane support for Pre-allocated Trace") +Signed-off-by: Justin Iurman +Reviewed-by: Simon Horman +Link: https://lore.kernel.org/r/20240226124921.9097-1-justin.iurman@uliege.be +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + include/uapi/linux/in6.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h +index c4c53a9ab9595..ff8d21f9e95b7 100644 +--- a/include/uapi/linux/in6.h ++++ b/include/uapi/linux/in6.h +@@ -145,7 +145,7 @@ struct in6_flowlabel_req { + #define IPV6_TLV_PADN 1 + #define IPV6_TLV_ROUTERALERT 5 + #define IPV6_TLV_CALIPSO 7 /* RFC 5570 */ +-#define IPV6_TLV_IOAM 49 /* TEMPORARY IANA allocation for IOAM */ ++#define IPV6_TLV_IOAM 49 /* RFC 9486 */ + #define IPV6_TLV_JUMBO 194 + #define IPV6_TLV_HAO 201 /* home address option */ + +-- +2.43.0 + diff --git a/queue-5.15/veth-try-harder-when-allocating-queue-memory.patch b/queue-5.15/veth-try-harder-when-allocating-queue-memory.patch new file mode 100644 index 00000000000..47db731379d --- /dev/null +++ b/queue-5.15/veth-try-harder-when-allocating-queue-memory.patch @@ -0,0 +1,55 @@ +From 0fef6a2d8b391073a15483c69a6a0b378aac6569 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 23 Feb 2024 15:59:08 -0800 +Subject: veth: try harder when allocating queue memory + +From: Jakub Kicinski + +[ Upstream commit 1ce7d306ea63f3e379557c79abd88052e0483813 ] + +struct veth_rq is pretty large, 832B total without debug +options enabled. Since commit under Fixes we try to pre-allocate +enough queues for every possible CPU. Miao Wang reports that +this may lead to order-5 allocations which will fail in production. + +Let the allocation fallback to vmalloc() and try harder. +These are the same flags we pass to netdev queue allocation. 
+ +Reported-and-tested-by: Miao Wang +Fixes: 9d3684c24a52 ("veth: create by default nr_possible_cpus queues") +Link: https://lore.kernel.org/all/5F52CAE2-2FB7-4712-95F1-3312FBBFA8DD@gmail.com/ +Signed-off-by: Jakub Kicinski +Reviewed-by: Eric Dumazet +Link: https://lore.kernel.org/r/20240223235908.693010-1-kuba@kernel.org +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/veth.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/veth.c b/drivers/net/veth.c +index 87cee614618ca..0102f86d48676 100644 +--- a/drivers/net/veth.c ++++ b/drivers/net/veth.c +@@ -1303,7 +1303,8 @@ static int veth_alloc_queues(struct net_device *dev) + struct veth_priv *priv = netdev_priv(dev); + int i; + +- priv->rq = kcalloc(dev->num_rx_queues, sizeof(*priv->rq), GFP_KERNEL_ACCOUNT); ++ priv->rq = kvcalloc(dev->num_rx_queues, sizeof(*priv->rq), ++ GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL); + if (!priv->rq) + return -ENOMEM; + +@@ -1319,7 +1320,7 @@ static void veth_free_queues(struct net_device *dev) + { + struct veth_priv *priv = netdev_priv(dev); + +- kfree(priv->rq); ++ kvfree(priv->rq); + } + + static int veth_dev_init(struct net_device *dev) +-- +2.43.0 +