]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 5.15
authorSasha Levin <sashal@kernel.org>
Sun, 3 Mar 2024 15:56:43 +0000 (10:56 -0500)
committerSasha Levin <sashal@kernel.org>
Sun, 3 Mar 2024 15:56:43 +0000 (10:56 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
46 files changed:
queue-5.15/afs-fix-endless-loop-in-directory-parsing.patch [new file with mode: 0644]
queue-5.15/alsa-drop-leftover-snd-rtctimer-stuff-from-makefile.patch [new file with mode: 0644]
queue-5.15/bluetooth-avoid-potential-use-after-free-in-hci_erro.patch [new file with mode: 0644]
queue-5.15/bluetooth-enforce-validation-on-max-value-of-connect.patch [new file with mode: 0644]
queue-5.15/bluetooth-hci_event-fix-handling-of-hci_ev_io_capa_r.patch [new file with mode: 0644]
queue-5.15/bluetooth-hci_event-fix-wrongly-recorded-wakeup-bd_a.patch [new file with mode: 0644]
queue-5.15/cpufreq-intel_pstate-fix-pstate-limits-enforcement-f.patch [new file with mode: 0644]
queue-5.15/efi-capsule-loader-fix-incorrect-allocation-size.patch [new file with mode: 0644]
queue-5.15/fbcon-always-restore-the-old-font-data-in-fbcon_do_s.patch [new file with mode: 0644]
queue-5.15/igb-extend-ptp-timestamp-adjustments-to-i211.patch [new file with mode: 0644]
queue-5.15/ipv6-fix-potential-struct-net-leak-in-inet6_rtm_geta.patch [new file with mode: 0644]
queue-5.15/lan78xx-enable-auto-speed-configuration-for-lan7850-.patch [new file with mode: 0644]
queue-5.15/mtd-spinand-gigadevice-fix-the-get-ecc-status-issue.patch [new file with mode: 0644]
queue-5.15/net-enable-memcg-accounting-for-veth-queues.patch [new file with mode: 0644]
queue-5.15/net-ip_tunnel-prevent-perpetual-headroom-growth.patch [new file with mode: 0644]
queue-5.15/net-usb-dm9601-fix-wrong-return-value-in-dm9601_mdio.patch [new file with mode: 0644]
queue-5.15/net-veth-clear-gro-when-clearing-xdp-even-when-down.patch [new file with mode: 0644]
queue-5.15/netfilter-bridge-confirm-multicast-packets-before-pa.patch [new file with mode: 0644]
queue-5.15/netfilter-core-move-ip_ct_attach-indirection-to-stru.patch [new file with mode: 0644]
queue-5.15/netfilter-let-reset-rules-clean-out-conntrack-entrie.patch [new file with mode: 0644]
queue-5.15/netfilter-make-function-op-structures-const.patch [new file with mode: 0644]
queue-5.15/netfilter-nf_tables-allow-nfproto_inet-in-nft_-match.patch [new file with mode: 0644]
queue-5.15/netfilter-nfnetlink_queue-silence-bogus-compiler-war.patch [new file with mode: 0644]
queue-5.15/netlink-fix-kernel-infoleak-after-free-in-__skb_data.patch [new file with mode: 0644]
queue-5.15/power-supply-bq27xxx-i2c-do-not-free-non-existing-ir.patch [new file with mode: 0644]
queue-5.15/riscv-sparse-memory-vmemmap-out-of-bounds-fix.patch [new file with mode: 0644]
queue-5.15/rtnetlink-fix-error-logic-of-ifla_bridge_flags-writi.patch [new file with mode: 0644]
queue-5.15/series
queue-5.15/stmmac-clear-variable-when-destroying-workqueue.patch [new file with mode: 0644]
queue-5.15/tls-decrement-decrypt_pending-if-no-async-completion.patch [new file with mode: 0644]
queue-5.15/tls-hw-rx-use-return-value-of-tls_device_decrypted-t.patch [new file with mode: 0644]
queue-5.15/tls-rx-assume-crypto-always-calls-our-callback.patch [new file with mode: 0644]
queue-5.15/tls-rx-don-t-issue-wake-ups-when-data-is-decrypted.patch [new file with mode: 0644]
queue-5.15/tls-rx-don-t-report-text-length-from-the-bowels-of-d.patch [new file with mode: 0644]
queue-5.15/tls-rx-don-t-store-the-decryption-status-in-socket-c.patch [new file with mode: 0644]
queue-5.15/tls-rx-don-t-store-the-record-type-in-socket-context.patch [new file with mode: 0644]
queue-5.15/tls-rx-don-t-track-the-async-count.patch [new file with mode: 0644]
queue-5.15/tls-rx-drop-unnecessary-arguments-from-tls_setup_fro.patch [new file with mode: 0644]
queue-5.15/tls-rx-factor-out-writing-contenttype-to-cmsg.patch [new file with mode: 0644]
queue-5.15/tls-rx-move-counting-tlsdecrypterrors-for-sync.patch [new file with mode: 0644]
queue-5.15/tls-rx-refactor-decrypt_skb_update.patch [new file with mode: 0644]
queue-5.15/tls-rx-use-async-as-an-in-out-argument.patch [new file with mode: 0644]
queue-5.15/tls-rx-wrap-decryption-arguments-in-a-structure.patch [new file with mode: 0644]
queue-5.15/tun-fix-xdp_rxq_info-s-queue_index-when-detaching.patch [new file with mode: 0644]
queue-5.15/uapi-in6-replace-temporary-label-with-rfc9486.patch [new file with mode: 0644]
queue-5.15/veth-try-harder-when-allocating-queue-memory.patch [new file with mode: 0644]

diff --git a/queue-5.15/afs-fix-endless-loop-in-directory-parsing.patch b/queue-5.15/afs-fix-endless-loop-in-directory-parsing.patch
new file mode 100644 (file)
index 0000000..bde637e
--- /dev/null
@@ -0,0 +1,68 @@
+From 38166116c770d034a08477438037a48df4b8bf94 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 23 Feb 2024 13:15:02 +0000
+Subject: afs: Fix endless loop in directory parsing
+
+From: David Howells <dhowells@redhat.com>
+
+[ Upstream commit 5f7a07646655fb4108da527565dcdc80124b14c4 ]
+
+If a directory has a block with only ".__afsXXXX" files in it (from
+uncompleted silly-rename), these .__afsXXXX files are skipped but without
+advancing the file position in the dir_context.  This leads to
+afs_dir_iterate() repeating the block again and again.
+
+Fix this by making the code that skips the .__afsXXXX file also manually
+advance the file position.
+
+The symptoms are a soft lookup:
+
+        watchdog: BUG: soft lockup - CPU#3 stuck for 52s! [check:5737]
+        ...
+        RIP: 0010:afs_dir_iterate_block+0x39/0x1fd
+        ...
+         ? watchdog_timer_fn+0x1a6/0x213
+        ...
+         ? asm_sysvec_apic_timer_interrupt+0x16/0x20
+         ? afs_dir_iterate_block+0x39/0x1fd
+         afs_dir_iterate+0x10a/0x148
+         afs_readdir+0x30/0x4a
+         iterate_dir+0x93/0xd3
+         __do_sys_getdents64+0x6b/0xd4
+
+This is almost certainly the actual fix for:
+
+        https://bugzilla.kernel.org/show_bug.cgi?id=218496
+
+Fixes: 57e9d49c5452 ("afs: Hide silly-rename files from userspace")
+Signed-off-by: David Howells <dhowells@redhat.com>
+Link: https://lore.kernel.org/r/786185.1708694102@warthog.procyon.org.uk
+Reviewed-by: Marc Dionne <marc.dionne@auristor.com>
+cc: Marc Dionne <marc.dionne@auristor.com>
+cc: Markus Suvanto <markus.suvanto@gmail.com>
+cc: linux-afs@lists.infradead.org
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/afs/dir.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/fs/afs/dir.c b/fs/afs/dir.c
+index 106426de50279..c4e22e9f7a666 100644
+--- a/fs/afs/dir.c
++++ b/fs/afs/dir.c
+@@ -497,8 +497,10 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode,
+                   dire->u.name[0] == '.' &&
+                   ctx->actor != afs_lookup_filldir &&
+                   ctx->actor != afs_lookup_one_filldir &&
+-                  memcmp(dire->u.name, ".__afs", 6) == 0)
++                  memcmp(dire->u.name, ".__afs", 6) == 0) {
++                      ctx->pos = blkoff + next * sizeof(union afs_xdr_dirent);
+                       continue;
++              }
+               /* found the next entry */
+               if (!dir_emit(ctx, dire->u.name, nlen,
+-- 
+2.43.0
+
diff --git a/queue-5.15/alsa-drop-leftover-snd-rtctimer-stuff-from-makefile.patch b/queue-5.15/alsa-drop-leftover-snd-rtctimer-stuff-from-makefile.patch
new file mode 100644 (file)
index 0000000..51e8de5
--- /dev/null
@@ -0,0 +1,35 @@
+From 6cacd78d3d0d7f306351ea57e1693c9ebcf06c13 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 21 Feb 2024 10:21:56 +0100
+Subject: ALSA: Drop leftover snd-rtctimer stuff from Makefile
+
+From: Takashi Iwai <tiwai@suse.de>
+
+[ Upstream commit 4df49712eb54141be00a9312547436d55677f092 ]
+
+We forgot to remove the line for snd-rtctimer from Makefile while
+dropping the functionality.  Get rid of the stale line.
+
+Fixes: 34ce71a96dcb ("ALSA: timer: remove legacy rtctimer")
+Link: https://lore.kernel.org/r/20240221092156.28695-1-tiwai@suse.de
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/core/Makefile | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/sound/core/Makefile b/sound/core/Makefile
+index 79e1407cd0de7..7da92e0383e1c 100644
+--- a/sound/core/Makefile
++++ b/sound/core/Makefile
+@@ -33,7 +33,6 @@ snd-ctl-led-objs  := control_led.o
+ snd-rawmidi-objs  := rawmidi.o
+ snd-timer-objs    := timer.o
+ snd-hrtimer-objs  := hrtimer.o
+-snd-rtctimer-objs := rtctimer.o
+ snd-hwdep-objs    := hwdep.o
+ snd-seq-device-objs := seq_device.o
+-- 
+2.43.0
+
diff --git a/queue-5.15/bluetooth-avoid-potential-use-after-free-in-hci_erro.patch b/queue-5.15/bluetooth-avoid-potential-use-after-free-in-hci_erro.patch
new file mode 100644 (file)
index 0000000..0d65df3
--- /dev/null
@@ -0,0 +1,66 @@
+From 3cd502c7c9bd2a1f6501604c1bf1203a1befa44e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 4 Jan 2024 11:56:32 +0000
+Subject: Bluetooth: Avoid potential use-after-free in hci_error_reset
+
+From: Ying Hsu <yinghsu@chromium.org>
+
+[ Upstream commit 2449007d3f73b2842c9734f45f0aadb522daf592 ]
+
+While handling the HCI_EV_HARDWARE_ERROR event, if the underlying
+BT controller is not responding, the GPIO reset mechanism would
+free the hci_dev and lead to a use-after-free in hci_error_reset.
+
+Here's the call trace observed on a ChromeOS device with Intel AX201:
+   queue_work_on+0x3e/0x6c
+   __hci_cmd_sync_sk+0x2ee/0x4c0 [bluetooth <HASH:3b4a6>]
+   ? init_wait_entry+0x31/0x31
+   __hci_cmd_sync+0x16/0x20 [bluetooth <HASH:3b4a 6>]
+   hci_error_reset+0x4f/0xa4 [bluetooth <HASH:3b4a 6>]
+   process_one_work+0x1d8/0x33f
+   worker_thread+0x21b/0x373
+   kthread+0x13a/0x152
+   ? pr_cont_work+0x54/0x54
+   ? kthread_blkcg+0x31/0x31
+    ret_from_fork+0x1f/0x30
+
+This patch holds the reference count on the hci_dev while processing
+a HCI_EV_HARDWARE_ERROR event to avoid potential crash.
+
+Fixes: c7741d16a57c ("Bluetooth: Perform a power cycle when receiving hardware error event")
+Signed-off-by: Ying Hsu <yinghsu@chromium.org>
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/bluetooth/hci_core.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
+index b3b597960c562..a8854b24f4cfb 100644
+--- a/net/bluetooth/hci_core.c
++++ b/net/bluetooth/hci_core.c
+@@ -2330,6 +2330,7 @@ static void hci_error_reset(struct work_struct *work)
+ {
+       struct hci_dev *hdev = container_of(work, struct hci_dev, error_reset);
++      hci_dev_hold(hdev);
+       BT_DBG("%s", hdev->name);
+       if (hdev->hw_error)
+@@ -2337,10 +2338,10 @@ static void hci_error_reset(struct work_struct *work)
+       else
+               bt_dev_err(hdev, "hardware error 0x%2.2x", hdev->hw_error_code);
+-      if (hci_dev_do_close(hdev))
+-              return;
++      if (!hci_dev_do_close(hdev))
++              hci_dev_do_open(hdev);
+-      hci_dev_do_open(hdev);
++      hci_dev_put(hdev);
+ }
+ void hci_uuids_clear(struct hci_dev *hdev)
+-- 
+2.43.0
+
diff --git a/queue-5.15/bluetooth-enforce-validation-on-max-value-of-connect.patch b/queue-5.15/bluetooth-enforce-validation-on-max-value-of-connect.patch
new file mode 100644 (file)
index 0000000..4af382f
--- /dev/null
@@ -0,0 +1,68 @@
+From d5a12daef1accfe1f0be72b6de584739d96c11c3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 25 Jan 2024 14:50:28 +0800
+Subject: Bluetooth: Enforce validation on max value of connection interval
+
+From: Kai-Heng Feng <kai.heng.feng@canonical.com>
+
+[ Upstream commit e4b019515f950b4e6e5b74b2e1bb03a90cb33039 ]
+
+Right now Linux BT stack cannot pass test case "GAP/CONN/CPUP/BV-05-C
+'Connection Parameter Update Procedure Invalid Parameters Central
+Responder'" in Bluetooth Test Suite revision GAP.TS.p44. [0]
+
+That was revoled by commit c49a8682fc5d ("Bluetooth: validate BLE
+connection interval updates"), but later got reverted due to devices
+like keyboards and mice may require low connection interval.
+
+So only validate the max value connection interval to pass the Test
+Suite, and let devices to request low connection interval if needed.
+
+[0] https://www.bluetooth.org/docman/handlers/DownloadDoc.ashx?doc_id=229869
+
+Fixes: 68d19d7d9957 ("Revert "Bluetooth: validate BLE connection interval updates"")
+Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/bluetooth/hci_event.c  | 4 ++++
+ net/bluetooth/l2cap_core.c | 8 +++++++-
+ 2 files changed, 11 insertions(+), 1 deletion(-)
+
+diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
+index 0bfd856d079d5..ba7242729a8fb 100644
+--- a/net/bluetooth/hci_event.c
++++ b/net/bluetooth/hci_event.c
+@@ -6058,6 +6058,10 @@ static void hci_le_remote_conn_param_req_evt(struct hci_dev *hdev,
+               return send_conn_param_neg_reply(hdev, handle,
+                                                HCI_ERROR_UNKNOWN_CONN_ID);
++      if (max > hcon->le_conn_max_interval)
++              return send_conn_param_neg_reply(hdev, handle,
++                                               HCI_ERROR_INVALID_LL_PARAMS);
++
+       if (hci_check_conn_params(min, max, latency, timeout))
+               return send_conn_param_neg_reply(hdev, handle,
+                                                HCI_ERROR_INVALID_LL_PARAMS);
+diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
+index 850b6aab73779..11bfc8737e6ce 100644
+--- a/net/bluetooth/l2cap_core.c
++++ b/net/bluetooth/l2cap_core.c
+@@ -5614,7 +5614,13 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn,
+       memset(&rsp, 0, sizeof(rsp));
+-      err = hci_check_conn_params(min, max, latency, to_multiplier);
++      if (max > hcon->le_conn_max_interval) {
++              BT_DBG("requested connection interval exceeds current bounds.");
++              err = -EINVAL;
++      } else {
++              err = hci_check_conn_params(min, max, latency, to_multiplier);
++      }
++
+       if (err)
+               rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_REJECTED);
+       else
+-- 
+2.43.0
+
diff --git a/queue-5.15/bluetooth-hci_event-fix-handling-of-hci_ev_io_capa_r.patch b/queue-5.15/bluetooth-hci_event-fix-handling-of-hci_ev_io_capa_r.patch
new file mode 100644 (file)
index 0000000..3aa7d65
--- /dev/null
@@ -0,0 +1,42 @@
+From 9bb5f007e7a4af2a8e7284a87875684be129592f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Jan 2024 09:02:47 -0500
+Subject: Bluetooth: hci_event: Fix handling of HCI_EV_IO_CAPA_REQUEST
+
+From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+
+[ Upstream commit 7e74aa53a68bf60f6019bd5d9a9a1406ec4d4865 ]
+
+If we received HCI_EV_IO_CAPA_REQUEST while
+HCI_OP_READ_REMOTE_EXT_FEATURES is yet to be responded assume the remote
+does support SSP since otherwise this event shouldn't be generated.
+
+Link: https://lore.kernel.org/linux-bluetooth/CABBYNZ+9UdG1cMZVmdtN3U2aS16AKMCyTARZZyFX7xTEDWcMOw@mail.gmail.com/T/#t
+Fixes: c7f59461f5a7 ("Bluetooth: Fix a refcnt underflow problem for hci_conn")
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/bluetooth/hci_event.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
+index c4a35d4612b05..0bfd856d079d5 100644
+--- a/net/bluetooth/hci_event.c
++++ b/net/bluetooth/hci_event.c
+@@ -4720,9 +4720,12 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
+       hci_dev_lock(hdev);
+       conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
+-      if (!conn || !hci_conn_ssp_enabled(conn))
++      if (!conn || !hci_dev_test_flag(hdev, HCI_SSP_ENABLED))
+               goto unlock;
++      /* Assume remote supports SSP since it has triggered this event */
++      set_bit(HCI_CONN_SSP_ENABLED, &conn->flags);
++
+       hci_conn_hold(conn);
+       if (!hci_dev_test_flag(hdev, HCI_MGMT))
+-- 
+2.43.0
+
diff --git a/queue-5.15/bluetooth-hci_event-fix-wrongly-recorded-wakeup-bd_a.patch b/queue-5.15/bluetooth-hci_event-fix-wrongly-recorded-wakeup-bd_a.patch
new file mode 100644 (file)
index 0000000..4ba58c1
--- /dev/null
@@ -0,0 +1,42 @@
+From bbf6131045fd6659821e6ea7c4f14301cc924232 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 9 Jan 2024 19:03:23 +0800
+Subject: Bluetooth: hci_event: Fix wrongly recorded wakeup BD_ADDR
+
+From: Zijun Hu <quic_zijuhu@quicinc.com>
+
+[ Upstream commit 61a5ab72edea7ebc3ad2c6beea29d966f528ebfb ]
+
+hci_store_wake_reason() wrongly parses event HCI_Connection_Request
+as HCI_Connection_Complete and HCI_Connection_Complete as
+HCI_Connection_Request, so causes recording wakeup BD_ADDR error and
+potential stability issue, fix it by using the correct field.
+
+Fixes: 2f20216c1d6f ("Bluetooth: Emit controller suspend and resume events")
+Signed-off-by: Zijun Hu <quic_zijuhu@quicinc.com>
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/bluetooth/hci_event.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
+index 2ad2f4647847c..c4a35d4612b05 100644
+--- a/net/bluetooth/hci_event.c
++++ b/net/bluetooth/hci_event.c
+@@ -6272,10 +6272,10 @@ static void hci_store_wake_reason(struct hci_dev *hdev, u8 event,
+        * keep track of the bdaddr of the connection event that woke us up.
+        */
+       if (event == HCI_EV_CONN_REQUEST) {
+-              bacpy(&hdev->wake_addr, &conn_complete->bdaddr);
++              bacpy(&hdev->wake_addr, &conn_request->bdaddr);
+               hdev->wake_addr_type = BDADDR_BREDR;
+       } else if (event == HCI_EV_CONN_COMPLETE) {
+-              bacpy(&hdev->wake_addr, &conn_request->bdaddr);
++              bacpy(&hdev->wake_addr, &conn_complete->bdaddr);
+               hdev->wake_addr_type = BDADDR_BREDR;
+       } else if (event == HCI_EV_LE_META) {
+               struct hci_ev_le_meta *le_ev = (void *)skb->data;
+-- 
+2.43.0
+
diff --git a/queue-5.15/cpufreq-intel_pstate-fix-pstate-limits-enforcement-f.patch b/queue-5.15/cpufreq-intel_pstate-fix-pstate-limits-enforcement-f.patch
new file mode 100644 (file)
index 0000000..ddb3155
--- /dev/null
@@ -0,0 +1,42 @@
+From 4c864a7c9d27772a6b65133248c2d758de049e75 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 17 Feb 2024 13:30:10 -0800
+Subject: cpufreq: intel_pstate: fix pstate limits enforcement for adjust_perf
+ call back
+
+From: Doug Smythies <dsmythies@telus.net>
+
+[ Upstream commit f0a0fc10abb062d122db5ac4ed42f6d1ca342649 ]
+
+There is a loophole in pstate limit clamping for the intel_cpufreq CPU
+frequency scaling driver (intel_pstate in passive mode), schedutil CPU
+frequency scaling governor, HWP (HardWare Pstate) control enabled, when
+the adjust_perf call back path is used.
+
+Fix it.
+
+Fixes: a365ab6b9dfb cpufreq: intel_pstate: Implement the ->adjust_perf() callback
+Signed-off-by: Doug Smythies <dsmythies@telus.net>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/cpufreq/intel_pstate.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
+index dd5f4eee9ffb6..4de71e772f514 100644
+--- a/drivers/cpufreq/intel_pstate.c
++++ b/drivers/cpufreq/intel_pstate.c
+@@ -2787,6 +2787,9 @@ static void intel_cpufreq_adjust_perf(unsigned int cpunum,
+       if (min_pstate < cpu->min_perf_ratio)
+               min_pstate = cpu->min_perf_ratio;
++      if (min_pstate > cpu->max_perf_ratio)
++              min_pstate = cpu->max_perf_ratio;
++
+       max_pstate = min(cap_pstate, cpu->max_perf_ratio);
+       if (max_pstate < min_pstate)
+               max_pstate = min_pstate;
+-- 
+2.43.0
+
diff --git a/queue-5.15/efi-capsule-loader-fix-incorrect-allocation-size.patch b/queue-5.15/efi-capsule-loader-fix-incorrect-allocation-size.patch
new file mode 100644 (file)
index 0000000..939e316
--- /dev/null
@@ -0,0 +1,43 @@
+From d2836ae85f8c0e574fbd4404d0ca6cf69a6cc476 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 12 Feb 2024 12:24:40 +0100
+Subject: efi/capsule-loader: fix incorrect allocation size
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+[ Upstream commit fccfa646ef3628097d59f7d9c1a3e84d4b6bb45e ]
+
+gcc-14 notices that the allocation with sizeof(void) on 32-bit architectures
+is not enough for a 64-bit phys_addr_t:
+
+drivers/firmware/efi/capsule-loader.c: In function 'efi_capsule_open':
+drivers/firmware/efi/capsule-loader.c:295:24: error: allocation of insufficient size '4' for type 'phys_addr_t' {aka 'long long unsigned int'} with size '8' [-Werror=alloc-size]
+  295 |         cap_info->phys = kzalloc(sizeof(void *), GFP_KERNEL);
+      |                        ^
+
+Use the correct type instead here.
+
+Fixes: f24c4d478013 ("efi/capsule-loader: Reinstate virtual capsule mapping")
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/firmware/efi/capsule-loader.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c
+index 3e8d4b51a8140..97bafb5f70389 100644
+--- a/drivers/firmware/efi/capsule-loader.c
++++ b/drivers/firmware/efi/capsule-loader.c
+@@ -292,7 +292,7 @@ static int efi_capsule_open(struct inode *inode, struct file *file)
+               return -ENOMEM;
+       }
+-      cap_info->phys = kzalloc(sizeof(void *), GFP_KERNEL);
++      cap_info->phys = kzalloc(sizeof(phys_addr_t), GFP_KERNEL);
+       if (!cap_info->phys) {
+               kfree(cap_info->pages);
+               kfree(cap_info);
+-- 
+2.43.0
+
diff --git a/queue-5.15/fbcon-always-restore-the-old-font-data-in-fbcon_do_s.patch b/queue-5.15/fbcon-always-restore-the-old-font-data-in-fbcon_do_s.patch
new file mode 100644 (file)
index 0000000..766c321
--- /dev/null
@@ -0,0 +1,95 @@
+From ea962eef9ca6461736867cb86a60da28aaa572cb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Feb 2024 12:44:11 +0100
+Subject: fbcon: always restore the old font data in fbcon_do_set_font()
+
+From: Jiri Slaby (SUSE) <jirislaby@kernel.org>
+
+[ Upstream commit 00d6a284fcf3fad1b7e1b5bc3cd87cbfb60ce03f ]
+
+Commit a5a923038d70 (fbdev: fbcon: Properly revert changes when
+vc_resize() failed) started restoring old font data upon failure (of
+vc_resize()). But it performs so only for user fonts. It means that the
+"system"/internal fonts are not restored at all. So in result, the very
+first call to fbcon_do_set_font() performs no restore at all upon
+failing vc_resize().
+
+This can be reproduced by Syzkaller to crash the system on the next
+invocation of font_get(). It's rather hard to hit the allocation failure
+in vc_resize() on the first font_set(), but not impossible. Esp. if
+fault injection is used to aid the execution/failure. It was
+demonstrated by Sirius:
+  BUG: unable to handle page fault for address: fffffffffffffff8
+  #PF: supervisor read access in kernel mode
+  #PF: error_code(0x0000) - not-present page
+  PGD cb7b067 P4D cb7b067 PUD cb7d067 PMD 0
+  Oops: 0000 [#1] PREEMPT SMP KASAN
+  CPU: 1 PID: 8007 Comm: poc Not tainted 6.7.0-g9d1694dc91ce #20
+  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
+  RIP: 0010:fbcon_get_font+0x229/0x800 drivers/video/fbdev/core/fbcon.c:2286
+  Call Trace:
+   <TASK>
+   con_font_get drivers/tty/vt/vt.c:4558 [inline]
+   con_font_op+0x1fc/0xf20 drivers/tty/vt/vt.c:4673
+   vt_k_ioctl drivers/tty/vt/vt_ioctl.c:474 [inline]
+   vt_ioctl+0x632/0x2ec0 drivers/tty/vt/vt_ioctl.c:752
+   tty_ioctl+0x6f8/0x1570 drivers/tty/tty_io.c:2803
+   vfs_ioctl fs/ioctl.c:51 [inline]
+  ...
+
+So restore the font data in any case, not only for user fonts. Note the
+later 'if' is now protected by 'old_userfont' and not 'old_data' as the
+latter is always set now. (And it is supposed to be non-NULL. Otherwise
+we would see the bug above again.)
+
+Signed-off-by: Jiri Slaby (SUSE) <jirislaby@kernel.org>
+Fixes: a5a923038d70 ("fbdev: fbcon: Properly revert changes when vc_resize() failed")
+Reported-and-tested-by: Ubisectech Sirius <bugreport@ubisectech.com>
+Cc: Ubisectech Sirius <bugreport@ubisectech.com>
+Cc: Daniel Vetter <daniel@ffwll.ch>
+Cc: Helge Deller <deller@gmx.de>
+Cc: linux-fbdev@vger.kernel.org
+Cc: dri-devel@lists.freedesktop.org
+Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
+Link: https://patchwork.freedesktop.org/patch/msgid/20240208114411.14604-1-jirislaby@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/video/fbdev/core/fbcon.c | 8 +++-----
+ 1 file changed, 3 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c
+index b6712655ec1f0..b163b54b868e6 100644
+--- a/drivers/video/fbdev/core/fbcon.c
++++ b/drivers/video/fbdev/core/fbcon.c
+@@ -2409,11 +2409,9 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount,
+       struct fbcon_ops *ops = info->fbcon_par;
+       struct fbcon_display *p = &fb_display[vc->vc_num];
+       int resize, ret, old_userfont, old_width, old_height, old_charcount;
+-      char *old_data = NULL;
++      u8 *old_data = vc->vc_font.data;
+       resize = (w != vc->vc_font.width) || (h != vc->vc_font.height);
+-      if (p->userfont)
+-              old_data = vc->vc_font.data;
+       vc->vc_font.data = (void *)(p->fontdata = data);
+       old_userfont = p->userfont;
+       if ((p->userfont = userfont))
+@@ -2447,13 +2445,13 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount,
+               update_screen(vc);
+       }
+-      if (old_data && (--REFCOUNT(old_data) == 0))
++      if (old_userfont && (--REFCOUNT(old_data) == 0))
+               kfree(old_data - FONT_EXTRA_WORDS * sizeof(int));
+       return 0;
+ err_out:
+       p->fontdata = old_data;
+-      vc->vc_font.data = (void *)old_data;
++      vc->vc_font.data = old_data;
+       if (userfont) {
+               p->userfont = old_userfont;
+-- 
+2.43.0
+
diff --git a/queue-5.15/igb-extend-ptp-timestamp-adjustments-to-i211.patch b/queue-5.15/igb-extend-ptp-timestamp-adjustments-to-i211.patch
new file mode 100644 (file)
index 0000000..235c994
--- /dev/null
@@ -0,0 +1,62 @@
+From 8c38f5c11f86a0c30a105012d3c28ba9cc7d94f9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 27 Feb 2024 10:49:41 -0800
+Subject: igb: extend PTP timestamp adjustments to i211
+
+From: Oleksij Rempel <o.rempel@pengutronix.de>
+
+[ Upstream commit 0bb7b09392eb74b152719ae87b1ba5e4bf910ef0 ]
+
+The i211 requires the same PTP timestamp adjustments as the i210,
+according to its datasheet. To ensure consistent timestamping across
+different platforms, this change extends the existing adjustments to
+include the i211.
+
+The adjustment result are tested and comparable for i210 and i211 based
+systems.
+
+Fixes: 3f544d2a4d5c ("igb: adjust PTP timestamps for Tx/Rx latency")
+Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
+Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
+Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha@intel.com> (A Contingent worker at Intel)
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Link: https://lore.kernel.org/r/20240227184942.362710-1-anthony.l.nguyen@intel.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igb/igb_ptp.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
+index 9cdb7a856ab6c..1a1575e8577af 100644
+--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
++++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
+@@ -826,7 +826,7 @@ static void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter)
+       igb_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
+       /* adjust timestamp for the TX latency based on link speed */
+-      if (adapter->hw.mac.type == e1000_i210) {
++      if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211) {
+               switch (adapter->link_speed) {
+               case SPEED_10:
+                       adjust = IGB_I210_TX_LATENCY_10;
+@@ -872,6 +872,7 @@ int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va,
+                       ktime_t *timestamp)
+ {
+       struct igb_adapter *adapter = q_vector->adapter;
++      struct e1000_hw *hw = &adapter->hw;
+       struct skb_shared_hwtstamps ts;
+       __le64 *regval = (__le64 *)va;
+       int adjust = 0;
+@@ -891,7 +892,7 @@ int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va,
+       igb_ptp_systim_to_hwtstamp(adapter, &ts, le64_to_cpu(regval[1]));
+       /* adjust timestamp for the RX latency based on link speed */
+-      if (adapter->hw.mac.type == e1000_i210) {
++      if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211) {
+               switch (adapter->link_speed) {
+               case SPEED_10:
+                       adjust = IGB_I210_RX_LATENCY_10;
+-- 
+2.43.0
+
diff --git a/queue-5.15/ipv6-fix-potential-struct-net-leak-in-inet6_rtm_geta.patch b/queue-5.15/ipv6-fix-potential-struct-net-leak-in-inet6_rtm_geta.patch
new file mode 100644 (file)
index 0000000..6ab963d
--- /dev/null
@@ -0,0 +1,45 @@
+From 7b448de1f86cc50dbade510a3cafd85b321d4869 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 22 Feb 2024 12:17:47 +0000
+Subject: ipv6: fix potential "struct net" leak in inet6_rtm_getaddr()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 10bfd453da64a057bcfd1a49fb6b271c48653cdb ]
+
+It seems that if userspace provides a correct IFA_TARGET_NETNSID value
+but no IFA_ADDRESS and IFA_LOCAL attributes, inet6_rtm_getaddr()
+returns -EINVAL with an elevated "struct net" refcount.
+
+Fixes: 6ecf4c37eb3e ("ipv6: enable IFA_TARGET_NETNSID for RTM_GETADDR")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: David Ahern <dsahern@kernel.org>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv6/addrconf.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
+index c52317184e3e2..968ca078191cd 100644
+--- a/net/ipv6/addrconf.c
++++ b/net/ipv6/addrconf.c
+@@ -5463,9 +5463,10 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
+       }
+       addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer);
+-      if (!addr)
+-              return -EINVAL;
+-
++      if (!addr) {
++              err = -EINVAL;
++              goto errout;
++      }
+       ifm = nlmsg_data(nlh);
+       if (ifm->ifa_index)
+               dev = dev_get_by_index(tgt_net, ifm->ifa_index);
+-- 
+2.43.0
+
diff --git a/queue-5.15/lan78xx-enable-auto-speed-configuration-for-lan7850-.patch b/queue-5.15/lan78xx-enable-auto-speed-configuration-for-lan7850-.patch
new file mode 100644 (file)
index 0000000..3790f60
--- /dev/null
@@ -0,0 +1,45 @@
+From 1aaed7c27b7dc092c5dcbda64bc0bd79e8703ecd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 22 Feb 2024 13:38:38 +0100
+Subject: lan78xx: enable auto speed configuration for LAN7850 if no EEPROM is
+ detected
+
+From: Oleksij Rempel <o.rempel@pengutronix.de>
+
+[ Upstream commit 0e67899abfbfdea0c3c0ed3fd263ffc601c5c157 ]
+
+Same as LAN7800, LAN7850 can be used without EEPROM. If EEPROM is not
+present or not flashed, LAN7850 will fail to sync the speed detected by the PHY
+with the MAC. In case link speed is 100Mbit, it will accidentally work,
+otherwise no data can be transferred.
+
+Better way would be to implement link_up callback, or set auto speed
+configuration unconditionally. But this changes would be more intrusive.
+So, for now, set it only if no EEPROM is found.
+
+Fixes: e69647a19c87 ("lan78xx: Set ASD in MAC_CR when EEE is enabled.")
+Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
+Link: https://lore.kernel.org/r/20240222123839.2816561-1-o.rempel@pengutronix.de
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/usb/lan78xx.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
+index 5700c9d20a3e2..c8b42892655a1 100644
+--- a/drivers/net/usb/lan78xx.c
++++ b/drivers/net/usb/lan78xx.c
+@@ -2862,7 +2862,8 @@ static int lan78xx_reset(struct lan78xx_net *dev)
+       if (dev->chipid == ID_REV_CHIP_ID_7801_)
+               buf &= ~MAC_CR_GMII_EN_;
+-      if (dev->chipid == ID_REV_CHIP_ID_7800_) {
++      if (dev->chipid == ID_REV_CHIP_ID_7800_ ||
++          dev->chipid == ID_REV_CHIP_ID_7850_) {
+               ret = lan78xx_read_raw_eeprom(dev, 0, 1, &sig);
+               if (!ret && sig != EEPROM_INDICATOR) {
+                       /* Implies there is no external eeprom. Set mac speed */
+-- 
+2.43.0
+
diff --git a/queue-5.15/mtd-spinand-gigadevice-fix-the-get-ecc-status-issue.patch b/queue-5.15/mtd-spinand-gigadevice-fix-the-get-ecc-status-issue.patch
new file mode 100644 (file)
index 0000000..e9c868b
--- /dev/null
@@ -0,0 +1,63 @@
+From 4a0367bb181569b5cc86ae4e1018b971b3e769ca Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 8 Nov 2023 09:07:01 -0600
+Subject: mtd: spinand: gigadevice: Fix the get ecc status issue
+
+From: Han Xu <han.xu@nxp.com>
+
+[ Upstream commit 59950610c0c00c7a06d8a75d2ee5d73dba4274cf ]
+
+Some GigaDevice ecc_get_status functions use on-stack buffer for
+spi_mem_op causes spi_mem_check_op failing, fix the issue by using
+spinand scratchbuf.
+
+Fixes: c40c7a990a46 ("mtd: spinand: Add support for GigaDevice GD5F1GQ4UExxG")
+Signed-off-by: Han Xu <han.xu@nxp.com>
+Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
+Link: https://lore.kernel.org/linux-mtd/20231108150701.593912-1-han.xu@nxp.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/mtd/nand/spi/gigadevice.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/mtd/nand/spi/gigadevice.c b/drivers/mtd/nand/spi/gigadevice.c
+index da77ab20296ea..56d1b56615f97 100644
+--- a/drivers/mtd/nand/spi/gigadevice.c
++++ b/drivers/mtd/nand/spi/gigadevice.c
+@@ -178,7 +178,7 @@ static int gd5fxgq4uexxg_ecc_get_status(struct spinand_device *spinand,
+ {
+       u8 status2;
+       struct spi_mem_op op = SPINAND_GET_FEATURE_OP(GD5FXGQXXEXXG_REG_STATUS2,
+-                                                    &status2);
++                                                    spinand->scratchbuf);
+       int ret;
+       switch (status & STATUS_ECC_MASK) {
+@@ -199,6 +199,7 @@ static int gd5fxgq4uexxg_ecc_get_status(struct spinand_device *spinand,
+                * report the maximum of 4 in this case
+                */
+               /* bits sorted this way (3...0): ECCS1,ECCS0,ECCSE1,ECCSE0 */
++              status2 = *(spinand->scratchbuf);
+               return ((status & STATUS_ECC_MASK) >> 2) |
+                       ((status2 & STATUS_ECC_MASK) >> 4);
+@@ -220,7 +221,7 @@ static int gd5fxgq5xexxg_ecc_get_status(struct spinand_device *spinand,
+ {
+       u8 status2;
+       struct spi_mem_op op = SPINAND_GET_FEATURE_OP(GD5FXGQXXEXXG_REG_STATUS2,
+-                                                    &status2);
++                                                    spinand->scratchbuf);
+       int ret;
+       switch (status & STATUS_ECC_MASK) {
+@@ -240,6 +241,7 @@ static int gd5fxgq5xexxg_ecc_get_status(struct spinand_device *spinand,
+                * 1 ... 4 bits are flipped (and corrected)
+                */
+               /* bits sorted this way (1...0): ECCSE1, ECCSE0 */
++              status2 = *(spinand->scratchbuf);
+               return ((status2 & STATUS_ECC_MASK) >> 4) + 1;
+       case STATUS_ECC_UNCOR_ERROR:
+-- 
+2.43.0
+
diff --git a/queue-5.15/net-enable-memcg-accounting-for-veth-queues.patch b/queue-5.15/net-enable-memcg-accounting-for-veth-queues.patch
new file mode 100644 (file)
index 0000000..ec6c89b
--- /dev/null
@@ -0,0 +1,37 @@
+From c9e4dec79cee5595362c0b72b84e612297ad973b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 29 Apr 2022 08:17:35 +0300
+Subject: net: enable memcg accounting for veth queues
+
+From: Vasily Averin <vvs@openvz.org>
+
+[ Upstream commit 961c6136359eef38a8c023d02028fdcd123f02a6 ]
+
+veth netdevice defines own rx queues and allocates array containing
+up to 4095 ~750-bytes-long 'struct veth_rq' elements. Such allocation
+is quite huge and should be accounted to memcg.
+
+Signed-off-by: Vasily Averin <vvs@openvz.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: 1ce7d306ea63 ("veth: try harder when allocating queue memory")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/veth.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/veth.c b/drivers/net/veth.c
+index 85c3e12f83627..87cee614618ca 100644
+--- a/drivers/net/veth.c
++++ b/drivers/net/veth.c
+@@ -1303,7 +1303,7 @@ static int veth_alloc_queues(struct net_device *dev)
+       struct veth_priv *priv = netdev_priv(dev);
+       int i;
+-      priv->rq = kcalloc(dev->num_rx_queues, sizeof(*priv->rq), GFP_KERNEL);
++      priv->rq = kcalloc(dev->num_rx_queues, sizeof(*priv->rq), GFP_KERNEL_ACCOUNT);
+       if (!priv->rq)
+               return -ENOMEM;
+-- 
+2.43.0
+
diff --git a/queue-5.15/net-ip_tunnel-prevent-perpetual-headroom-growth.patch b/queue-5.15/net-ip_tunnel-prevent-perpetual-headroom-growth.patch
new file mode 100644 (file)
index 0000000..3c57484
--- /dev/null
@@ -0,0 +1,181 @@
+From bada6523094d175dcf46b7499d3a14131760a80a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 20 Feb 2024 14:56:02 +0100
+Subject: net: ip_tunnel: prevent perpetual headroom growth
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit 5ae1e9922bbdbaeb9cfbe91085ab75927488ac0f ]
+
+syzkaller triggered following kasan splat:
+BUG: KASAN: use-after-free in __skb_flow_dissect+0x19d1/0x7a50 net/core/flow_dissector.c:1170
+Read of size 1 at addr ffff88812fb4000e by task syz-executor183/5191
+[..]
+ kasan_report+0xda/0x110 mm/kasan/report.c:588
+ __skb_flow_dissect+0x19d1/0x7a50 net/core/flow_dissector.c:1170
+ skb_flow_dissect_flow_keys include/linux/skbuff.h:1514 [inline]
+ ___skb_get_hash net/core/flow_dissector.c:1791 [inline]
+ __skb_get_hash+0xc7/0x540 net/core/flow_dissector.c:1856
+ skb_get_hash include/linux/skbuff.h:1556 [inline]
+ ip_tunnel_xmit+0x1855/0x33c0 net/ipv4/ip_tunnel.c:748
+ ipip_tunnel_xmit+0x3cc/0x4e0 net/ipv4/ipip.c:308
+ __netdev_start_xmit include/linux/netdevice.h:4940 [inline]
+ netdev_start_xmit include/linux/netdevice.h:4954 [inline]
+ xmit_one net/core/dev.c:3548 [inline]
+ dev_hard_start_xmit+0x13d/0x6d0 net/core/dev.c:3564
+ __dev_queue_xmit+0x7c1/0x3d60 net/core/dev.c:4349
+ dev_queue_xmit include/linux/netdevice.h:3134 [inline]
+ neigh_connected_output+0x42c/0x5d0 net/core/neighbour.c:1592
+ ...
+ ip_finish_output2+0x833/0x2550 net/ipv4/ip_output.c:235
+ ip_finish_output+0x31/0x310 net/ipv4/ip_output.c:323
+ ..
+ iptunnel_xmit+0x5b4/0x9b0 net/ipv4/ip_tunnel_core.c:82
+ ip_tunnel_xmit+0x1dbc/0x33c0 net/ipv4/ip_tunnel.c:831
+ ipgre_xmit+0x4a1/0x980 net/ipv4/ip_gre.c:665
+ __netdev_start_xmit include/linux/netdevice.h:4940 [inline]
+ netdev_start_xmit include/linux/netdevice.h:4954 [inline]
+ xmit_one net/core/dev.c:3548 [inline]
+ dev_hard_start_xmit+0x13d/0x6d0 net/core/dev.c:3564
+ ...
+
+The splat occurs because skb->data points past skb->head allocated area.
+This is because neigh layer does:
+  __skb_pull(skb, skb_network_offset(skb));
+
+... but skb_network_offset() returns a negative offset and __skb_pull()
+arg is unsigned.  IOW, we skb->data gets "adjusted" by a huge value.
+
+The negative value is returned because skb->head and skb->data distance is
+more than 64k and skb->network_header (u16) has wrapped around.
+
+The bug is in the ip_tunnel infrastructure, which can cause
+dev->needed_headroom to increment ad infinitum.
+
+The syzkaller reproducer consists of packets getting routed via a gre
+tunnel, and route of gre encapsulated packets pointing at another (ipip)
+tunnel.  The ipip encapsulation finds gre0 as next output device.
+
+This results in the following pattern:
+
+1). First packet is to be sent out via gre0.
+Route lookup found an output device, ipip0.
+
+2).
+ip_tunnel_xmit for gre0 bumps gre0->needed_headroom based on the future
+output device, rt.dev->needed_headroom (ipip0).
+
+3).
+ip output / start_xmit moves skb on to ipip0. which runs the same
+code path again (xmit recursion).
+
+4).
+Routing step for the post-gre0-encap packet finds gre0 as output device
+to use for ipip0 encapsulated packet.
+
+tunl0->needed_headroom is then incremented based on the (already bumped)
+gre0 device headroom.
+
+This repeats for every future packet:
+
+gre0->needed_headroom gets inflated because previous packets' ipip0 step
+incremented rt->dev (gre0) headroom, and ipip0 incremented because gre0
+needed_headroom was increased.
+
+For each subsequent packet, gre/ipip0->needed_headroom grows until
+post-expand-head reallocations result in a skb->head/data distance of
+more than 64k.
+
+Once that happens, skb->network_header (u16) wraps around when
+pskb_expand_head tries to make sure that skb_network_offset() is unchanged
+after the headroom expansion/reallocation.
+
+After this skb_network_offset(skb) returns a different (and negative)
+result post headroom expansion.
+
+The next trip to neigh layer (or anything else that would __skb_pull the
+network header) makes skb->data point to a memory location outside
+skb->head area.
+
+v2: Cap the needed_headroom update to an arbitarily chosen upperlimit to
+prevent perpetual increase instead of dropping the headroom increment
+completely.
+
+Reported-and-tested-by: syzbot+bfde3bef047a81b8fde6@syzkaller.appspotmail.com
+Closes: https://groups.google.com/g/syzkaller-bugs/c/fL9G6GtWskY/m/VKk_PR5FBAAJ
+Fixes: 243aad830e8a ("ip_gre: include route header_len in max_headroom calculation")
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://lore.kernel.org/r/20240220135606.4939-1-fw@strlen.de
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/ip_tunnel.c | 28 +++++++++++++++++++++-------
+ 1 file changed, 21 insertions(+), 7 deletions(-)
+
+diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
+index 426dc910aaf87..96b7cd3049a33 100644
+--- a/net/ipv4/ip_tunnel.c
++++ b/net/ipv4/ip_tunnel.c
+@@ -540,6 +540,20 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
+       return 0;
+ }
++static void ip_tunnel_adj_headroom(struct net_device *dev, unsigned int headroom)
++{
++      /* we must cap headroom to some upperlimit, else pskb_expand_head
++       * will overflow header offsets in skb_headers_offset_update().
++       */
++      static const unsigned int max_allowed = 512;
++
++      if (headroom > max_allowed)
++              headroom = max_allowed;
++
++      if (headroom > READ_ONCE(dev->needed_headroom))
++              WRITE_ONCE(dev->needed_headroom, headroom);
++}
++
+ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
+                      u8 proto, int tunnel_hlen)
+ {
+@@ -613,13 +627,13 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
+       }
+       headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
+-      if (headroom > READ_ONCE(dev->needed_headroom))
+-              WRITE_ONCE(dev->needed_headroom, headroom);
+-
+-      if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) {
++      if (skb_cow_head(skb, headroom)) {
+               ip_rt_put(rt);
+               goto tx_dropped;
+       }
++
++      ip_tunnel_adj_headroom(dev, headroom);
++
+       iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
+                     df, !net_eq(tunnel->net, dev_net(dev)));
+       return;
+@@ -797,16 +811,16 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
+       max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
+                       + rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
+-      if (max_headroom > READ_ONCE(dev->needed_headroom))
+-              WRITE_ONCE(dev->needed_headroom, max_headroom);
+-      if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) {
++      if (skb_cow_head(skb, max_headroom)) {
+               ip_rt_put(rt);
+               dev->stats.tx_dropped++;
+               kfree_skb(skb);
+               return;
+       }
++      ip_tunnel_adj_headroom(dev, max_headroom);
++
+       iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
+                     df, !net_eq(tunnel->net, dev_net(dev)));
+       return;
+-- 
+2.43.0
+
diff --git a/queue-5.15/net-usb-dm9601-fix-wrong-return-value-in-dm9601_mdio.patch b/queue-5.15/net-usb-dm9601-fix-wrong-return-value-in-dm9601_mdio.patch
new file mode 100644 (file)
index 0000000..7af1359
--- /dev/null
@@ -0,0 +1,49 @@
+From 07a2d624ff2c54511c92e6d2a8440348a4b97921 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 25 Feb 2024 00:20:06 +0100
+Subject: net: usb: dm9601: fix wrong return value in dm9601_mdio_read
+
+From: Javier Carrasco <javier.carrasco.cruz@gmail.com>
+
+[ Upstream commit c68b2c9eba38ec3f60f4894b189090febf4d8d22 ]
+
+The MII code does not check the return value of mdio_read (among
+others), and therefore no error code should be sent. A previous fix to
+the use of an uninitialized variable propagates negative error codes,
+that might lead to wrong operations by the MII library.
+
+An example of such issues is the use of mii_nway_restart by the dm9601
+driver. The mii_nway_restart function does not check the value returned
+by mdio_read, which in this case might be a negative number which could
+contain the exact bit the function checks (BMCR_ANENABLE = 0x1000).
+
+Return zero in case of error, as it is common practice in users of
+mdio_read to avoid wrong uses of the return value.
+
+Fixes: 8f8abb863fa5 ("net: usb: dm9601: fix uninitialized variable use in dm9601_mdio_read")
+Signed-off-by: Javier Carrasco <javier.carrasco.cruz@gmail.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Reviewed-by: Peter Korsgaard <peter@korsgaard.com>
+Link: https://lore.kernel.org/r/20240225-dm9601_ret_err-v1-1-02c1d959ea59@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/usb/dm9601.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c
+index 1959e12a3ff8a..f7357d884d6aa 100644
+--- a/drivers/net/usb/dm9601.c
++++ b/drivers/net/usb/dm9601.c
+@@ -232,7 +232,7 @@ static int dm9601_mdio_read(struct net_device *netdev, int phy_id, int loc)
+       err = dm_read_shared_word(dev, 1, loc, &res);
+       if (err < 0) {
+               netdev_err(dev->net, "MDIO read error: %d\n", err);
+-              return err;
++              return 0;
+       }
+       netdev_dbg(dev->net,
+-- 
+2.43.0
+
diff --git a/queue-5.15/net-veth-clear-gro-when-clearing-xdp-even-when-down.patch b/queue-5.15/net-veth-clear-gro-when-clearing-xdp-even-when-down.patch
new file mode 100644 (file)
index 0000000..9ee7556
--- /dev/null
@@ -0,0 +1,121 @@
+From 8efa248bc406923275a53b52bf40ba3122ff59d8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 21 Feb 2024 15:12:10 -0800
+Subject: net: veth: clear GRO when clearing XDP even when down
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit fe9f801355f0b47668419f30f1fac1cf4539e736 ]
+
+veth sets NETIF_F_GRO automatically when XDP is enabled,
+because both features use the same NAPI machinery.
+
+The logic to clear NETIF_F_GRO sits in veth_disable_xdp() which
+is called both on ndo_stop and when XDP is turned off.
+To avoid the flag from being cleared when the device is brought
+down, the clearing is skipped when IFF_UP is not set.
+Bringing the device down should indeed not modify its features.
+
+Unfortunately, this means that clearing is also skipped when
+XDP is disabled _while_ the device is down. And there's nothing
+on the open path to bring the device features back into sync.
+IOW if user enables XDP, disables it and then brings the device
+up we'll end up with a stray GRO flag set but no NAPI instances.
+
+We don't depend on the GRO flag on the datapath, so the datapath
+won't crash. We will crash (or hang), however, next time features
+are sync'ed (either by user via ethtool or peer changing its config).
+The GRO flag will go away, and veth will try to disable the NAPIs.
+But the open path never created them since XDP was off, the GRO flag
+was a stray. If NAPI was initialized before we'll hang in napi_disable().
+If it never was we'll crash trying to stop uninitialized hrtimer.
+
+Move the GRO flag updates to the XDP enable / disable paths,
+instead of mixing them with the ndo_open / ndo_close paths.
+
+Fixes: d3256efd8e8b ("veth: allow enabling NAPI even without XDP")
+Reported-by: Thomas Gleixner <tglx@linutronix.de>
+Reported-by: syzbot+039399a9b96297ddedca@syzkaller.appspotmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/veth.c | 35 +++++++++++++++++------------------
+ 1 file changed, 17 insertions(+), 18 deletions(-)
+
+diff --git a/drivers/net/veth.c b/drivers/net/veth.c
+index 984a153804096..85c3e12f83627 100644
+--- a/drivers/net/veth.c
++++ b/drivers/net/veth.c
+@@ -1079,14 +1079,6 @@ static int veth_enable_xdp(struct net_device *dev)
+                               veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, true);
+                               return err;
+                       }
+-
+-                      if (!veth_gro_requested(dev)) {
+-                              /* user-space did not require GRO, but adding XDP
+-                               * is supposed to get GRO working
+-                               */
+-                              dev->features |= NETIF_F_GRO;
+-                              netdev_features_change(dev);
+-                      }
+               }
+       }
+@@ -1106,18 +1098,9 @@ static void veth_disable_xdp(struct net_device *dev)
+       for (i = 0; i < dev->real_num_rx_queues; i++)
+               rcu_assign_pointer(priv->rq[i].xdp_prog, NULL);
+-      if (!netif_running(dev) || !veth_gro_requested(dev)) {
++      if (!netif_running(dev) || !veth_gro_requested(dev))
+               veth_napi_del(dev);
+-              /* if user-space did not require GRO, since adding XDP
+-               * enabled it, clear it now
+-               */
+-              if (!veth_gro_requested(dev) && netif_running(dev)) {
+-                      dev->features &= ~NETIF_F_GRO;
+-                      netdev_features_change(dev);
+-              }
+-      }
+-
+       veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, false);
+ }
+@@ -1497,6 +1480,14 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog,
+               }
+               if (!old_prog) {
++                      if (!veth_gro_requested(dev)) {
++                              /* user-space did not require GRO, but adding
++                               * XDP is supposed to get GRO working
++                               */
++                              dev->features |= NETIF_F_GRO;
++                              netdev_features_change(dev);
++                      }
++
+                       peer->hw_features &= ~NETIF_F_GSO_SOFTWARE;
+                       peer->max_mtu = max_mtu;
+               }
+@@ -1507,6 +1498,14 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog,
+                       if (dev->flags & IFF_UP)
+                               veth_disable_xdp(dev);
++                      /* if user-space did not require GRO, since adding XDP
++                       * enabled it, clear it now
++                       */
++                      if (!veth_gro_requested(dev)) {
++                              dev->features &= ~NETIF_F_GRO;
++                              netdev_features_change(dev);
++                      }
++
+                       if (peer) {
+                               peer->hw_features |= NETIF_F_GSO_SOFTWARE;
+                               peer->max_mtu = ETH_MAX_MTU;
+-- 
+2.43.0
+
diff --git a/queue-5.15/netfilter-bridge-confirm-multicast-packets-before-pa.patch b/queue-5.15/netfilter-bridge-confirm-multicast-packets-before-pa.patch
new file mode 100644 (file)
index 0000000..1333ded
--- /dev/null
@@ -0,0 +1,282 @@
+From d193e211fa7b4fbc137c4378057862b37c14f657 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 27 Feb 2024 16:17:51 +0100
+Subject: netfilter: bridge: confirm multicast packets before passing them up
+ the stack
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit 62e7151ae3eb465e0ab52a20c941ff33bb6332e9 ]
+
+conntrack nf_confirm logic cannot handle cloned skbs referencing
+the same nf_conn entry, which will happen for multicast (broadcast)
+frames on bridges.
+
+ Example:
+    macvlan0
+       |
+      br0
+     /  \
+  ethX    ethY
+
+ ethX (or Y) receives a L2 multicast or broadcast packet containing
+ an IP packet, flow is not yet in conntrack table.
+
+ 1. skb passes through bridge and fake-ip (br_netfilter)Prerouting.
+    -> skb->_nfct now references a unconfirmed entry
+ 2. skb is broad/mcast packet. bridge now passes clones out on each bridge
+    interface.
+ 3. skb gets passed up the stack.
+ 4. In macvlan case, macvlan driver retains clone(s) of the mcast skb
+    and schedules a work queue to send them out on the lower devices.
+
+    The clone skb->_nfct is not a copy, it is the same entry as the
+    original skb.  The macvlan rx handler then returns RX_HANDLER_PASS.
+ 5. Normal conntrack hooks (in NF_INET_LOCAL_IN) confirm the orig skb.
+
+The Macvlan broadcast worker and normal confirm path will race.
+
+This race will not happen if step 2 already confirmed a clone. In that
+case later steps perform skb_clone() with skb->_nfct already confirmed (in
+hash table).  This works fine.
+
+But such confirmation won't happen when eb/ip/nftables rules dropped the
+packets before they reached the nf_confirm step in postrouting.
+
+Pablo points out that nf_conntrack_bridge doesn't allow use of stateful
+nat, so we can safely discard the nf_conn entry and let inet call
+conntrack again.
+
+This doesn't work for bridge netfilter: skb could have a nat
+transformation. Also bridge nf prevents re-invocation of inet prerouting
+via 'sabotage_in' hook.
+
+Work around this problem by explicit confirmation of the entry at LOCAL_IN
+time, before upper layer has a chance to clone the unconfirmed entry.
+
+The downside is that this disables NAT and conntrack helpers.
+
+Alternative fix would be to add locking to all code parts that deal with
+unconfirmed packets, but even if that could be done in a sane way this
+opens up other problems, for example:
+
+-m physdev --physdev-out eth0 -j SNAT --snat-to 1.2.3.4
+-m physdev --physdev-out eth1 -j SNAT --snat-to 1.2.3.5
+
+For multicast case, only one of such conflicting mappings will be
+created, conntrack only handles 1:1 NAT mappings.
+
+Users should set create a setup that explicitly marks such traffic
+NOTRACK (conntrack bypass) to avoid this, but we cannot auto-bypass
+them, ruleset might have accept rules for untracked traffic already,
+so user-visible behaviour would change.
+
+Suggested-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217777
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/netfilter.h                  |  1 +
+ net/bridge/br_netfilter_hooks.c            | 96 ++++++++++++++++++++++
+ net/bridge/netfilter/nf_conntrack_bridge.c | 30 +++++++
+ net/netfilter/nf_conntrack_core.c          |  1 +
+ 4 files changed, 128 insertions(+)
+
+diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
+index c92bb1580f419..c69cbd64b5b46 100644
+--- a/include/linux/netfilter.h
++++ b/include/linux/netfilter.h
+@@ -461,6 +461,7 @@ struct nf_ct_hook {
+                             const struct sk_buff *);
+       void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb);
+       void (*set_closing)(struct nf_conntrack *nfct);
++      int (*confirm)(struct sk_buff *skb);
+ };
+ extern const struct nf_ct_hook __rcu *nf_ct_hook;
+diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
+index f14beb9a62edb..8a114a5000466 100644
+--- a/net/bridge/br_netfilter_hooks.c
++++ b/net/bridge/br_netfilter_hooks.c
+@@ -43,6 +43,10 @@
+ #include <linux/sysctl.h>
+ #endif
++#if IS_ENABLED(CONFIG_NF_CONNTRACK)
++#include <net/netfilter/nf_conntrack_core.h>
++#endif
++
+ static unsigned int brnf_net_id __read_mostly;
+ struct brnf_net {
+@@ -537,6 +541,90 @@ static unsigned int br_nf_pre_routing(void *priv,
+       return NF_STOLEN;
+ }
++#if IS_ENABLED(CONFIG_NF_CONNTRACK)
++/* conntracks' nf_confirm logic cannot handle cloned skbs referencing
++ * the same nf_conn entry, which will happen for multicast (broadcast)
++ * Frames on bridges.
++ *
++ * Example:
++ *      macvlan0
++ *      br0
++ *  ethX  ethY
++ *
++ * ethX (or Y) receives multicast or broadcast packet containing
++ * an IP packet, not yet in conntrack table.
++ *
++ * 1. skb passes through bridge and fake-ip (br_netfilter)Prerouting.
++ *    -> skb->_nfct now references a unconfirmed entry
++ * 2. skb is broad/mcast packet. bridge now passes clones out on each bridge
++ *    interface.
++ * 3. skb gets passed up the stack.
++ * 4. In macvlan case, macvlan driver retains clone(s) of the mcast skb
++ *    and schedules a work queue to send them out on the lower devices.
++ *
++ *    The clone skb->_nfct is not a copy, it is the same entry as the
++ *    original skb.  The macvlan rx handler then returns RX_HANDLER_PASS.
++ * 5. Normal conntrack hooks (in NF_INET_LOCAL_IN) confirm the orig skb.
++ *
++ * The Macvlan broadcast worker and normal confirm path will race.
++ *
++ * This race will not happen if step 2 already confirmed a clone. In that
++ * case later steps perform skb_clone() with skb->_nfct already confirmed (in
++ * hash table).  This works fine.
++ *
++ * But such confirmation won't happen when eb/ip/nftables rules dropped the
++ * packets before they reached the nf_confirm step in postrouting.
++ *
++ * Work around this problem by explicit confirmation of the entry at
++ * LOCAL_IN time, before upper layer has a chance to clone the unconfirmed
++ * entry.
++ *
++ */
++static unsigned int br_nf_local_in(void *priv,
++                                 struct sk_buff *skb,
++                                 const struct nf_hook_state *state)
++{
++      struct nf_conntrack *nfct = skb_nfct(skb);
++      const struct nf_ct_hook *ct_hook;
++      struct nf_conn *ct;
++      int ret;
++
++      if (!nfct || skb->pkt_type == PACKET_HOST)
++              return NF_ACCEPT;
++
++      ct = container_of(nfct, struct nf_conn, ct_general);
++      if (likely(nf_ct_is_confirmed(ct)))
++              return NF_ACCEPT;
++
++      WARN_ON_ONCE(skb_shared(skb));
++      WARN_ON_ONCE(refcount_read(&nfct->use) != 1);
++
++      /* We can't call nf_confirm here, it would create a dependency
++       * on nf_conntrack module.
++       */
++      ct_hook = rcu_dereference(nf_ct_hook);
++      if (!ct_hook) {
++              skb->_nfct = 0ul;
++              nf_conntrack_put(nfct);
++              return NF_ACCEPT;
++      }
++
++      nf_bridge_pull_encap_header(skb);
++      ret = ct_hook->confirm(skb);
++      switch (ret & NF_VERDICT_MASK) {
++      case NF_STOLEN:
++              return NF_STOLEN;
++      default:
++              nf_bridge_push_encap_header(skb);
++              break;
++      }
++
++      ct = container_of(nfct, struct nf_conn, ct_general);
++      WARN_ON_ONCE(!nf_ct_is_confirmed(ct));
++
++      return ret;
++}
++#endif
+ /* PF_BRIDGE/FORWARD *************************************************/
+ static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+@@ -935,6 +1023,14 @@ static const struct nf_hook_ops br_nf_ops[] = {
+               .hooknum = NF_BR_PRE_ROUTING,
+               .priority = NF_BR_PRI_BRNF,
+       },
++#if IS_ENABLED(CONFIG_NF_CONNTRACK)
++      {
++              .hook = br_nf_local_in,
++              .pf = NFPROTO_BRIDGE,
++              .hooknum = NF_BR_LOCAL_IN,
++              .priority = NF_BR_PRI_LAST,
++      },
++#endif
+       {
+               .hook = br_nf_forward_ip,
+               .pf = NFPROTO_BRIDGE,
+diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c
+index d14b2dbbd1dfb..83743e95939b1 100644
+--- a/net/bridge/netfilter/nf_conntrack_bridge.c
++++ b/net/bridge/netfilter/nf_conntrack_bridge.c
+@@ -290,6 +290,30 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb,
+       return nf_conntrack_in(skb, &bridge_state);
+ }
++static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb,
++                                  const struct nf_hook_state *state)
++{
++      enum ip_conntrack_info ctinfo;
++      struct nf_conn *ct;
++
++      if (skb->pkt_type == PACKET_HOST)
++              return NF_ACCEPT;
++
++      /* nf_conntrack_confirm() cannot handle concurrent clones,
++       * this happens for broad/multicast frames with e.g. macvlan on top
++       * of the bridge device.
++       */
++      ct = nf_ct_get(skb, &ctinfo);
++      if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct))
++              return NF_ACCEPT;
++
++      /* let inet prerouting call conntrack again */
++      skb->_nfct = 0;
++      nf_ct_put(ct);
++
++      return NF_ACCEPT;
++}
++
+ static void nf_ct_bridge_frag_save(struct sk_buff *skb,
+                                  struct nf_bridge_frag_data *data)
+ {
+@@ -414,6 +438,12 @@ static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = {
+               .hooknum        = NF_BR_PRE_ROUTING,
+               .priority       = NF_IP_PRI_CONNTRACK,
+       },
++      {
++              .hook           = nf_ct_bridge_in,
++              .pf             = NFPROTO_BRIDGE,
++              .hooknum        = NF_BR_LOCAL_IN,
++              .priority       = NF_IP_PRI_CONNTRACK_CONFIRM,
++      },
+       {
+               .hook           = nf_ct_bridge_post,
+               .pf             = NFPROTO_BRIDGE,
+diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
+index e0f4f76439d3d..be6031886f942 100644
+--- a/net/netfilter/nf_conntrack_core.c
++++ b/net/netfilter/nf_conntrack_core.c
+@@ -2850,6 +2850,7 @@ static const struct nf_ct_hook nf_conntrack_hook = {
+       .get_tuple_skb  = nf_conntrack_get_tuple_skb,
+       .attach         = nf_conntrack_attach,
+       .set_closing    = nf_conntrack_set_closing,
++      .confirm        = __nf_conntrack_confirm,
+ };
+ void nf_conntrack_init_end(void)
+-- 
+2.43.0
+
diff --git a/queue-5.15/netfilter-core-move-ip_ct_attach-indirection-to-stru.patch b/queue-5.15/netfilter-core-move-ip_ct_attach-indirection-to-stru.patch
new file mode 100644 (file)
index 0000000..70c0eed
--- /dev/null
@@ -0,0 +1,109 @@
+From 2bf15b346e8c5bbdbf2bf246a13d8a784adc60c5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 7 Jan 2022 05:03:23 +0100
+Subject: netfilter: core: move ip_ct_attach indirection to struct nf_ct_hook
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit 3fce16493dc1aa2c9af3d7e7bd360dfe203a3e6a ]
+
+ip_ct_attach predates struct nf_ct_hook, we can place it there and
+remove the exported symbol.
+
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Stable-dep-of: 62e7151ae3eb ("netfilter: bridge: confirm multicast packets before passing them up the stack")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/netfilter.h         |  2 +-
+ net/netfilter/core.c              | 19 ++++++++-----------
+ net/netfilter/nf_conntrack_core.c |  4 +---
+ 3 files changed, 10 insertions(+), 15 deletions(-)
+
+diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
+index e20c2db0f2c16..64acdf22eb4fa 100644
+--- a/include/linux/netfilter.h
++++ b/include/linux/netfilter.h
+@@ -435,7 +435,6 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
+ #if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ #include <linux/netfilter/nf_conntrack_zones_common.h>
+-extern void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *) __rcu;
+ void nf_ct_attach(struct sk_buff *, const struct sk_buff *);
+ struct nf_conntrack_tuple;
+ bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
+@@ -458,6 +457,7 @@ struct nf_ct_hook {
+       void (*destroy)(struct nf_conntrack *);
+       bool (*get_tuple_skb)(struct nf_conntrack_tuple *,
+                             const struct sk_buff *);
++      void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb);
+ };
+ extern struct nf_ct_hook __rcu *nf_ct_hook;
+diff --git a/net/netfilter/core.c b/net/netfilter/core.c
+index ffa84cafb746b..5396d27ba6a71 100644
+--- a/net/netfilter/core.c
++++ b/net/netfilter/core.c
+@@ -639,25 +639,22 @@ struct nf_ct_hook __rcu *nf_ct_hook __read_mostly;
+ EXPORT_SYMBOL_GPL(nf_ct_hook);
+ #if IS_ENABLED(CONFIG_NF_CONNTRACK)
+-/* This does not belong here, but locally generated errors need it if connection
+-   tracking in use: without this, connection may not be in hash table, and hence
+-   manufactured ICMP or RST packets will not be associated with it. */
+-void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *)
+-              __rcu __read_mostly;
+-EXPORT_SYMBOL(ip_ct_attach);
+-
+ struct nf_nat_hook __rcu *nf_nat_hook __read_mostly;
+ EXPORT_SYMBOL_GPL(nf_nat_hook);
++/* This does not belong here, but locally generated errors need it if connection
++ * tracking in use: without this, connection may not be in hash table, and hence
++ * manufactured ICMP or RST packets will not be associated with it.
++ */
+ void nf_ct_attach(struct sk_buff *new, const struct sk_buff *skb)
+ {
+-      void (*attach)(struct sk_buff *, const struct sk_buff *);
++      const struct nf_ct_hook *ct_hook;
+       if (skb->_nfct) {
+               rcu_read_lock();
+-              attach = rcu_dereference(ip_ct_attach);
+-              if (attach)
+-                      attach(new, skb);
++              ct_hook = rcu_dereference(nf_ct_hook);
++              if (ct_hook)
++                      ct_hook->attach(new, skb);
+               rcu_read_unlock();
+       }
+ }
+diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
+index 10622760f894a..779e41d1afdce 100644
+--- a/net/netfilter/nf_conntrack_core.c
++++ b/net/netfilter/nf_conntrack_core.c
+@@ -2518,7 +2518,6 @@ static int kill_all(struct nf_conn *i, void *data)
+ void nf_conntrack_cleanup_start(void)
+ {
+       conntrack_gc_work.exiting = true;
+-      RCU_INIT_POINTER(ip_ct_attach, NULL);
+ }
+ void nf_conntrack_cleanup_end(void)
+@@ -2838,12 +2837,11 @@ static struct nf_ct_hook nf_conntrack_hook = {
+       .update         = nf_conntrack_update,
+       .destroy        = nf_ct_destroy,
+       .get_tuple_skb  = nf_conntrack_get_tuple_skb,
++      .attach         = nf_conntrack_attach,
+ };
+ void nf_conntrack_init_end(void)
+ {
+-      /* For use by REJECT target */
+-      RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach);
+       RCU_INIT_POINTER(nf_ct_hook, &nf_conntrack_hook);
+ }
+-- 
+2.43.0
+
diff --git a/queue-5.15/netfilter-let-reset-rules-clean-out-conntrack-entrie.patch b/queue-5.15/netfilter-let-reset-rules-clean-out-conntrack-entrie.patch
new file mode 100644 (file)
index 0000000..6b39f3d
--- /dev/null
@@ -0,0 +1,229 @@
+From 66e009926a2ed09a54351ac4e2b0c6ccb7423bdf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 1 Feb 2023 14:45:22 +0100
+Subject: netfilter: let reset rules clean out conntrack entries
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit 2954fe60e33da0f4de4d81a4c95c7dddb517d00c ]
+
+iptables/nftables support responding to tcp packets with tcp resets.
+
+The generated tcp reset packet passes through both output and postrouting
+netfilter hooks, but conntrack will never see them because the generated
+skb has its ->nfct pointer copied over from the packet that triggered the
+reset rule.
+
+If the reset rule is used for established connections, this
+may result in the conntrack entry to be around for a very long
+time (default timeout is 5 days).
+
+One way to avoid this would be to not copy the nf_conn pointer
+so that the reset packet passes through conntrack too.
+
+Problem is that output rules might not have the same conntrack
+zone setup as the prerouting ones, so its possible that the
+reset skb won't find the correct entry.  Generating a template
+entry for the skb seems error prone as well.
+
+Add an explicit "closing" function that switches a confirmed
+conntrack entry to closed state and wire this up for tcp.
+
+If the entry isn't confirmed, no action is needed because
+the conntrack entry will never be committed to the table.
+
+Reported-by: Russel King <linux@armlinux.org.uk>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Stable-dep-of: 62e7151ae3eb ("netfilter: bridge: confirm multicast packets before passing them up the stack")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/netfilter.h              |  3 +++
+ include/net/netfilter/nf_conntrack.h   |  8 ++++++
+ net/ipv4/netfilter/nf_reject_ipv4.c    |  1 +
+ net/ipv6/netfilter/nf_reject_ipv6.c    |  1 +
+ net/netfilter/core.c                   | 16 ++++++++++++
+ net/netfilter/nf_conntrack_core.c      | 12 +++++++++
+ net/netfilter/nf_conntrack_proto_tcp.c | 35 ++++++++++++++++++++++++++
+ 7 files changed, 76 insertions(+)
+
+diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
+index 5a665034c30be..c92bb1580f419 100644
+--- a/include/linux/netfilter.h
++++ b/include/linux/netfilter.h
+@@ -436,11 +436,13 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
+ #include <linux/netfilter/nf_conntrack_zones_common.h>
+ void nf_ct_attach(struct sk_buff *, const struct sk_buff *);
++void nf_ct_set_closing(struct nf_conntrack *nfct);
+ struct nf_conntrack_tuple;
+ bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
+                        const struct sk_buff *skb);
+ #else
+ static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {}
++static inline void nf_ct_set_closing(struct nf_conntrack *nfct) {}
+ struct nf_conntrack_tuple;
+ static inline bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
+                                      const struct sk_buff *skb)
+@@ -458,6 +460,7 @@ struct nf_ct_hook {
+       bool (*get_tuple_skb)(struct nf_conntrack_tuple *,
+                             const struct sk_buff *);
+       void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb);
++      void (*set_closing)(struct nf_conntrack *nfct);
+ };
+ extern const struct nf_ct_hook __rcu *nf_ct_hook;
+diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
+index 34c266502a50e..39541ab912a16 100644
+--- a/include/net/netfilter/nf_conntrack.h
++++ b/include/net/netfilter/nf_conntrack.h
+@@ -123,6 +123,12 @@ struct nf_conn {
+       union nf_conntrack_proto proto;
+ };
++static inline struct nf_conn *
++nf_ct_to_nf_conn(const struct nf_conntrack *nfct)
++{
++      return container_of(nfct, struct nf_conn, ct_general);
++}
++
+ static inline struct nf_conn *
+ nf_ct_tuplehash_to_ctrack(const struct nf_conntrack_tuple_hash *hash)
+ {
+@@ -173,6 +179,8 @@ nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
+ void nf_ct_destroy(struct nf_conntrack *nfct);
++void nf_conntrack_tcp_set_closing(struct nf_conn *ct);
++
+ /* decrement reference count on a conntrack */
+ static inline void nf_ct_put(struct nf_conn *ct)
+ {
+diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
+index f2edb40c0db00..350aaca126181 100644
+--- a/net/ipv4/netfilter/nf_reject_ipv4.c
++++ b/net/ipv4/netfilter/nf_reject_ipv4.c
+@@ -278,6 +278,7 @@ void nf_send_reset(struct net *net, struct sock *sk, struct sk_buff *oldskb,
+               goto free_nskb;
+       nf_ct_attach(nskb, oldskb);
++      nf_ct_set_closing(skb_nfct(oldskb));
+ #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+       /* If we use ip_local_out for bridged traffic, the MAC source on
+diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
+index dffeaaaadcded..c0057edd84cfc 100644
+--- a/net/ipv6/netfilter/nf_reject_ipv6.c
++++ b/net/ipv6/netfilter/nf_reject_ipv6.c
+@@ -345,6 +345,7 @@ void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb,
+       nf_reject_ip6_tcphdr_put(nskb, oldskb, otcph, otcplen);
+       nf_ct_attach(nskb, oldskb);
++      nf_ct_set_closing(skb_nfct(oldskb));
+ #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+       /* If we use ip6_local_out for bridged traffic, the MAC source on
+diff --git a/net/netfilter/core.c b/net/netfilter/core.c
+index aa3f7d3228fda..fe81824799d95 100644
+--- a/net/netfilter/core.c
++++ b/net/netfilter/core.c
+@@ -674,6 +674,22 @@ void nf_conntrack_destroy(struct nf_conntrack *nfct)
+ }
+ EXPORT_SYMBOL(nf_conntrack_destroy);
++void nf_ct_set_closing(struct nf_conntrack *nfct)
++{
++      const struct nf_ct_hook *ct_hook;
++
++      if (!nfct)
++              return;
++
++      rcu_read_lock();
++      ct_hook = rcu_dereference(nf_ct_hook);
++      if (ct_hook)
++              ct_hook->set_closing(nfct);
++
++      rcu_read_unlock();
++}
++EXPORT_SYMBOL_GPL(nf_ct_set_closing);
++
+ bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
+                        const struct sk_buff *skb)
+ {
+diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
+index 2a4222eefc894..e0f4f76439d3d 100644
+--- a/net/netfilter/nf_conntrack_core.c
++++ b/net/netfilter/nf_conntrack_core.c
+@@ -2833,11 +2833,23 @@ int nf_conntrack_init_start(void)
+       return ret;
+ }
++static void nf_conntrack_set_closing(struct nf_conntrack *nfct)
++{
++      struct nf_conn *ct = nf_ct_to_nf_conn(nfct);
++
++      switch (nf_ct_protonum(ct)) {
++      case IPPROTO_TCP:
++              nf_conntrack_tcp_set_closing(ct);
++              break;
++      }
++}
++
+ static const struct nf_ct_hook nf_conntrack_hook = {
+       .update         = nf_conntrack_update,
+       .destroy        = nf_ct_destroy,
+       .get_tuple_skb  = nf_conntrack_get_tuple_skb,
+       .attach         = nf_conntrack_attach,
++      .set_closing    = nf_conntrack_set_closing,
+ };
+ void nf_conntrack_init_end(void)
+diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
+index 1ecfdc4f23be8..f33e6aea7f4da 100644
+--- a/net/netfilter/nf_conntrack_proto_tcp.c
++++ b/net/netfilter/nf_conntrack_proto_tcp.c
+@@ -870,6 +870,41 @@ static bool tcp_can_early_drop(const struct nf_conn *ct)
+       return false;
+ }
++void nf_conntrack_tcp_set_closing(struct nf_conn *ct)
++{
++      enum tcp_conntrack old_state;
++      const unsigned int *timeouts;
++      u32 timeout;
++
++      if (!nf_ct_is_confirmed(ct))
++              return;
++
++      spin_lock_bh(&ct->lock);
++      old_state = ct->proto.tcp.state;
++      ct->proto.tcp.state = TCP_CONNTRACK_CLOSE;
++
++      if (old_state == TCP_CONNTRACK_CLOSE ||
++          test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) {
++              spin_unlock_bh(&ct->lock);
++              return;
++      }
++
++      timeouts = nf_ct_timeout_lookup(ct);
++      if (!timeouts) {
++              const struct nf_tcp_net *tn;
++
++              tn = nf_tcp_pernet(nf_ct_net(ct));
++              timeouts = tn->timeouts;
++      }
++
++      timeout = timeouts[TCP_CONNTRACK_CLOSE];
++      WRITE_ONCE(ct->timeout, timeout + nfct_time_stamp);
++
++      spin_unlock_bh(&ct->lock);
++
++      nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
++}
++
+ static void nf_ct_tcp_state_reset(struct ip_ct_tcp_state *state)
+ {
+       state->td_end           = 0;
+-- 
+2.43.0
+
diff --git a/queue-5.15/netfilter-make-function-op-structures-const.patch b/queue-5.15/netfilter-make-function-op-structures-const.patch
new file mode 100644 (file)
index 0000000..439a193
--- /dev/null
@@ -0,0 +1,208 @@
+From 3ce961662e11bb6747c87f0831db00013039dbc6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 7 Jan 2022 05:03:24 +0100
+Subject: netfilter: make function op structures const
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit 285c8a7a58158cb1805c97ff03875df2ba2ea1fe ]
+
+No functional changes, these structures should be const.
+
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Stable-dep-of: 62e7151ae3eb ("netfilter: bridge: confirm multicast packets before passing them up the stack")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/netfilter.h            |  8 ++++----
+ net/netfilter/core.c                 | 10 +++++-----
+ net/netfilter/nf_conntrack_core.c    |  4 ++--
+ net/netfilter/nf_conntrack_netlink.c |  4 ++--
+ net/netfilter/nf_nat_core.c          |  2 +-
+ net/netfilter/nfnetlink_queue.c      |  8 ++++----
+ 6 files changed, 18 insertions(+), 18 deletions(-)
+
+diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
+index 64acdf22eb4fa..5a665034c30be 100644
+--- a/include/linux/netfilter.h
++++ b/include/linux/netfilter.h
+@@ -376,13 +376,13 @@ struct nf_nat_hook {
+                                 enum ip_conntrack_dir dir);
+ };
+-extern struct nf_nat_hook __rcu *nf_nat_hook;
++extern const struct nf_nat_hook __rcu *nf_nat_hook;
+ static inline void
+ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
+ {
+ #if IS_ENABLED(CONFIG_NF_NAT)
+-      struct nf_nat_hook *nat_hook;
++      const struct nf_nat_hook *nat_hook;
+       rcu_read_lock();
+       nat_hook = rcu_dereference(nf_nat_hook);
+@@ -459,7 +459,7 @@ struct nf_ct_hook {
+                             const struct sk_buff *);
+       void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb);
+ };
+-extern struct nf_ct_hook __rcu *nf_ct_hook;
++extern const struct nf_ct_hook __rcu *nf_ct_hook;
+ struct nlattr;
+@@ -474,7 +474,7 @@ struct nfnl_ct_hook {
+       void (*seq_adjust)(struct sk_buff *skb, struct nf_conn *ct,
+                          enum ip_conntrack_info ctinfo, s32 off);
+ };
+-extern struct nfnl_ct_hook __rcu *nfnl_ct_hook;
++extern const struct nfnl_ct_hook __rcu *nfnl_ct_hook;
+ /**
+  * nf_skb_duplicated - TEE target has sent a packet
+diff --git a/net/netfilter/core.c b/net/netfilter/core.c
+index 5396d27ba6a71..aa3f7d3228fda 100644
+--- a/net/netfilter/core.c
++++ b/net/netfilter/core.c
+@@ -632,14 +632,14 @@ EXPORT_SYMBOL(nf_hook_slow_list);
+ /* This needs to be compiled in any case to avoid dependencies between the
+  * nfnetlink_queue code and nf_conntrack.
+  */
+-struct nfnl_ct_hook __rcu *nfnl_ct_hook __read_mostly;
++const struct nfnl_ct_hook __rcu *nfnl_ct_hook __read_mostly;
+ EXPORT_SYMBOL_GPL(nfnl_ct_hook);
+-struct nf_ct_hook __rcu *nf_ct_hook __read_mostly;
++const struct nf_ct_hook __rcu *nf_ct_hook __read_mostly;
+ EXPORT_SYMBOL_GPL(nf_ct_hook);
+ #if IS_ENABLED(CONFIG_NF_CONNTRACK)
+-struct nf_nat_hook __rcu *nf_nat_hook __read_mostly;
++const struct nf_nat_hook __rcu *nf_nat_hook __read_mostly;
+ EXPORT_SYMBOL_GPL(nf_nat_hook);
+ /* This does not belong here, but locally generated errors need it if connection
+@@ -662,7 +662,7 @@ EXPORT_SYMBOL(nf_ct_attach);
+ void nf_conntrack_destroy(struct nf_conntrack *nfct)
+ {
+-      struct nf_ct_hook *ct_hook;
++      const struct nf_ct_hook *ct_hook;
+       rcu_read_lock();
+       ct_hook = rcu_dereference(nf_ct_hook);
+@@ -677,7 +677,7 @@ EXPORT_SYMBOL(nf_conntrack_destroy);
+ bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
+                        const struct sk_buff *skb)
+ {
+-      struct nf_ct_hook *ct_hook;
++      const struct nf_ct_hook *ct_hook;
+       bool ret = false;
+       rcu_read_lock();
+diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
+index 779e41d1afdce..2a4222eefc894 100644
+--- a/net/netfilter/nf_conntrack_core.c
++++ b/net/netfilter/nf_conntrack_core.c
+@@ -2145,9 +2145,9 @@ static int __nf_conntrack_update(struct net *net, struct sk_buff *skb,
+                                struct nf_conn *ct,
+                                enum ip_conntrack_info ctinfo)
+ {
++      const struct nf_nat_hook *nat_hook;
+       struct nf_conntrack_tuple_hash *h;
+       struct nf_conntrack_tuple tuple;
+-      struct nf_nat_hook *nat_hook;
+       unsigned int status;
+       int dataoff;
+       u16 l3num;
+@@ -2833,7 +2833,7 @@ int nf_conntrack_init_start(void)
+       return ret;
+ }
+-static struct nf_ct_hook nf_conntrack_hook = {
++static const struct nf_ct_hook nf_conntrack_hook = {
+       .update         = nf_conntrack_update,
+       .destroy        = nf_ct_destroy,
+       .get_tuple_skb  = nf_conntrack_get_tuple_skb,
+diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
+index c427f7625a3b5..1466015bc56dc 100644
+--- a/net/netfilter/nf_conntrack_netlink.c
++++ b/net/netfilter/nf_conntrack_netlink.c
+@@ -1816,7 +1816,7 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct,
+                         const struct nlattr *attr)
+       __must_hold(RCU)
+ {
+-      struct nf_nat_hook *nat_hook;
++      const struct nf_nat_hook *nat_hook;
+       int err;
+       nat_hook = rcu_dereference(nf_nat_hook);
+@@ -2922,7 +2922,7 @@ static void ctnetlink_glue_seqadj(struct sk_buff *skb, struct nf_conn *ct,
+       nf_ct_tcp_seqadj_set(skb, ct, ctinfo, diff);
+ }
+-static struct nfnl_ct_hook ctnetlink_glue_hook = {
++static const struct nfnl_ct_hook ctnetlink_glue_hook = {
+       .build_size     = ctnetlink_glue_build_size,
+       .build          = ctnetlink_glue_build,
+       .parse          = ctnetlink_glue_parse,
+diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
+index 2731176839228..b776b3af78ca2 100644
+--- a/net/netfilter/nf_nat_core.c
++++ b/net/netfilter/nf_nat_core.c
+@@ -1120,7 +1120,7 @@ static struct pernet_operations nat_net_ops = {
+       .size = sizeof(struct nat_net),
+ };
+-static struct nf_nat_hook nat_hook = {
++static const struct nf_nat_hook nat_hook = {
+       .parse_nat_setup        = nfnetlink_parse_nat_setup,
+ #ifdef CONFIG_XFRM
+       .decode_session         = __nf_nat_decode_session,
+diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
+index f4468ef3d0a94..8c96e01f6a023 100644
+--- a/net/netfilter/nfnetlink_queue.c
++++ b/net/netfilter/nfnetlink_queue.c
+@@ -225,7 +225,7 @@ find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id)
+ static void nfqnl_reinject(struct nf_queue_entry *entry, unsigned int verdict)
+ {
+-      struct nf_ct_hook *ct_hook;
++      const struct nf_ct_hook *ct_hook;
+       int err;
+       if (verdict == NF_ACCEPT ||
+@@ -388,7 +388,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
+       struct net_device *outdev;
+       struct nf_conn *ct = NULL;
+       enum ip_conntrack_info ctinfo = 0;
+-      struct nfnl_ct_hook *nfnl_ct;
++      const struct nfnl_ct_hook *nfnl_ct;
+       bool csum_verify;
+       char *secdata = NULL;
+       u32 seclen = 0;
+@@ -1115,7 +1115,7 @@ static int nfqnl_recv_verdict_batch(struct sk_buff *skb,
+       return 0;
+ }
+-static struct nf_conn *nfqnl_ct_parse(struct nfnl_ct_hook *nfnl_ct,
++static struct nf_conn *nfqnl_ct_parse(const struct nfnl_ct_hook *nfnl_ct,
+                                     const struct nlmsghdr *nlh,
+                                     const struct nlattr * const nfqa[],
+                                     struct nf_queue_entry *entry,
+@@ -1182,11 +1182,11 @@ static int nfqnl_recv_verdict(struct sk_buff *skb, const struct nfnl_info *info,
+ {
+       struct nfnl_queue_net *q = nfnl_queue_pernet(info->net);
+       u_int16_t queue_num = ntohs(info->nfmsg->res_id);
++      const struct nfnl_ct_hook *nfnl_ct;
+       struct nfqnl_msg_verdict_hdr *vhdr;
+       enum ip_conntrack_info ctinfo;
+       struct nfqnl_instance *queue;
+       struct nf_queue_entry *entry;
+-      struct nfnl_ct_hook *nfnl_ct;
+       struct nf_conn *ct = NULL;
+       unsigned int verdict;
+       int err;
+-- 
+2.43.0
+
diff --git a/queue-5.15/netfilter-nf_tables-allow-nfproto_inet-in-nft_-match.patch b/queue-5.15/netfilter-nf_tables-allow-nfproto_inet-in-nft_-match.patch
new file mode 100644 (file)
index 0000000..6378809
--- /dev/null
@@ -0,0 +1,108 @@
+From 35cc9aa0daa8cad7022e1c2a6d76448d4d912e79 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 22 Feb 2024 10:33:08 +0000
+Subject: netfilter: nf_tables: allow NFPROTO_INET in
+ nft_(match/target)_validate()
+
+From: Ignat Korchagin <ignat@cloudflare.com>
+
+[ Upstream commit 7e0f122c65912740327e4c54472acaa5f85868cb ]
+
+Commit d0009effa886 ("netfilter: nf_tables: validate NFPROTO_* family") added
+some validation of NFPROTO_* families in the nft_compat module, but it broke
+the ability to use legacy iptables modules in dual-stack nftables.
+
+While with legacy iptables one had to independently manage IPv4 and IPv6
+tables, with nftables it is possible to have dual-stack tables sharing the
+rules. Moreover, it was possible to use rules based on legacy iptables
+match/target modules in dual-stack nftables.
+
+As an example, the program from [2] creates an INET dual-stack family table
+using an xt_bpf based rule, which looks like the following (the actual output
+was generated with a patched nft tool as the current nft tool does not parse
+dual stack tables with legacy match rules, so consider it for illustrative
+purposes only):
+
+table inet testfw {
+  chain input {
+    type filter hook prerouting priority filter; policy accept;
+    bytecode counter packets 0 bytes 0 accept
+  }
+}
+
+After d0009effa886 ("netfilter: nf_tables: validate NFPROTO_* family") we get
+EOPNOTSUPP for the above program.
+
+Fix this by allowing NFPROTO_INET for nft_(match/target)_validate(), but also
+restrict the functions to classic iptables hooks.
+
+Changes in v3:
+  * clarify that upstream nft will not display such configuration properly and
+    that the output was generated with a patched nft tool
+  * remove example program from commit description and link to it instead
+  * no code changes otherwise
+
+Changes in v2:
+  * restrict nft_(match/target)_validate() to classic iptables hooks
+  * rewrite example program to use unmodified libnftnl
+
+Fixes: d0009effa886 ("netfilter: nf_tables: validate NFPROTO_* family")
+Link: https://lore.kernel.org/all/Zc1PfoWN38UuFJRI@calendula/T/#mc947262582c90fec044c7a3398cc92fac7afea72 [1]
+Link: https://lore.kernel.org/all/20240220145509.53357-1-ignat@cloudflare.com/ [2]
+Reported-by: Jordan Griege <jgriege@cloudflare.com>
+Signed-off-by: Ignat Korchagin <ignat@cloudflare.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nft_compat.c | 20 ++++++++++++++++++++
+ 1 file changed, 20 insertions(+)
+
+diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
+index 64a2a5f195896..aee046e00bfaf 100644
+--- a/net/netfilter/nft_compat.c
++++ b/net/netfilter/nft_compat.c
+@@ -358,10 +358,20 @@ static int nft_target_validate(const struct nft_ctx *ctx,
+       if (ctx->family != NFPROTO_IPV4 &&
+           ctx->family != NFPROTO_IPV6 &&
++          ctx->family != NFPROTO_INET &&
+           ctx->family != NFPROTO_BRIDGE &&
+           ctx->family != NFPROTO_ARP)
+               return -EOPNOTSUPP;
++      ret = nft_chain_validate_hooks(ctx->chain,
++                                     (1 << NF_INET_PRE_ROUTING) |
++                                     (1 << NF_INET_LOCAL_IN) |
++                                     (1 << NF_INET_FORWARD) |
++                                     (1 << NF_INET_LOCAL_OUT) |
++                                     (1 << NF_INET_POST_ROUTING));
++      if (ret)
++              return ret;
++
+       if (nft_is_base_chain(ctx->chain)) {
+               const struct nft_base_chain *basechain =
+                                               nft_base_chain(ctx->chain);
+@@ -607,10 +617,20 @@ static int nft_match_validate(const struct nft_ctx *ctx,
+       if (ctx->family != NFPROTO_IPV4 &&
+           ctx->family != NFPROTO_IPV6 &&
++          ctx->family != NFPROTO_INET &&
+           ctx->family != NFPROTO_BRIDGE &&
+           ctx->family != NFPROTO_ARP)
+               return -EOPNOTSUPP;
++      ret = nft_chain_validate_hooks(ctx->chain,
++                                     (1 << NF_INET_PRE_ROUTING) |
++                                     (1 << NF_INET_LOCAL_IN) |
++                                     (1 << NF_INET_FORWARD) |
++                                     (1 << NF_INET_LOCAL_OUT) |
++                                     (1 << NF_INET_POST_ROUTING));
++      if (ret)
++              return ret;
++
+       if (nft_is_base_chain(ctx->chain)) {
+               const struct nft_base_chain *basechain =
+                                               nft_base_chain(ctx->chain);
+-- 
+2.43.0
+
diff --git a/queue-5.15/netfilter-nfnetlink_queue-silence-bogus-compiler-war.patch b/queue-5.15/netfilter-nfnetlink_queue-silence-bogus-compiler-war.patch
new file mode 100644 (file)
index 0000000..cf15e5d
--- /dev/null
@@ -0,0 +1,40 @@
+From c2690174b5a63513a27ed277b2ffae498151a2db Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 26 Nov 2021 13:04:03 +0100
+Subject: netfilter: nfnetlink_queue: silence bogus compiler warning
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit b43c2793f5e9910862e8fe07846b74e45b104501 ]
+
+net/netfilter/nfnetlink_queue.c:601:36: warning: variable 'ctinfo' is
+uninitialized when used here [-Wuninitialized]
+   if (ct && nfnl_ct->build(skb, ct, ctinfo, NFQA_CT, NFQA_CT_INFO) < 0)
+
+ctinfo is only uninitialized if ct == NULL.  Init it to 0 to silence this.
+
+Reported-by: kernel test robot <lkp@intel.com>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Stable-dep-of: 62e7151ae3eb ("netfilter: bridge: confirm multicast packets before passing them up the stack")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nfnetlink_queue.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
+index 5329ebf19a18b..f4468ef3d0a94 100644
+--- a/net/netfilter/nfnetlink_queue.c
++++ b/net/netfilter/nfnetlink_queue.c
+@@ -387,7 +387,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
+       struct net_device *indev;
+       struct net_device *outdev;
+       struct nf_conn *ct = NULL;
+-      enum ip_conntrack_info ctinfo;
++      enum ip_conntrack_info ctinfo = 0;
+       struct nfnl_ct_hook *nfnl_ct;
+       bool csum_verify;
+       char *secdata = NULL;
+-- 
+2.43.0
+
diff --git a/queue-5.15/netlink-fix-kernel-infoleak-after-free-in-__skb_data.patch b/queue-5.15/netlink-fix-kernel-infoleak-after-free-in-__skb_data.patch
new file mode 100644 (file)
index 0000000..e790b76
--- /dev/null
@@ -0,0 +1,134 @@
+From 237b2b023ee1bef92716db824b94220a99b18a0a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 21 Feb 2024 16:40:48 +0900
+Subject: netlink: Fix kernel-infoleak-after-free in __skb_datagram_iter
+
+From: Ryosuke Yasuoka <ryasuoka@redhat.com>
+
+[ Upstream commit 661779e1fcafe1b74b3f3fe8e980c1e207fea1fd ]
+
+syzbot reported the following uninit-value access issue [1]:
+
+netlink_to_full_skb() creates a new `skb` and puts the `skb->data`
+passed as a 1st arg of netlink_to_full_skb() onto new `skb`. The data
+size is specified as `len` and passed to skb_put_data(). This `len`
+is based on `skb->end` that is not data offset but buffer offset. The
+`skb->end` contains data and tailroom. Since the tailroom is not
+initialized when the new `skb` created, KMSAN detects uninitialized
+memory area when copying the data.
+
+This patch resolved this issue by correct the len from `skb->end` to
+`skb->len`, which is the actual data offset.
+
+BUG: KMSAN: kernel-infoleak-after-free in instrument_copy_to_user include/linux/instrumented.h:114 [inline]
+BUG: KMSAN: kernel-infoleak-after-free in copy_to_user_iter lib/iov_iter.c:24 [inline]
+BUG: KMSAN: kernel-infoleak-after-free in iterate_ubuf include/linux/iov_iter.h:29 [inline]
+BUG: KMSAN: kernel-infoleak-after-free in iterate_and_advance2 include/linux/iov_iter.h:245 [inline]
+BUG: KMSAN: kernel-infoleak-after-free in iterate_and_advance include/linux/iov_iter.h:271 [inline]
+BUG: KMSAN: kernel-infoleak-after-free in _copy_to_iter+0x364/0x2520 lib/iov_iter.c:186
+ instrument_copy_to_user include/linux/instrumented.h:114 [inline]
+ copy_to_user_iter lib/iov_iter.c:24 [inline]
+ iterate_ubuf include/linux/iov_iter.h:29 [inline]
+ iterate_and_advance2 include/linux/iov_iter.h:245 [inline]
+ iterate_and_advance include/linux/iov_iter.h:271 [inline]
+ _copy_to_iter+0x364/0x2520 lib/iov_iter.c:186
+ copy_to_iter include/linux/uio.h:197 [inline]
+ simple_copy_to_iter+0x68/0xa0 net/core/datagram.c:532
+ __skb_datagram_iter+0x123/0xdc0 net/core/datagram.c:420
+ skb_copy_datagram_iter+0x5c/0x200 net/core/datagram.c:546
+ skb_copy_datagram_msg include/linux/skbuff.h:3960 [inline]
+ packet_recvmsg+0xd9c/0x2000 net/packet/af_packet.c:3482
+ sock_recvmsg_nosec net/socket.c:1044 [inline]
+ sock_recvmsg net/socket.c:1066 [inline]
+ sock_read_iter+0x467/0x580 net/socket.c:1136
+ call_read_iter include/linux/fs.h:2014 [inline]
+ new_sync_read fs/read_write.c:389 [inline]
+ vfs_read+0x8f6/0xe00 fs/read_write.c:470
+ ksys_read+0x20f/0x4c0 fs/read_write.c:613
+ __do_sys_read fs/read_write.c:623 [inline]
+ __se_sys_read fs/read_write.c:621 [inline]
+ __x64_sys_read+0x93/0xd0 fs/read_write.c:621
+ do_syscall_x64 arch/x86/entry/common.c:52 [inline]
+ do_syscall_64+0x44/0x110 arch/x86/entry/common.c:83
+ entry_SYSCALL_64_after_hwframe+0x63/0x6b
+
+Uninit was stored to memory at:
+ skb_put_data include/linux/skbuff.h:2622 [inline]
+ netlink_to_full_skb net/netlink/af_netlink.c:181 [inline]
+ __netlink_deliver_tap_skb net/netlink/af_netlink.c:298 [inline]
+ __netlink_deliver_tap+0x5be/0xc90 net/netlink/af_netlink.c:325
+ netlink_deliver_tap net/netlink/af_netlink.c:338 [inline]
+ netlink_deliver_tap_kernel net/netlink/af_netlink.c:347 [inline]
+ netlink_unicast_kernel net/netlink/af_netlink.c:1341 [inline]
+ netlink_unicast+0x10f1/0x1250 net/netlink/af_netlink.c:1368
+ netlink_sendmsg+0x1238/0x13d0 net/netlink/af_netlink.c:1910
+ sock_sendmsg_nosec net/socket.c:730 [inline]
+ __sock_sendmsg net/socket.c:745 [inline]
+ ____sys_sendmsg+0x9c2/0xd60 net/socket.c:2584
+ ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2638
+ __sys_sendmsg net/socket.c:2667 [inline]
+ __do_sys_sendmsg net/socket.c:2676 [inline]
+ __se_sys_sendmsg net/socket.c:2674 [inline]
+ __x64_sys_sendmsg+0x307/0x490 net/socket.c:2674
+ do_syscall_x64 arch/x86/entry/common.c:52 [inline]
+ do_syscall_64+0x44/0x110 arch/x86/entry/common.c:83
+ entry_SYSCALL_64_after_hwframe+0x63/0x6b
+
+Uninit was created at:
+ free_pages_prepare mm/page_alloc.c:1087 [inline]
+ free_unref_page_prepare+0xb0/0xa40 mm/page_alloc.c:2347
+ free_unref_page_list+0xeb/0x1100 mm/page_alloc.c:2533
+ release_pages+0x23d3/0x2410 mm/swap.c:1042
+ free_pages_and_swap_cache+0xd9/0xf0 mm/swap_state.c:316
+ tlb_batch_pages_flush mm/mmu_gather.c:98 [inline]
+ tlb_flush_mmu_free mm/mmu_gather.c:293 [inline]
+ tlb_flush_mmu+0x6f5/0x980 mm/mmu_gather.c:300
+ tlb_finish_mmu+0x101/0x260 mm/mmu_gather.c:392
+ exit_mmap+0x49e/0xd30 mm/mmap.c:3321
+ __mmput+0x13f/0x530 kernel/fork.c:1349
+ mmput+0x8a/0xa0 kernel/fork.c:1371
+ exit_mm+0x1b8/0x360 kernel/exit.c:567
+ do_exit+0xd57/0x4080 kernel/exit.c:858
+ do_group_exit+0x2fd/0x390 kernel/exit.c:1021
+ __do_sys_exit_group kernel/exit.c:1032 [inline]
+ __se_sys_exit_group kernel/exit.c:1030 [inline]
+ __x64_sys_exit_group+0x3c/0x50 kernel/exit.c:1030
+ do_syscall_x64 arch/x86/entry/common.c:52 [inline]
+ do_syscall_64+0x44/0x110 arch/x86/entry/common.c:83
+ entry_SYSCALL_64_after_hwframe+0x63/0x6b
+
+Bytes 3852-3903 of 3904 are uninitialized
+Memory access of size 3904 starts at ffff88812ea1e000
+Data copied to user address 0000000020003280
+
+CPU: 1 PID: 5043 Comm: syz-executor297 Not tainted 6.7.0-rc5-syzkaller-00047-g5bd7ef53ffe5 #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/10/2023
+
+Fixes: 1853c9496460 ("netlink, mmap: transform mmap skb into full skb on taps")
+Reported-and-tested-by: syzbot+34ad5fab48f7bf510349@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=34ad5fab48f7bf510349 [1]
+Signed-off-by: Ryosuke Yasuoka <ryasuoka@redhat.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20240221074053.1794118-1-ryasuoka@redhat.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netlink/af_netlink.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
+index 2169a9c3da1c3..82df02695bbdd 100644
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -165,7 +165,7 @@ static inline u32 netlink_group_mask(u32 group)
+ static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb,
+                                          gfp_t gfp_mask)
+ {
+-      unsigned int len = skb_end_offset(skb);
++      unsigned int len = skb->len;
+       struct sk_buff *new;
+       new = alloc_skb(len, gfp_mask);
+-- 
+2.43.0
+
diff --git a/queue-5.15/power-supply-bq27xxx-i2c-do-not-free-non-existing-ir.patch b/queue-5.15/power-supply-bq27xxx-i2c-do-not-free-non-existing-ir.patch
new file mode 100644 (file)
index 0000000..c76a7b7
--- /dev/null
@@ -0,0 +1,51 @@
+From 1fa93963ef55c9a02940494d60aa163c7e3b573e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 15 Feb 2024 16:51:33 +0100
+Subject: power: supply: bq27xxx-i2c: Do not free non existing IRQ
+
+From: Hans de Goede <hdegoede@redhat.com>
+
+[ Upstream commit 2df70149e73e79783bcbc7db4fa51ecef0e2022c ]
+
+The bq27xxx i2c-client may not have an IRQ, in which case
+client->irq will be 0. bq27xxx_battery_i2c_probe() already has
+an if (client->irq) check wrapping the request_threaded_irq().
+
+But bq27xxx_battery_i2c_remove() unconditionally calls
+free_irq(client->irq) leading to:
+
+[  190.310742] ------------[ cut here ]------------
+[  190.310843] Trying to free already-free IRQ 0
+[  190.310861] WARNING: CPU: 2 PID: 1304 at kernel/irq/manage.c:1893 free_irq+0x1b8/0x310
+
+Followed by a backtrace when unbinding the driver. Add
+an if (client->irq) to bq27xxx_battery_i2c_remove() mirroring
+probe() to fix this.
+
+Fixes: 444ff00734f3 ("power: supply: bq27xxx: Fix I2C IRQ race on remove")
+Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+Link: https://lore.kernel.org/r/20240215155133.70537-1-hdegoede@redhat.com
+Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/power/supply/bq27xxx_battery_i2c.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/power/supply/bq27xxx_battery_i2c.c b/drivers/power/supply/bq27xxx_battery_i2c.c
+index b722ee2d7e142..4e5d773b3bf8d 100644
+--- a/drivers/power/supply/bq27xxx_battery_i2c.c
++++ b/drivers/power/supply/bq27xxx_battery_i2c.c
+@@ -209,7 +209,9 @@ static int bq27xxx_battery_i2c_remove(struct i2c_client *client)
+ {
+       struct bq27xxx_device_info *di = i2c_get_clientdata(client);
+-      free_irq(client->irq, di);
++      if (client->irq)
++              free_irq(client->irq, di);
++
+       bq27xxx_battery_teardown(di);
+       mutex_lock(&battery_mutex);
+-- 
+2.43.0
+
diff --git a/queue-5.15/riscv-sparse-memory-vmemmap-out-of-bounds-fix.patch b/queue-5.15/riscv-sparse-memory-vmemmap-out-of-bounds-fix.patch
new file mode 100644 (file)
index 0000000..c388a23
--- /dev/null
@@ -0,0 +1,50 @@
+From 24732c67b0a17642fdcd5bd6aa3dd8e80c03f671 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 29 Feb 2024 21:17:23 +0200
+Subject: riscv: Sparse-Memory/vmemmap out-of-bounds fix
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Dimitris Vlachos <dvlachos@ics.forth.gr>
+
+[ Upstream commit a11dd49dcb9376776193e15641f84fcc1e5980c9 ]
+
+Offset vmemmap so that the first page of vmemmap will be mapped
+to the first page of physical memory in order to ensure that
+vmemmap’s bounds will be respected during
+pfn_to_page()/page_to_pfn() operations.
+The conversion macros will produce correct SV39/48/57 addresses
+for every possible/valid DRAM_BASE inside the physical memory limits.
+
+v2:Address Alex's comments
+
+Suggested-by: Alexandre Ghiti <alexghiti@rivosinc.com>
+Signed-off-by: Dimitris Vlachos <dvlachos@ics.forth.gr>
+Reported-by: Dimitris Vlachos <dvlachos@ics.forth.gr>
+Closes: https://lore.kernel.org/linux-riscv/20240202135030.42265-1-csd4492@csd.uoc.gr
+Fixes: d95f1a542c3d ("RISC-V: Implement sparsemem")
+Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
+Link: https://lore.kernel.org/r/20240229191723.32779-1-dvlachos@ics.forth.gr
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/include/asm/pgtable.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
+index 397cb945b16eb..9a3d9b68f2ff4 100644
+--- a/arch/riscv/include/asm/pgtable.h
++++ b/arch/riscv/include/asm/pgtable.h
+@@ -58,7 +58,7 @@
+  * Define vmemmap for pfn_to_page & page_to_pfn calls. Needed if kernel
+  * is configured with CONFIG_SPARSEMEM_VMEMMAP enabled.
+  */
+-#define vmemmap               ((struct page *)VMEMMAP_START)
++#define vmemmap               ((struct page *)VMEMMAP_START - (phys_ram_base >> PAGE_SHIFT))
+ #define PCI_IO_SIZE      SZ_16M
+ #define PCI_IO_END       VMEMMAP_START
+-- 
+2.43.0
+
diff --git a/queue-5.15/rtnetlink-fix-error-logic-of-ifla_bridge_flags-writi.patch b/queue-5.15/rtnetlink-fix-error-logic-of-ifla_bridge_flags-writi.patch
new file mode 100644 (file)
index 0000000..b0ffc18
--- /dev/null
@@ -0,0 +1,83 @@
+From ac5cea766854656b623a23a5d1370b9ea54d0fce Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 27 Feb 2024 20:11:28 +0800
+Subject: rtnetlink: fix error logic of IFLA_BRIDGE_FLAGS writing back
+
+From: Lin Ma <linma@zju.edu.cn>
+
+[ Upstream commit 743ad091fb46e622f1b690385bb15e3cd3daf874 ]
+
+In the commit d73ef2d69c0d ("rtnetlink: let rtnl_bridge_setlink checks
+IFLA_BRIDGE_MODE length"), an adjustment was made to the old loop logic
+in the function `rtnl_bridge_setlink` to enable the loop to also check
+the length of the IFLA_BRIDGE_MODE attribute. However, this adjustment
+removed the `break` statement and led to an error logic of the flags
+writing back at the end of this function.
+
+if (have_flags)
+    memcpy(nla_data(attr), &flags, sizeof(flags));
+    // attr should point to IFLA_BRIDGE_FLAGS NLA !!!
+
+Before the mentioned commit, the `attr` is granted to be IFLA_BRIDGE_FLAGS.
+However, this is not necessarily true fow now as the updated loop will let
+the attr point to the last NLA, even an invalid NLA which could cause
+overflow writes.
+
+This patch introduces a new variable `br_flag` to save the NLA pointer
+that points to IFLA_BRIDGE_FLAGS and uses it to resolve the mentioned
+error logic.
+
+Fixes: d73ef2d69c0d ("rtnetlink: let rtnl_bridge_setlink checks IFLA_BRIDGE_MODE length")
+Signed-off-by: Lin Ma <linma@zju.edu.cn>
+Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
+Link: https://lore.kernel.org/r/20240227121128.608110-1-linma@zju.edu.cn
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/rtnetlink.c | 11 +++++------
+ 1 file changed, 5 insertions(+), 6 deletions(-)
+
+diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
+index 1b71e5c582bbc..ef218e290dfba 100644
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -4925,10 +4925,9 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+       struct net *net = sock_net(skb->sk);
+       struct ifinfomsg *ifm;
+       struct net_device *dev;
+-      struct nlattr *br_spec, *attr = NULL;
++      struct nlattr *br_spec, *attr, *br_flags_attr = NULL;
+       int rem, err = -EOPNOTSUPP;
+       u16 flags = 0;
+-      bool have_flags = false;
+       if (nlmsg_len(nlh) < sizeof(*ifm))
+               return -EINVAL;
+@@ -4946,11 +4945,11 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+       br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
+       if (br_spec) {
+               nla_for_each_nested(attr, br_spec, rem) {
+-                      if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !have_flags) {
++                      if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !br_flags_attr) {
+                               if (nla_len(attr) < sizeof(flags))
+                                       return -EINVAL;
+-                              have_flags = true;
++                              br_flags_attr = attr;
+                               flags = nla_get_u16(attr);
+                       }
+@@ -4994,8 +4993,8 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+               }
+       }
+-      if (have_flags)
+-              memcpy(nla_data(attr), &flags, sizeof(flags));
++      if (br_flags_attr)
++              memcpy(nla_data(br_flags_attr), &flags, sizeof(flags));
+ out:
+       return err;
+ }
+-- 
+2.43.0
+
index 42d7784f547cf9d3335d32b5a5c80430d14d92c5..85fd4674e45eb2f20b62fb968df988a895c3d456 100644 (file)
@@ -1 +1,46 @@
 netfilter-nf_tables-disallow-timeout-for-anonymous-sets.patch
+mtd-spinand-gigadevice-fix-the-get-ecc-status-issue.patch
+netlink-fix-kernel-infoleak-after-free-in-__skb_data.patch
+net-ip_tunnel-prevent-perpetual-headroom-growth.patch
+tun-fix-xdp_rxq_info-s-queue_index-when-detaching.patch
+cpufreq-intel_pstate-fix-pstate-limits-enforcement-f.patch
+net-veth-clear-gro-when-clearing-xdp-even-when-down.patch
+ipv6-fix-potential-struct-net-leak-in-inet6_rtm_geta.patch
+lan78xx-enable-auto-speed-configuration-for-lan7850-.patch
+net-enable-memcg-accounting-for-veth-queues.patch
+veth-try-harder-when-allocating-queue-memory.patch
+net-usb-dm9601-fix-wrong-return-value-in-dm9601_mdio.patch
+uapi-in6-replace-temporary-label-with-rfc9486.patch
+stmmac-clear-variable-when-destroying-workqueue.patch
+bluetooth-avoid-potential-use-after-free-in-hci_erro.patch
+bluetooth-hci_event-fix-wrongly-recorded-wakeup-bd_a.patch
+bluetooth-hci_event-fix-handling-of-hci_ev_io_capa_r.patch
+bluetooth-enforce-validation-on-max-value-of-connect.patch
+netfilter-nf_tables-allow-nfproto_inet-in-nft_-match.patch
+netfilter-nfnetlink_queue-silence-bogus-compiler-war.patch
+netfilter-core-move-ip_ct_attach-indirection-to-stru.patch
+netfilter-make-function-op-structures-const.patch
+netfilter-let-reset-rules-clean-out-conntrack-entrie.patch
+netfilter-bridge-confirm-multicast-packets-before-pa.patch
+rtnetlink-fix-error-logic-of-ifla_bridge_flags-writi.patch
+igb-extend-ptp-timestamp-adjustments-to-i211.patch
+tls-rx-don-t-store-the-record-type-in-socket-context.patch
+tls-rx-don-t-store-the-decryption-status-in-socket-c.patch
+tls-rx-don-t-issue-wake-ups-when-data-is-decrypted.patch
+tls-rx-refactor-decrypt_skb_update.patch
+tls-hw-rx-use-return-value-of-tls_device_decrypted-t.patch
+tls-rx-drop-unnecessary-arguments-from-tls_setup_fro.patch
+tls-rx-don-t-report-text-length-from-the-bowels-of-d.patch
+tls-rx-wrap-decryption-arguments-in-a-structure.patch
+tls-rx-factor-out-writing-contenttype-to-cmsg.patch
+tls-rx-don-t-track-the-async-count.patch
+tls-rx-move-counting-tlsdecrypterrors-for-sync.patch
+tls-rx-assume-crypto-always-calls-our-callback.patch
+tls-rx-use-async-as-an-in-out-argument.patch
+tls-decrement-decrypt_pending-if-no-async-completion.patch
+efi-capsule-loader-fix-incorrect-allocation-size.patch
+power-supply-bq27xxx-i2c-do-not-free-non-existing-ir.patch
+alsa-drop-leftover-snd-rtctimer-stuff-from-makefile.patch
+fbcon-always-restore-the-old-font-data-in-fbcon_do_s.patch
+afs-fix-endless-loop-in-directory-parsing.patch
+riscv-sparse-memory-vmemmap-out-of-bounds-fix.patch
diff --git a/queue-5.15/stmmac-clear-variable-when-destroying-workqueue.patch b/queue-5.15/stmmac-clear-variable-when-destroying-workqueue.patch
new file mode 100644 (file)
index 0000000..d96658f
--- /dev/null
@@ -0,0 +1,83 @@
+From dea96a3b376af58a1727fb971f53406a7c2d1e67 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 26 Feb 2024 17:42:32 +0100
+Subject: stmmac: Clear variable when destroying workqueue
+
+From: Jakub Raczynski <j.raczynski@samsung.com>
+
+[ Upstream commit 8af411bbba1f457c33734795f024d0ef26d0963f ]
+
+Currently when suspending driver and stopping workqueue it is checked whether
+workqueue is not NULL and if so, it is destroyed.
+Function destroy_workqueue() does drain queue and does clear variable, but
+it does not set workqueue variable to NULL. This can cause kernel/module
+panic if code attempts to clear workqueue that was not initialized.
+
+This scenario is possible when resuming suspended driver in stmmac_resume(),
+because there is no handling for failed stmmac_hw_setup(),
+which can fail and return if DMA engine has failed to initialize,
+and workqueue is initialized after DMA engine.
+Should DMA engine fail to initialize, resume will proceed normally,
+but interface won't work and TX queue will eventually timeout,
+causing 'Reset adapter' error.
+This then does destroy workqueue during reset process.
+And since workqueue is initialized after DMA engine and can be skipped,
+it will cause kernel/module panic.
+
+To secure against this possible crash, set workqueue variable to NULL when
+destroying workqueue.
+
+Log/backtrace from crash goes as follows:
+[88.031977]------------[ cut here ]------------
+[88.031985]NETDEV WATCHDOG: eth0 (sxgmac): transmit queue 1 timed out
+[88.032017]WARNING: CPU: 0 PID: 0 at net/sched/sch_generic.c:477 dev_watchdog+0x390/0x398
+           <Skipping backtrace for watchdog timeout>
+[88.032251]---[ end trace e70de432e4d5c2c0 ]---
+[88.032282]sxgmac 16d88000.ethernet eth0: Reset adapter.
+[88.036359]------------[ cut here ]------------
+[88.036519]Call trace:
+[88.036523] flush_workqueue+0x3e4/0x430
+[88.036528] drain_workqueue+0xc4/0x160
+[88.036533] destroy_workqueue+0x40/0x270
+[88.036537] stmmac_fpe_stop_wq+0x4c/0x70
+[88.036541] stmmac_release+0x278/0x280
+[88.036546] __dev_close_many+0xcc/0x158
+[88.036551] dev_close_many+0xbc/0x190
+[88.036555] dev_close.part.0+0x70/0xc0
+[88.036560] dev_close+0x24/0x30
+[88.036564] stmmac_service_task+0x110/0x140
+[88.036569] process_one_work+0x1d8/0x4a0
+[88.036573] worker_thread+0x54/0x408
+[88.036578] kthread+0x164/0x170
+[88.036583] ret_from_fork+0x10/0x20
+[88.036588]---[ end trace e70de432e4d5c2c1 ]---
+[88.036597]Unable to handle kernel NULL pointer dereference at virtual address 0000000000000004
+
+Fixes: 5a5586112b929 ("net: stmmac: support FPE link partner hand-shaking procedure")
+Signed-off-by: Jakub Raczynski <j.raczynski@samsung.com>
+Reviewed-by: Jiri Pirko <jiri@nvidia.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+index a1c1e353ca072..b0ab8f6986f8b 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+@@ -3825,8 +3825,10 @@ static void stmmac_fpe_stop_wq(struct stmmac_priv *priv)
+ {
+       set_bit(__FPE_REMOVING, &priv->fpe_task_state);
+-      if (priv->fpe_wq)
++      if (priv->fpe_wq) {
+               destroy_workqueue(priv->fpe_wq);
++              priv->fpe_wq = NULL;
++      }
+       netdev_info(priv->dev, "FPE workqueue stop");
+ }
+-- 
+2.43.0
+
diff --git a/queue-5.15/tls-decrement-decrypt_pending-if-no-async-completion.patch b/queue-5.15/tls-decrement-decrypt_pending-if-no-async-completion.patch
new file mode 100644 (file)
index 0000000..fdd395f
--- /dev/null
@@ -0,0 +1,42 @@
+From 1ff365db9e76d0b1fa3372803386e06c4e2e34bf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 28 Feb 2024 23:43:57 +0100
+Subject: tls: decrement decrypt_pending if no async completion will be called
+
+From: Sabrina Dubroca <sd@queasysnail.net>
+
+[ Upstream commit f7fa16d49837f947ee59492958f9e6f0e51d9a78 ]
+
+With mixed sync/async decryption, or failures of crypto_aead_decrypt,
+we increment decrypt_pending but we never do the corresponding
+decrement since tls_decrypt_done will not be called. In this case, we
+should decrement decrypt_pending immediately to avoid getting stuck.
+
+For example, the prequeue prequeue test gets stuck with mixed
+modes (one async decrypt + one sync decrypt).
+
+Fixes: 94524d8fc965 ("net/tls: Add support for async decryption of tls records")
+Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
+Link: https://lore.kernel.org/r/c56d5fc35543891d5319f834f25622360e1bfbec.1709132643.git.sd@queasysnail.net
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/tls/tls_sw.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
+index a1a99f9f093b1..83319a3b8bdd1 100644
+--- a/net/tls/tls_sw.c
++++ b/net/tls/tls_sw.c
+@@ -273,6 +273,8 @@ static int tls_do_decryption(struct sock *sk,
+                       return 0;
+               ret = crypto_wait_req(ret, &ctx->async_wait);
++      } else if (darg->async) {
++              atomic_dec(&ctx->decrypt_pending);
+       }
+       darg->async = false;
+-- 
+2.43.0
+
diff --git a/queue-5.15/tls-hw-rx-use-return-value-of-tls_device_decrypted-t.patch b/queue-5.15/tls-hw-rx-use-return-value-of-tls_device_decrypted-t.patch
new file mode 100644 (file)
index 0000000..0da1942
--- /dev/null
@@ -0,0 +1,76 @@
+From ee9dec9a54e9e842b9958aa991a0e679e73f8e98 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 7 Apr 2022 20:38:23 -0700
+Subject: tls: hw: rx: use return value of tls_device_decrypted() to carry
+ status
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit 71471ca32505afa7c3f7f6a8268716e1ddb81cd4 ]
+
+Instead of tls_device poking into internals of the message
+return 1 from tls_device_decrypted() if the device handled
+the decryption.
+
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: f7fa16d49837 ("tls: decrement decrypt_pending if no async completion will be called")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/tls/tls_device.c | 7 ++-----
+ net/tls/tls_sw.c     | 5 ++---
+ 2 files changed, 4 insertions(+), 8 deletions(-)
+
+diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
+index f23d18e666284..e7c361807590d 100644
+--- a/net/tls/tls_device.c
++++ b/net/tls/tls_device.c
+@@ -936,7 +936,6 @@ int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx,
+                        struct sk_buff *skb, struct strp_msg *rxm)
+ {
+       struct tls_offload_context_rx *ctx = tls_offload_ctx_rx(tls_ctx);
+-      struct tls_msg *tlm = tls_msg(skb);
+       int is_decrypted = skb->decrypted;
+       int is_encrypted = !is_decrypted;
+       struct sk_buff *skb_iter;
+@@ -951,11 +950,9 @@ int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx,
+                                  tls_ctx->rx.rec_seq, rxm->full_len,
+                                  is_encrypted, is_decrypted);
+-      tlm->decrypted |= is_decrypted;
+-
+       if (unlikely(test_bit(TLS_RX_DEV_DEGRADED, &tls_ctx->flags))) {
+               if (likely(is_encrypted || is_decrypted))
+-                      return 0;
++                      return is_decrypted;
+               /* After tls_device_down disables the offload, the next SKB will
+                * likely have initial fragments decrypted, and final ones not
+@@ -970,7 +967,7 @@ int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx,
+        */
+       if (is_decrypted) {
+               ctx->resync_nh_reset = 1;
+-              return 0;
++              return is_decrypted;
+       }
+       if (is_encrypted) {
+               tls_device_core_ctrl_rx_resync(tls_ctx, ctx, sk, skb);
+diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
+index 7da17dd7c38b9..eed32ef3ca4a0 100644
+--- a/net/tls/tls_sw.c
++++ b/net/tls/tls_sw.c
+@@ -1571,9 +1571,8 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
+               err = tls_device_decrypted(sk, tls_ctx, skb, rxm);
+               if (err < 0)
+                       return err;
+-
+-              /* skip SW decryption if NIC handled it already */
+-              if (tlm->decrypted) {
++              if (err > 0) {
++                      tlm->decrypted = 1;
+                       *zc = false;
+                       goto decrypt_done;
+               }
+-- 
+2.43.0
+
diff --git a/queue-5.15/tls-rx-assume-crypto-always-calls-our-callback.patch b/queue-5.15/tls-rx-assume-crypto-always-calls-our-callback.patch
new file mode 100644 (file)
index 0000000..8581b30
--- /dev/null
@@ -0,0 +1,38 @@
+From 4d8900bf4b8d44dab517788ebb440b614a6a719c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 11 Apr 2022 12:19:12 -0700
+Subject: tls: rx: assume crypto always calls our callback
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit 1c699ffa48a15710746989c36a82cbfb07e8d17f ]
+
+If crypto didn't always invoke our callback for async
+we'd not be clearing skb->sk and would crash in the
+skb core when freeing it. This if must be dead code.
+
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: f7fa16d49837 ("tls: decrement decrypt_pending if no async completion will be called")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/tls/tls_sw.c | 3 ---
+ 1 file changed, 3 deletions(-)
+
+diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
+index 85fa49170b4e5..27ac27daec868 100644
+--- a/net/tls/tls_sw.c
++++ b/net/tls/tls_sw.c
+@@ -277,9 +277,6 @@ static int tls_do_decryption(struct sock *sk,
+       if (ret == -EBADMSG)
+               TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSDECRYPTERROR);
+-      if (async)
+-              atomic_dec(&ctx->decrypt_pending);
+-
+       return ret;
+ }
+-- 
+2.43.0
+
diff --git a/queue-5.15/tls-rx-don-t-issue-wake-ups-when-data-is-decrypted.patch b/queue-5.15/tls-rx-don-t-issue-wake-ups-when-data-is-decrypted.patch
new file mode 100644 (file)
index 0000000..52b00bd
--- /dev/null
@@ -0,0 +1,47 @@
+From b89682750d6bee89bbcf232970f6d5770424ad76 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 7 Apr 2022 20:38:21 -0700
+Subject: tls: rx: don't issue wake ups when data is decrypted
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit 5dbda02d322db7762f1a0348117cde913fb46c13 ]
+
+We inform the applications that data is available when
+the record is received. Decryption happens inline inside
+recvmsg or splice call. Generating another wakeup inside
+the decryption handler seems pointless as someone must
+be actively reading the socket if we are executing this
+code.
+
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: f7fa16d49837 ("tls: decrement decrypt_pending if no async completion will be called")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/tls/tls_sw.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
+index 0a6630bbef53e..5fdc4f5193ee5 100644
+--- a/net/tls/tls_sw.c
++++ b/net/tls/tls_sw.c
+@@ -1557,7 +1557,6 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
+                             bool async)
+ {
+       struct tls_context *tls_ctx = tls_get_ctx(sk);
+-      struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+       struct tls_prot_info *prot = &tls_ctx->prot_info;
+       struct strp_msg *rxm = strp_msg(skb);
+       struct tls_msg *tlm = tls_msg(skb);
+@@ -1596,7 +1595,6 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
+               rxm->full_len -= prot->overhead_size;
+               tls_advance_record_sn(sk, prot, &tls_ctx->rx);
+               tlm->decrypted = 1;
+-              ctx->saved_data_ready(sk);
+       } else {
+               *zc = false;
+       }
+-- 
+2.43.0
+
diff --git a/queue-5.15/tls-rx-don-t-report-text-length-from-the-bowels-of-d.patch b/queue-5.15/tls-rx-don-t-report-text-length-from-the-bowels-of-d.patch
new file mode 100644 (file)
index 0000000..d44221f
--- /dev/null
@@ -0,0 +1,177 @@
+From c4bd2ea6944d8b91a240bb02187ea60feec48ead Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Apr 2022 11:31:25 -0700
+Subject: tls: rx: don't report text length from the bowels of decrypt
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit 9bdf75ccffa690237cd0b472cd598cf6d22873dc ]
+
+We plumb pointer to chunk all the way to the decryption method.
+It's set to the length of the text when decrypt_skb_update()
+returns.
+
+I think the code is written this way because original TLS
+implementation passed &chunk to zerocopy_from_iter() and this
+was carried forward as the code gotten more complex, without
+any refactoring.
+
+The fix for peek() introduced a new variable - to_decrypt
+which for all practical purposes is what chunk is going to
+get set to. Spare ourselves the pointer passing, use to_decrypt.
+
+Use this opportunity to clean things up a little further.
+
+Note that chunk / to_decrypt was mostly needed for the async
+path, since the sync path would access rxm->full_len (decryption
+transforms full_len from record size to text size). Use the
+right source of truth more explicitly.
+
+We have three cases:
+ - async - it's TLS 1.2 only, so chunk == to_decrypt, but we
+           need the min() because to_decrypt is a whole record
+          and we don't want to underflow len. Note that we can't
+          handle partial record by falling back to sync as it
+          would introduce reordering against records in flight.
+ - zc - again, TLS 1.2 only for now, so chunk == to_decrypt,
+        we don't do zc if len < to_decrypt, no need to check again.
+ - normal - it already handles chunk > len, we can factor out the
+            assignment to rxm->full_len and share it with zc.
+
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: f7fa16d49837 ("tls: decrement decrypt_pending if no async completion will be called")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/tls/tls_sw.c | 33 ++++++++++++++-------------------
+ 1 file changed, 14 insertions(+), 19 deletions(-)
+
+diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
+index cf09f147f5a09..fc1fa98d21937 100644
+--- a/net/tls/tls_sw.c
++++ b/net/tls/tls_sw.c
+@@ -1415,7 +1415,7 @@ static int tls_setup_from_iter(struct iov_iter *from,
+ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
+                           struct iov_iter *out_iov,
+                           struct scatterlist *out_sg,
+-                          int *chunk, bool *zc, bool async)
++                          bool *zc, bool async)
+ {
+       struct tls_context *tls_ctx = tls_get_ctx(sk);
+       struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+@@ -1522,7 +1522,6 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
+                                                 (n_sgout - 1));
+                       if (err < 0)
+                               goto fallback_to_reg_recv;
+-                      *chunk = data_len;
+               } else if (out_sg) {
+                       memcpy(sgout, out_sg, n_sgout * sizeof(*sgout));
+               } else {
+@@ -1532,7 +1531,6 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
+ fallback_to_reg_recv:
+               sgout = sgin;
+               pages = 0;
+-              *chunk = data_len;
+               *zc = false;
+       }
+@@ -1551,8 +1549,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
+ }
+ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
+-                            struct iov_iter *dest, int *chunk, bool *zc,
+-                            bool async)
++                            struct iov_iter *dest, bool *zc, bool async)
+ {
+       struct tls_context *tls_ctx = tls_get_ctx(sk);
+       struct tls_prot_info *prot = &tls_ctx->prot_info;
+@@ -1576,7 +1573,7 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
+               }
+       }
+-      err = decrypt_internal(sk, skb, dest, NULL, chunk, zc, async);
++      err = decrypt_internal(sk, skb, dest, NULL, zc, async);
+       if (err < 0) {
+               if (err == -EINPROGRESS)
+                       tls_advance_record_sn(sk, prot, &tls_ctx->rx);
+@@ -1603,9 +1600,8 @@ int decrypt_skb(struct sock *sk, struct sk_buff *skb,
+               struct scatterlist *sgout)
+ {
+       bool zc = true;
+-      int chunk;
+-      return decrypt_internal(sk, skb, NULL, sgout, &chunk, &zc, false);
++      return decrypt_internal(sk, skb, NULL, sgout, &zc, false);
+ }
+ static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb,
+@@ -1795,9 +1791,8 @@ int tls_sw_recvmsg(struct sock *sk,
+       num_async = 0;
+       while (len && (decrypted + copied < target || ctx->recv_pkt)) {
+               bool retain_skb = false;
++              int to_decrypt, chunk;
+               bool zc = false;
+-              int to_decrypt;
+-              int chunk = 0;
+               bool async_capable;
+               bool async = false;
+@@ -1834,7 +1829,7 @@ int tls_sw_recvmsg(struct sock *sk,
+                       async_capable = false;
+               err = decrypt_skb_update(sk, skb, &msg->msg_iter,
+-                                       &chunk, &zc, async_capable);
++                                       &zc, async_capable);
+               if (err < 0 && err != -EINPROGRESS) {
+                       tls_err_abort(sk, -EBADMSG);
+                       goto recv_end;
+@@ -1872,8 +1867,13 @@ int tls_sw_recvmsg(struct sock *sk,
+                       }
+               }
+-              if (async)
++              if (async) {
++                      /* TLS 1.2-only, to_decrypt must be text length */
++                      chunk = min_t(int, to_decrypt, len);
+                       goto pick_next_record;
++              }
++              /* TLS 1.3 may have updated the length by more than overhead */
++              chunk = rxm->full_len;
+               if (!zc) {
+                       if (bpf_strp_enabled) {
+@@ -1889,11 +1889,9 @@ int tls_sw_recvmsg(struct sock *sk,
+                               }
+                       }
+-                      if (rxm->full_len > len) {
++                      if (chunk > len) {
+                               retain_skb = true;
+                               chunk = len;
+-                      } else {
+-                              chunk = rxm->full_len;
+                       }
+                       err = skb_copy_datagram_msg(skb, rxm->offset,
+@@ -1908,9 +1906,6 @@ int tls_sw_recvmsg(struct sock *sk,
+               }
+ pick_next_record:
+-              if (chunk > len)
+-                      chunk = len;
+-
+               decrypted += chunk;
+               len -= chunk;
+@@ -2011,7 +2006,7 @@ ssize_t tls_sw_splice_read(struct socket *sock,  loff_t *ppos,
+               if (!skb)
+                       goto splice_read_end;
+-              err = decrypt_skb_update(sk, skb, NULL, &chunk, &zc, false);
++              err = decrypt_skb_update(sk, skb, NULL, &zc, false);
+               if (err < 0) {
+                       tls_err_abort(sk, -EBADMSG);
+                       goto splice_read_end;
+-- 
+2.43.0
+
diff --git a/queue-5.15/tls-rx-don-t-store-the-decryption-status-in-socket-c.patch b/queue-5.15/tls-rx-don-t-store-the-decryption-status-in-socket-c.patch
new file mode 100644 (file)
index 0000000..6c57115
--- /dev/null
@@ -0,0 +1,116 @@
+From 6262e20920b50d8cc830964df259ff18380046d2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 7 Apr 2022 20:38:17 -0700
+Subject: tls: rx: don't store the decryption status in socket context
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit 7dc59c33d62c4520a119051d4486c214ef5caa23 ]
+
+Similar justification to previous change, the information
+about decryption status belongs in the skb.
+
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: f7fa16d49837 ("tls: decrement decrypt_pending if no async completion will be called")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/strparser.h |  1 +
+ include/net/tls.h       |  1 -
+ net/tls/tls_device.c    |  3 ++-
+ net/tls/tls_sw.c        | 10 ++++++----
+ 4 files changed, 9 insertions(+), 6 deletions(-)
+
+diff --git a/include/net/strparser.h b/include/net/strparser.h
+index c271543076cf8..a191486eb1e4c 100644
+--- a/include/net/strparser.h
++++ b/include/net/strparser.h
+@@ -72,6 +72,7 @@ struct sk_skb_cb {
+       u64 temp_reg;
+       struct tls_msg {
+               u8 control;
++              u8 decrypted;
+       } tls;
+ };
+diff --git a/include/net/tls.h b/include/net/tls.h
+index 24c1b718ceacc..ea0aeae26cf76 100644
+--- a/include/net/tls.h
++++ b/include/net/tls.h
+@@ -147,7 +147,6 @@ struct tls_sw_context_rx {
+       struct sk_buff *recv_pkt;
+       u8 async_capable:1;
+-      u8 decrypted:1;
+       atomic_t decrypt_pending;
+       /* protect crypto_wait with decrypt_pending*/
+       spinlock_t decrypt_compl_lock;
+diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
+index 88785196a8966..f23d18e666284 100644
+--- a/net/tls/tls_device.c
++++ b/net/tls/tls_device.c
+@@ -936,6 +936,7 @@ int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx,
+                        struct sk_buff *skb, struct strp_msg *rxm)
+ {
+       struct tls_offload_context_rx *ctx = tls_offload_ctx_rx(tls_ctx);
++      struct tls_msg *tlm = tls_msg(skb);
+       int is_decrypted = skb->decrypted;
+       int is_encrypted = !is_decrypted;
+       struct sk_buff *skb_iter;
+@@ -950,7 +951,7 @@ int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx,
+                                  tls_ctx->rx.rec_seq, rxm->full_len,
+                                  is_encrypted, is_decrypted);
+-      ctx->sw.decrypted |= is_decrypted;
++      tlm->decrypted |= is_decrypted;
+       if (unlikely(test_bit(TLS_RX_DEV_DEGRADED, &tls_ctx->flags))) {
+               if (likely(is_encrypted || is_decrypted))
+diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
+index 82d7c9b036bc7..0a6630bbef53e 100644
+--- a/net/tls/tls_sw.c
++++ b/net/tls/tls_sw.c
+@@ -1560,9 +1560,10 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
+       struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+       struct tls_prot_info *prot = &tls_ctx->prot_info;
+       struct strp_msg *rxm = strp_msg(skb);
++      struct tls_msg *tlm = tls_msg(skb);
+       int pad, err = 0;
+-      if (!ctx->decrypted) {
++      if (!tlm->decrypted) {
+               if (tls_ctx->rx_conf == TLS_HW) {
+                       err = tls_device_decrypted(sk, tls_ctx, skb, rxm);
+                       if (err < 0)
+@@ -1570,7 +1571,7 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
+               }
+               /* Still not decrypted after tls_device */
+-              if (!ctx->decrypted) {
++              if (!tlm->decrypted) {
+                       err = decrypt_internal(sk, skb, dest, NULL, chunk, zc,
+                                              async);
+                       if (err < 0) {
+@@ -1594,7 +1595,7 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
+               rxm->offset += prot->prepend_size;
+               rxm->full_len -= prot->overhead_size;
+               tls_advance_record_sn(sk, prot, &tls_ctx->rx);
+-              ctx->decrypted = 1;
++              tlm->decrypted = 1;
+               ctx->saved_data_ready(sk);
+       } else {
+               *zc = false;
+@@ -2137,8 +2138,9 @@ static void tls_queue(struct strparser *strp, struct sk_buff *skb)
+ {
+       struct tls_context *tls_ctx = tls_get_ctx(strp->sk);
+       struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
++      struct tls_msg *tlm = tls_msg(skb);
+-      ctx->decrypted = 0;
++      tlm->decrypted = 0;
+       ctx->recv_pkt = skb;
+       strp_pause(strp);
+-- 
+2.43.0
+
diff --git a/queue-5.15/tls-rx-don-t-store-the-record-type-in-socket-context.patch b/queue-5.15/tls-rx-don-t-store-the-record-type-in-socket-context.patch
new file mode 100644 (file)
index 0000000..a647997
--- /dev/null
@@ -0,0 +1,241 @@
+From f550346a37e8a8e8b3dc78a2e78d0efb3b9cac0a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 7 Apr 2022 20:38:16 -0700
+Subject: tls: rx: don't store the record type in socket context
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit c3f6bb74137c68b515b7e2ff123a80611e801013 ]
+
+Original TLS implementation was handling one record at a time.
+It stashed the type of the record inside tls context (per socket
+structure) for convenience. When async crypto support was added
+[1] the author had to use skb->cb to store the type per-message.
+
+The use of skb->cb overlaps with strparser, however, so a hybrid
+approach was taken where type is stored in context while parsing
+(since we parse a message at a time) but once parsed its copied
+to skb->cb.
+
+Recently a workaround for sockmaps [2] exposed the previously
+private struct _strp_msg and started a trend of adding user
+fields directly in strparser's header. This is cleaner than
+storing information about an skb in the context.
+
+This change is not strictly necessary, but IMHO the ownership
+of the context field is confusing. Information naturally
+belongs to the skb.
+
+[1] commit 94524d8fc965 ("net/tls: Add support for async decryption of tls records")
+[2] commit b2c4618162ec ("bpf, sockmap: sk_skb data_end access incorrect when src_reg = dst_reg")
+
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: f7fa16d49837 ("tls: decrement decrypt_pending if no async completion will be called")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/strparser.h |  3 +++
+ include/net/tls.h       | 10 +++-------
+ net/tls/tls_sw.c        | 38 +++++++++++++++++---------------------
+ 3 files changed, 23 insertions(+), 28 deletions(-)
+
+diff --git a/include/net/strparser.h b/include/net/strparser.h
+index 732b7097d78e4..c271543076cf8 100644
+--- a/include/net/strparser.h
++++ b/include/net/strparser.h
+@@ -70,6 +70,9 @@ struct sk_skb_cb {
+        * when dst_reg == src_reg.
+        */
+       u64 temp_reg;
++      struct tls_msg {
++              u8 control;
++      } tls;
+ };
+ static inline struct strp_msg *strp_msg(struct sk_buff *skb)
+diff --git a/include/net/tls.h b/include/net/tls.h
+index eda0015c5c592..24c1b718ceacc 100644
+--- a/include/net/tls.h
++++ b/include/net/tls.h
+@@ -116,11 +116,6 @@ struct tls_rec {
+       u8 aead_req_ctx[];
+ };
+-struct tls_msg {
+-      struct strp_msg rxm;
+-      u8 control;
+-};
+-
+ struct tx_work {
+       struct delayed_work work;
+       struct sock *sk;
+@@ -151,7 +146,6 @@ struct tls_sw_context_rx {
+       void (*saved_data_ready)(struct sock *sk);
+       struct sk_buff *recv_pkt;
+-      u8 control;
+       u8 async_capable:1;
+       u8 decrypted:1;
+       atomic_t decrypt_pending;
+@@ -410,7 +404,9 @@ void tls_free_partial_record(struct sock *sk, struct tls_context *ctx);
+ static inline struct tls_msg *tls_msg(struct sk_buff *skb)
+ {
+-      return (struct tls_msg *)strp_msg(skb);
++      struct sk_skb_cb *scb = (struct sk_skb_cb *)skb->cb;
++
++      return &scb->tls;
+ }
+ static inline bool tls_is_partially_sent_record(struct tls_context *ctx)
+diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
+index e6f700f67c010..82d7c9b036bc7 100644
+--- a/net/tls/tls_sw.c
++++ b/net/tls/tls_sw.c
+@@ -128,10 +128,10 @@ static int skb_nsg(struct sk_buff *skb, int offset, int len)
+         return __skb_nsg(skb, offset, len, 0);
+ }
+-static int padding_length(struct tls_sw_context_rx *ctx,
+-                        struct tls_prot_info *prot, struct sk_buff *skb)
++static int padding_length(struct tls_prot_info *prot, struct sk_buff *skb)
+ {
+       struct strp_msg *rxm = strp_msg(skb);
++      struct tls_msg *tlm = tls_msg(skb);
+       int sub = 0;
+       /* Determine zero-padding length */
+@@ -153,7 +153,7 @@ static int padding_length(struct tls_sw_context_rx *ctx,
+                       sub++;
+                       back++;
+               }
+-              ctx->control = content_type;
++              tlm->control = content_type;
+       }
+       return sub;
+ }
+@@ -187,7 +187,7 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err)
+               struct strp_msg *rxm = strp_msg(skb);
+               int pad;
+-              pad = padding_length(ctx, prot, skb);
++              pad = padding_length(prot, skb);
+               if (pad < 0) {
+                       ctx->async_wait.err = pad;
+                       tls_err_abort(skb->sk, pad);
+@@ -1423,6 +1423,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
+       struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+       struct tls_prot_info *prot = &tls_ctx->prot_info;
+       struct strp_msg *rxm = strp_msg(skb);
++      struct tls_msg *tlm = tls_msg(skb);
+       int n_sgin, n_sgout, nsg, mem_size, aead_size, err, pages = 0;
+       struct aead_request *aead_req;
+       struct sk_buff *unused;
+@@ -1500,7 +1501,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
+       /* Prepare AAD */
+       tls_make_aad(aad, rxm->full_len - prot->overhead_size +
+                    prot->tail_size,
+-                   tls_ctx->rx.rec_seq, ctx->control, prot);
++                   tls_ctx->rx.rec_seq, tlm->control, prot);
+       /* Prepare sgin */
+       sg_init_table(sgin, n_sgin);
+@@ -1585,7 +1586,7 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
+                       *zc = false;
+               }
+-              pad = padding_length(ctx, prot, skb);
++              pad = padding_length(prot, skb);
+               if (pad < 0)
+                       return pad;
+@@ -1817,26 +1818,21 @@ int tls_sw_recvmsg(struct sock *sk,
+                               }
+                       }
+                       goto recv_end;
+-              } else {
+-                      tlm = tls_msg(skb);
+-                      if (prot->version == TLS_1_3_VERSION)
+-                              tlm->control = 0;
+-                      else
+-                              tlm->control = ctx->control;
+               }
+               rxm = strp_msg(skb);
++              tlm = tls_msg(skb);
+               to_decrypt = rxm->full_len - prot->overhead_size;
+               if (to_decrypt <= len && !is_kvec && !is_peek &&
+-                  ctx->control == TLS_RECORD_TYPE_DATA &&
++                  tlm->control == TLS_RECORD_TYPE_DATA &&
+                   prot->version != TLS_1_3_VERSION &&
+                   !bpf_strp_enabled)
+                       zc = true;
+               /* Do not use async mode if record is non-data */
+-              if (ctx->control == TLS_RECORD_TYPE_DATA && !bpf_strp_enabled)
++              if (tlm->control == TLS_RECORD_TYPE_DATA && !bpf_strp_enabled)
+                       async_capable = ctx->async_capable;
+               else
+                       async_capable = false;
+@@ -1851,8 +1847,6 @@ int tls_sw_recvmsg(struct sock *sk,
+               if (err == -EINPROGRESS) {
+                       async = true;
+                       num_async++;
+-              } else if (prot->version == TLS_1_3_VERSION) {
+-                      tlm->control = ctx->control;
+               }
+               /* If the type of records being processed is not known yet,
+@@ -1999,6 +1993,7 @@ ssize_t tls_sw_splice_read(struct socket *sock,  loff_t *ppos,
+       struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+       struct strp_msg *rxm = NULL;
+       struct sock *sk = sock->sk;
++      struct tls_msg *tlm;
+       struct sk_buff *skb;
+       ssize_t copied = 0;
+       bool from_queue;
+@@ -2027,14 +2022,15 @@ ssize_t tls_sw_splice_read(struct socket *sock,  loff_t *ppos,
+               }
+       }
++      rxm = strp_msg(skb);
++      tlm = tls_msg(skb);
++
+       /* splice does not support reading control messages */
+-      if (ctx->control != TLS_RECORD_TYPE_DATA) {
++      if (tlm->control != TLS_RECORD_TYPE_DATA) {
+               err = -EINVAL;
+               goto splice_read_end;
+       }
+-      rxm = strp_msg(skb);
+-
+       chunk = min_t(unsigned int, rxm->full_len, len);
+       copied = skb_splice_bits(skb, sk, rxm->offset, pipe, chunk, flags);
+       if (copied < 0)
+@@ -2077,10 +2073,10 @@ bool tls_sw_sock_is_readable(struct sock *sk)
+ static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
+ {
+       struct tls_context *tls_ctx = tls_get_ctx(strp->sk);
+-      struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+       struct tls_prot_info *prot = &tls_ctx->prot_info;
+       char header[TLS_HEADER_SIZE + MAX_IV_SIZE];
+       struct strp_msg *rxm = strp_msg(skb);
++      struct tls_msg *tlm = tls_msg(skb);
+       size_t cipher_overhead;
+       size_t data_len = 0;
+       int ret;
+@@ -2101,7 +2097,7 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
+       if (ret < 0)
+               goto read_failure;
+-      ctx->control = header[0];
++      tlm->control = header[0];
+       data_len = ((header[4] & 0xFF) | (header[3] << 8));
+-- 
+2.43.0
+
diff --git a/queue-5.15/tls-rx-don-t-track-the-async-count.patch b/queue-5.15/tls-rx-don-t-track-the-async-count.patch
new file mode 100644 (file)
index 0000000..b338d82
--- /dev/null
@@ -0,0 +1,84 @@
+From aaacfbe1c45047270c216374c84a8149b10d630a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Apr 2022 11:31:30 -0700
+Subject: tls: rx: don't track the async count
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit 7da18bcc5e4cfd14ea520367546c5697e64ae592 ]
+
+We track both if the last record was handled by async crypto
+and how many records were async. This is not necessary. We
+implicitly assume once crypto goes async it will stay that
+way, otherwise we'd reorder records. So just track if we're
+in async mode, the exact number of records is not necessary.
+
+This change also forces us into "async" mode more consistently
+in case crypto ever decided to interleave async and sync.
+
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: f7fa16d49837 ("tls: decrement decrypt_pending if no async completion will be called")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/tls/tls_sw.c | 12 +++++-------
+ 1 file changed, 5 insertions(+), 7 deletions(-)
+
+diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
+index ca71a9f559b37..d3bbae9af9f41 100644
+--- a/net/tls/tls_sw.c
++++ b/net/tls/tls_sw.c
+@@ -1753,13 +1753,13 @@ int tls_sw_recvmsg(struct sock *sk,
+       struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+       struct tls_prot_info *prot = &tls_ctx->prot_info;
+       struct sk_psock *psock;
+-      int num_async, pending;
+       unsigned char control = 0;
+       ssize_t decrypted = 0;
+       struct strp_msg *rxm;
+       struct tls_msg *tlm;
+       struct sk_buff *skb;
+       ssize_t copied = 0;
++      bool async = false;
+       int target, err = 0;
+       long timeo;
+       bool is_kvec = iov_iter_is_kvec(&msg->msg_iter);
+@@ -1791,12 +1791,10 @@ int tls_sw_recvmsg(struct sock *sk,
+       timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+       decrypted = 0;
+-      num_async = 0;
+       while (len && (decrypted + copied < target || ctx->recv_pkt)) {
+               struct tls_decrypt_arg darg = {};
+               bool retain_skb = false;
+               int to_decrypt, chunk;
+-              bool async;
+               skb = tls_wait_data(sk, psock, flags & MSG_DONTWAIT, timeo, &err);
+               if (!skb) {
+@@ -1836,10 +1834,8 @@ int tls_sw_recvmsg(struct sock *sk,
+                       goto recv_end;
+               }
+-              if (err == -EINPROGRESS) {
++              if (err == -EINPROGRESS)
+                       async = true;
+-                      num_async++;
+-              }
+               /* If the type of records being processed is not known yet,
+                * set it to record type just dequeued. If it is already known,
+@@ -1914,7 +1910,9 @@ int tls_sw_recvmsg(struct sock *sk,
+       }
+ recv_end:
+-      if (num_async) {
++      if (async) {
++              int pending;
++
+               /* Wait for all previously submitted records to be decrypted */
+               spin_lock_bh(&ctx->decrypt_compl_lock);
+               ctx->async_notify = true;
+-- 
+2.43.0
+
diff --git a/queue-5.15/tls-rx-drop-unnecessary-arguments-from-tls_setup_fro.patch b/queue-5.15/tls-rx-drop-unnecessary-arguments-from-tls_setup_fro.patch
new file mode 100644 (file)
index 0000000..19ba798
--- /dev/null
@@ -0,0 +1,73 @@
+From 1a773bacd9783962656ab5cba2346b81f34697cc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Apr 2022 11:31:24 -0700
+Subject: tls: rx: drop unnecessary arguments from tls_setup_from_iter()
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit d4bd88e67666c73cfa9d75c282e708890d4f10a7 ]
+
+sk is unused, remove it to make it clear the function
+doesn't poke at the socket.
+
+size_used is always 0 on input and @length on success.
+
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: f7fa16d49837 ("tls: decrement decrypt_pending if no async completion will be called")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/tls/tls_sw.c | 14 ++++++--------
+ 1 file changed, 6 insertions(+), 8 deletions(-)
+
+diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
+index eed32ef3ca4a0..cf09f147f5a09 100644
+--- a/net/tls/tls_sw.c
++++ b/net/tls/tls_sw.c
+@@ -1348,15 +1348,14 @@ static struct sk_buff *tls_wait_data(struct sock *sk, struct sk_psock *psock,
+       return skb;
+ }
+-static int tls_setup_from_iter(struct sock *sk, struct iov_iter *from,
++static int tls_setup_from_iter(struct iov_iter *from,
+                              int length, int *pages_used,
+-                             unsigned int *size_used,
+                              struct scatterlist *to,
+                              int to_max_pages)
+ {
+       int rc = 0, i = 0, num_elem = *pages_used, maxpages;
+       struct page *pages[MAX_SKB_FRAGS];
+-      unsigned int size = *size_used;
++      unsigned int size = 0;
+       ssize_t copied, use;
+       size_t offset;
+@@ -1399,8 +1398,7 @@ static int tls_setup_from_iter(struct sock *sk, struct iov_iter *from,
+               sg_mark_end(&to[num_elem - 1]);
+ out:
+       if (rc)
+-              iov_iter_revert(from, size - *size_used);
+-      *size_used = size;
++              iov_iter_revert(from, size);
+       *pages_used = num_elem;
+       return rc;
+@@ -1519,12 +1517,12 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
+                       sg_init_table(sgout, n_sgout);
+                       sg_set_buf(&sgout[0], aad, prot->aad_size);
+-                      *chunk = 0;
+-                      err = tls_setup_from_iter(sk, out_iov, data_len,
+-                                                &pages, chunk, &sgout[1],
++                      err = tls_setup_from_iter(out_iov, data_len,
++                                                &pages, &sgout[1],
+                                                 (n_sgout - 1));
+                       if (err < 0)
+                               goto fallback_to_reg_recv;
++                      *chunk = data_len;
+               } else if (out_sg) {
+                       memcpy(sgout, out_sg, n_sgout * sizeof(*sgout));
+               } else {
+-- 
+2.43.0
+
diff --git a/queue-5.15/tls-rx-factor-out-writing-contenttype-to-cmsg.patch b/queue-5.15/tls-rx-factor-out-writing-contenttype-to-cmsg.patch
new file mode 100644 (file)
index 0000000..f05695f
--- /dev/null
@@ -0,0 +1,196 @@
+From 0691e263e075592a7610cea28a3a235650592b15 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Apr 2022 11:31:28 -0700
+Subject: tls: rx: factor out writing ContentType to cmsg
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit 06554f4ffc2595ae52ee80aec4a13bd77d22bed7 ]
+
+cmsg can be filled in during rx_list processing or normal
+receive. Consolidate the code.
+
+We don't need to keep the boolean to track if the cmsg was
+created. 0 is an invalid content type.
+
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: f7fa16d49837 ("tls: decrement decrypt_pending if no async completion will be called")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/tls/tls_sw.c | 91 +++++++++++++++++++-----------------------------
+ 1 file changed, 36 insertions(+), 55 deletions(-)
+
+diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
+index c491cde30504e..ca71a9f559b37 100644
+--- a/net/tls/tls_sw.c
++++ b/net/tls/tls_sw.c
+@@ -1634,6 +1634,29 @@ static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb,
+       return true;
+ }
++static int tls_record_content_type(struct msghdr *msg, struct tls_msg *tlm,
++                                 u8 *control)
++{
++      int err;
++
++      if (!*control) {
++              *control = tlm->control;
++              if (!*control)
++                      return -EBADMSG;
++
++              err = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE,
++                             sizeof(*control), control);
++              if (*control != TLS_RECORD_TYPE_DATA) {
++                      if (err || msg->msg_flags & MSG_CTRUNC)
++                              return -EIO;
++              }
++      } else if (*control != tlm->control) {
++              return 0;
++      }
++
++      return 1;
++}
++
+ /* This function traverses the rx_list in tls receive context to copies the
+  * decrypted records into the buffer provided by caller zero copy is not
+  * true. Further, the records are removed from the rx_list if it is not a peek
+@@ -1642,31 +1665,23 @@ static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb,
+ static int process_rx_list(struct tls_sw_context_rx *ctx,
+                          struct msghdr *msg,
+                          u8 *control,
+-                         bool *cmsg,
+                          size_t skip,
+                          size_t len,
+                          bool zc,
+                          bool is_peek)
+ {
+       struct sk_buff *skb = skb_peek(&ctx->rx_list);
+-      u8 ctrl = *control;
+-      u8 msgc = *cmsg;
+       struct tls_msg *tlm;
+       ssize_t copied = 0;
+-
+-      /* Set the record type in 'control' if caller didn't pass it */
+-      if (!ctrl && skb) {
+-              tlm = tls_msg(skb);
+-              ctrl = tlm->control;
+-      }
++      int err;
+       while (skip && skb) {
+               struct strp_msg *rxm = strp_msg(skb);
+               tlm = tls_msg(skb);
+-              /* Cannot process a record of different type */
+-              if (ctrl != tlm->control)
+-                      return 0;
++              err = tls_record_content_type(msg, tlm, control);
++              if (err <= 0)
++                      return err;
+               if (skip < rxm->full_len)
+                       break;
+@@ -1682,27 +1697,12 @@ static int process_rx_list(struct tls_sw_context_rx *ctx,
+               tlm = tls_msg(skb);
+-              /* Cannot process a record of different type */
+-              if (ctrl != tlm->control)
+-                      return 0;
+-
+-              /* Set record type if not already done. For a non-data record,
+-               * do not proceed if record type could not be copied.
+-               */
+-              if (!msgc) {
+-                      int cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE,
+-                                          sizeof(ctrl), &ctrl);
+-                      msgc = true;
+-                      if (ctrl != TLS_RECORD_TYPE_DATA) {
+-                              if (cerr || msg->msg_flags & MSG_CTRUNC)
+-                                      return -EIO;
+-
+-                              *cmsg = msgc;
+-                      }
+-              }
++              err = tls_record_content_type(msg, tlm, control);
++              if (err <= 0)
++                      return err;
+               if (!zc || (rxm->full_len - skip) > len) {
+-                      int err = skb_copy_datagram_msg(skb, rxm->offset + skip,
++                      err = skb_copy_datagram_msg(skb, rxm->offset + skip,
+                                                   msg, chunk);
+                       if (err < 0)
+                               return err;
+@@ -1739,7 +1739,6 @@ static int process_rx_list(struct tls_sw_context_rx *ctx,
+               skb = next_skb;
+       }
+-      *control = ctrl;
+       return copied;
+ }
+@@ -1761,7 +1760,6 @@ int tls_sw_recvmsg(struct sock *sk,
+       struct tls_msg *tlm;
+       struct sk_buff *skb;
+       ssize_t copied = 0;
+-      bool cmsg = false;
+       int target, err = 0;
+       long timeo;
+       bool is_kvec = iov_iter_is_kvec(&msg->msg_iter);
+@@ -1778,8 +1776,7 @@ int tls_sw_recvmsg(struct sock *sk,
+       bpf_strp_enabled = sk_psock_strp_enabled(psock);
+       /* Process pending decrypted records. It must be non-zero-copy */
+-      err = process_rx_list(ctx, msg, &control, &cmsg, 0, len, false,
+-                            is_peek);
++      err = process_rx_list(ctx, msg, &control, 0, len, false, is_peek);
+       if (err < 0) {
+               tls_err_abort(sk, err);
+               goto end;
+@@ -1851,26 +1848,10 @@ int tls_sw_recvmsg(struct sock *sk,
+                * is known just after record is dequeued from stream parser.
+                * For tls1.3, we disable async.
+                */
+-
+-              if (!control)
+-                      control = tlm->control;
+-              else if (control != tlm->control)
++              err = tls_record_content_type(msg, tlm, &control);
++              if (err <= 0)
+                       goto recv_end;
+-              if (!cmsg) {
+-                      int cerr;
+-
+-                      cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE,
+-                                      sizeof(control), &control);
+-                      cmsg = true;
+-                      if (control != TLS_RECORD_TYPE_DATA) {
+-                              if (cerr || msg->msg_flags & MSG_CTRUNC) {
+-                                      err = -EIO;
+-                                      goto recv_end;
+-                              }
+-                      }
+-              }
+-
+               if (async) {
+                       /* TLS 1.2-only, to_decrypt must be text length */
+                       chunk = min_t(int, to_decrypt, len);
+@@ -1959,10 +1940,10 @@ int tls_sw_recvmsg(struct sock *sk,
+               /* Drain records from the rx_list & copy if required */
+               if (is_peek || is_kvec)
+-                      err = process_rx_list(ctx, msg, &control, &cmsg, copied,
++                      err = process_rx_list(ctx, msg, &control, copied,
+                                             decrypted, false, is_peek);
+               else
+-                      err = process_rx_list(ctx, msg, &control, &cmsg, 0,
++                      err = process_rx_list(ctx, msg, &control, 0,
+                                             decrypted, true, is_peek);
+               if (err < 0) {
+                       tls_err_abort(sk, err);
+-- 
+2.43.0
+
diff --git a/queue-5.15/tls-rx-move-counting-tlsdecrypterrors-for-sync.patch b/queue-5.15/tls-rx-move-counting-tlsdecrypterrors-for-sync.patch
new file mode 100644 (file)
index 0000000..38177da
--- /dev/null
@@ -0,0 +1,49 @@
+From c2fb73ea5bd49d8a9c033c57ad6b98af0a162093 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 11 Apr 2022 12:19:10 -0700
+Subject: tls: rx: move counting TlsDecryptErrors for sync
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit 284b4d93daee56dff3e10029ddf2e03227f50dbf ]
+
+Move counting TlsDecryptErrors to tls_do_decryption()
+where differences between sync and async crypto are
+reconciled.
+
+No functional changes, this code just always gave
+me a pause.
+
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: f7fa16d49837 ("tls: decrement decrypt_pending if no async completion will be called")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/tls/tls_sw.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
+index d3bbae9af9f41..85fa49170b4e5 100644
+--- a/net/tls/tls_sw.c
++++ b/net/tls/tls_sw.c
+@@ -274,6 +274,8 @@ static int tls_do_decryption(struct sock *sk,
+               ret = crypto_wait_req(ret, &ctx->async_wait);
+       }
++      if (ret == -EBADMSG)
++              TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSDECRYPTERROR);
+       if (async)
+               atomic_dec(&ctx->decrypt_pending);
+@@ -1583,8 +1585,6 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
+       if (err < 0) {
+               if (err == -EINPROGRESS)
+                       tls_advance_record_sn(sk, prot, &tls_ctx->rx);
+-              else if (err == -EBADMSG)
+-                      TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSDECRYPTERROR);
+               return err;
+       }
+-- 
+2.43.0
+
diff --git a/queue-5.15/tls-rx-refactor-decrypt_skb_update.patch b/queue-5.15/tls-rx-refactor-decrypt_skb_update.patch
new file mode 100644 (file)
index 0000000..1f4c24a
--- /dev/null
@@ -0,0 +1,107 @@
+From a31e78e9ccb122c8276bfbc8343347a95e5e48af Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 7 Apr 2022 20:38:22 -0700
+Subject: tls: rx: refactor decrypt_skb_update()
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit 3764ae5ba6615095de86698a00e814513b9ad0d5 ]
+
+Use early return and a jump label to remove two indentation levels.
+No functional changes.
+
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: f7fa16d49837 ("tls: decrement decrypt_pending if no async completion will be called")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/tls/tls_sw.c | 66 ++++++++++++++++++++++++------------------------
+ 1 file changed, 33 insertions(+), 33 deletions(-)
+
+diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
+index 5fdc4f5193ee5..7da17dd7c38b9 100644
+--- a/net/tls/tls_sw.c
++++ b/net/tls/tls_sw.c
+@@ -1560,46 +1560,46 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
+       struct tls_prot_info *prot = &tls_ctx->prot_info;
+       struct strp_msg *rxm = strp_msg(skb);
+       struct tls_msg *tlm = tls_msg(skb);
+-      int pad, err = 0;
++      int pad, err;
+-      if (!tlm->decrypted) {
+-              if (tls_ctx->rx_conf == TLS_HW) {
+-                      err = tls_device_decrypted(sk, tls_ctx, skb, rxm);
+-                      if (err < 0)
+-                              return err;
+-              }
++      if (tlm->decrypted) {
++              *zc = false;
++              return 0;
++      }
+-              /* Still not decrypted after tls_device */
+-              if (!tlm->decrypted) {
+-                      err = decrypt_internal(sk, skb, dest, NULL, chunk, zc,
+-                                             async);
+-                      if (err < 0) {
+-                              if (err == -EINPROGRESS)
+-                                      tls_advance_record_sn(sk, prot,
+-                                                            &tls_ctx->rx);
+-                              else if (err == -EBADMSG)
+-                                      TLS_INC_STATS(sock_net(sk),
+-                                                    LINUX_MIB_TLSDECRYPTERROR);
+-                              return err;
+-                      }
+-              } else {
++      if (tls_ctx->rx_conf == TLS_HW) {
++              err = tls_device_decrypted(sk, tls_ctx, skb, rxm);
++              if (err < 0)
++                      return err;
++
++              /* skip SW decryption if NIC handled it already */
++              if (tlm->decrypted) {
+                       *zc = false;
++                      goto decrypt_done;
+               }
++      }
+-              pad = padding_length(prot, skb);
+-              if (pad < 0)
+-                      return pad;
+-
+-              rxm->full_len -= pad;
+-              rxm->offset += prot->prepend_size;
+-              rxm->full_len -= prot->overhead_size;
+-              tls_advance_record_sn(sk, prot, &tls_ctx->rx);
+-              tlm->decrypted = 1;
+-      } else {
+-              *zc = false;
++      err = decrypt_internal(sk, skb, dest, NULL, chunk, zc, async);
++      if (err < 0) {
++              if (err == -EINPROGRESS)
++                      tls_advance_record_sn(sk, prot, &tls_ctx->rx);
++              else if (err == -EBADMSG)
++                      TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSDECRYPTERROR);
++              return err;
+       }
+-      return err;
++decrypt_done:
++      pad = padding_length(prot, skb);
++      if (pad < 0)
++              return pad;
++
++      rxm->full_len -= pad;
++      rxm->offset += prot->prepend_size;
++      rxm->full_len -= prot->overhead_size;
++      tls_advance_record_sn(sk, prot, &tls_ctx->rx);
++      tlm->decrypted = 1;
++
++      return 0;
+ }
+ int decrypt_skb(struct sock *sk, struct sk_buff *skb,
+-- 
+2.43.0
+
diff --git a/queue-5.15/tls-rx-use-async-as-an-in-out-argument.patch b/queue-5.15/tls-rx-use-async-as-an-in-out-argument.patch
new file mode 100644 (file)
index 0000000..c8d6033
--- /dev/null
@@ -0,0 +1,118 @@
+From bda007d9121ca97630a5bd3ffede8021d4c75177 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 11 Apr 2022 12:19:15 -0700
+Subject: tls: rx: use async as an in-out argument
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit 3547a1f9d988d88ecff4fc365d2773037c849f49 ]
+
+Propagating EINPROGRESS thru multiple layers of functions is
+error prone. Use darg->async as an in/out argument, like we
+use darg->zc today. On input it tells the code if async is
+allowed, on output if it took place.
+
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: f7fa16d49837 ("tls: decrement decrypt_pending if no async completion will be called")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/tls/tls_sw.c | 31 ++++++++++++++++---------------
+ 1 file changed, 16 insertions(+), 15 deletions(-)
+
+diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
+index 27ac27daec868..a1a99f9f093b1 100644
+--- a/net/tls/tls_sw.c
++++ b/net/tls/tls_sw.c
+@@ -236,7 +236,7 @@ static int tls_do_decryption(struct sock *sk,
+                            char *iv_recv,
+                            size_t data_len,
+                            struct aead_request *aead_req,
+-                           bool async)
++                           struct tls_decrypt_arg *darg)
+ {
+       struct tls_context *tls_ctx = tls_get_ctx(sk);
+       struct tls_prot_info *prot = &tls_ctx->prot_info;
+@@ -249,7 +249,7 @@ static int tls_do_decryption(struct sock *sk,
+                              data_len + prot->tag_size,
+                              (u8 *)iv_recv);
+-      if (async) {
++      if (darg->async) {
+               /* Using skb->sk to push sk through to crypto async callback
+                * handler. This allows propagating errors up to the socket
+                * if needed. It _must_ be cleared in the async handler
+@@ -269,11 +269,13 @@ static int tls_do_decryption(struct sock *sk,
+       ret = crypto_aead_decrypt(aead_req);
+       if (ret == -EINPROGRESS) {
+-              if (async)
+-                      return ret;
++              if (darg->async)
++                      return 0;
+               ret = crypto_wait_req(ret, &ctx->async_wait);
+       }
++      darg->async = false;
++
+       if (ret == -EBADMSG)
+               TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSDECRYPTERROR);
+@@ -1540,9 +1542,9 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
+       /* Prepare and submit AEAD request */
+       err = tls_do_decryption(sk, skb, sgin, sgout, iv,
+-                              data_len, aead_req, darg->async);
+-      if (err == -EINPROGRESS)
+-              return err;
++                              data_len, aead_req, darg);
++      if (darg->async)
++              return 0;
+       /* Release the pages in case iov was mapped to pages */
+       for (; pages > 0; pages--)
+@@ -1579,11 +1581,10 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
+       }
+       err = decrypt_internal(sk, skb, dest, NULL, darg);
+-      if (err < 0) {
+-              if (err == -EINPROGRESS)
+-                      tls_advance_record_sn(sk, prot, &tls_ctx->rx);
++      if (err < 0)
+               return err;
+-      }
++      if (darg->async)
++              goto decrypt_next;
+ decrypt_done:
+       pad = padding_length(prot, skb);
+@@ -1593,8 +1594,9 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
+       rxm->full_len -= pad;
+       rxm->offset += prot->prepend_size;
+       rxm->full_len -= prot->overhead_size;
+-      tls_advance_record_sn(sk, prot, &tls_ctx->rx);
+       tlm->decrypted = 1;
++decrypt_next:
++      tls_advance_record_sn(sk, prot, &tls_ctx->rx);
+       return 0;
+ }
+@@ -1826,13 +1828,12 @@ int tls_sw_recvmsg(struct sock *sk,
+                       darg.async = false;
+               err = decrypt_skb_update(sk, skb, &msg->msg_iter, &darg);
+-              if (err < 0 && err != -EINPROGRESS) {
++              if (err < 0) {
+                       tls_err_abort(sk, -EBADMSG);
+                       goto recv_end;
+               }
+-              if (err == -EINPROGRESS)
+-                      async = true;
++              async |= darg.async;
+               /* If the type of records being processed is not known yet,
+                * set it to record type just dequeued. If it is already known,
+-- 
+2.43.0
+
diff --git a/queue-5.15/tls-rx-wrap-decryption-arguments-in-a-structure.patch b/queue-5.15/tls-rx-wrap-decryption-arguments-in-a-structure.patch
new file mode 100644 (file)
index 0000000..3895d44
--- /dev/null
@@ -0,0 +1,197 @@
+From 890569dace1deb9afd771b5fb88d07455426c5b3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Apr 2022 11:31:26 -0700
+Subject: tls: rx: wrap decryption arguments in a structure
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit 4175eac37123a68ebee71f288826339fb89bfec7 ]
+
+We pass zc as a pointer to bool a few functions down as an in/out
+argument. This is error prone since C will happily evalue a pointer
+as a boolean (IOW forgetting *zc and writing zc leads to loss of
+developer time..). Wrap the arguments into a structure.
+
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: f7fa16d49837 ("tls: decrement decrypt_pending if no async completion will be called")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/tls/tls_sw.c | 49 ++++++++++++++++++++++++++----------------------
+ 1 file changed, 27 insertions(+), 22 deletions(-)
+
+diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
+index fc1fa98d21937..c491cde30504e 100644
+--- a/net/tls/tls_sw.c
++++ b/net/tls/tls_sw.c
+@@ -44,6 +44,11 @@
+ #include <net/strparser.h>
+ #include <net/tls.h>
++struct tls_decrypt_arg {
++      bool zc;
++      bool async;
++};
++
+ noinline void tls_err_abort(struct sock *sk, int err)
+ {
+       WARN_ON_ONCE(err >= 0);
+@@ -1415,7 +1420,7 @@ static int tls_setup_from_iter(struct iov_iter *from,
+ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
+                           struct iov_iter *out_iov,
+                           struct scatterlist *out_sg,
+-                          bool *zc, bool async)
++                          struct tls_decrypt_arg *darg)
+ {
+       struct tls_context *tls_ctx = tls_get_ctx(sk);
+       struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+@@ -1432,7 +1437,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
+                            prot->tail_size;
+       int iv_offset = 0;
+-      if (*zc && (out_iov || out_sg)) {
++      if (darg->zc && (out_iov || out_sg)) {
+               if (out_iov)
+                       n_sgout = iov_iter_npages(out_iov, INT_MAX) + 1;
+               else
+@@ -1441,7 +1446,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
+                                rxm->full_len - prot->prepend_size);
+       } else {
+               n_sgout = 0;
+-              *zc = false;
++              darg->zc = false;
+               n_sgin = skb_cow_data(skb, 0, &unused);
+       }
+@@ -1531,12 +1536,12 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
+ fallback_to_reg_recv:
+               sgout = sgin;
+               pages = 0;
+-              *zc = false;
++              darg->zc = false;
+       }
+       /* Prepare and submit AEAD request */
+       err = tls_do_decryption(sk, skb, sgin, sgout, iv,
+-                              data_len, aead_req, async);
++                              data_len, aead_req, darg->async);
+       if (err == -EINPROGRESS)
+               return err;
+@@ -1549,7 +1554,8 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
+ }
+ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
+-                            struct iov_iter *dest, bool *zc, bool async)
++                            struct iov_iter *dest,
++                            struct tls_decrypt_arg *darg)
+ {
+       struct tls_context *tls_ctx = tls_get_ctx(sk);
+       struct tls_prot_info *prot = &tls_ctx->prot_info;
+@@ -1558,7 +1564,7 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
+       int pad, err;
+       if (tlm->decrypted) {
+-              *zc = false;
++              darg->zc = false;
+               return 0;
+       }
+@@ -1568,12 +1574,12 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
+                       return err;
+               if (err > 0) {
+                       tlm->decrypted = 1;
+-                      *zc = false;
++                      darg->zc = false;
+                       goto decrypt_done;
+               }
+       }
+-      err = decrypt_internal(sk, skb, dest, NULL, zc, async);
++      err = decrypt_internal(sk, skb, dest, NULL, darg);
+       if (err < 0) {
+               if (err == -EINPROGRESS)
+                       tls_advance_record_sn(sk, prot, &tls_ctx->rx);
+@@ -1599,9 +1605,9 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
+ int decrypt_skb(struct sock *sk, struct sk_buff *skb,
+               struct scatterlist *sgout)
+ {
+-      bool zc = true;
++      struct tls_decrypt_arg darg = { .zc = true, };
+-      return decrypt_internal(sk, skb, NULL, sgout, &zc, false);
++      return decrypt_internal(sk, skb, NULL, sgout, &darg);
+ }
+ static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb,
+@@ -1790,11 +1796,10 @@ int tls_sw_recvmsg(struct sock *sk,
+       decrypted = 0;
+       num_async = 0;
+       while (len && (decrypted + copied < target || ctx->recv_pkt)) {
++              struct tls_decrypt_arg darg = {};
+               bool retain_skb = false;
+               int to_decrypt, chunk;
+-              bool zc = false;
+-              bool async_capable;
+-              bool async = false;
++              bool async;
+               skb = tls_wait_data(sk, psock, flags & MSG_DONTWAIT, timeo, &err);
+               if (!skb) {
+@@ -1820,16 +1825,15 @@ int tls_sw_recvmsg(struct sock *sk,
+                   tlm->control == TLS_RECORD_TYPE_DATA &&
+                   prot->version != TLS_1_3_VERSION &&
+                   !bpf_strp_enabled)
+-                      zc = true;
++                      darg.zc = true;
+               /* Do not use async mode if record is non-data */
+               if (tlm->control == TLS_RECORD_TYPE_DATA && !bpf_strp_enabled)
+-                      async_capable = ctx->async_capable;
++                      darg.async = ctx->async_capable;
+               else
+-                      async_capable = false;
++                      darg.async = false;
+-              err = decrypt_skb_update(sk, skb, &msg->msg_iter,
+-                                       &zc, async_capable);
++              err = decrypt_skb_update(sk, skb, &msg->msg_iter, &darg);
+               if (err < 0 && err != -EINPROGRESS) {
+                       tls_err_abort(sk, -EBADMSG);
+                       goto recv_end;
+@@ -1875,7 +1879,7 @@ int tls_sw_recvmsg(struct sock *sk,
+               /* TLS 1.3 may have updated the length by more than overhead */
+               chunk = rxm->full_len;
+-              if (!zc) {
++              if (!darg.zc) {
+                       if (bpf_strp_enabled) {
+                               err = sk_psock_tls_strp_read(psock, skb);
+                               if (err != __SK_PASS) {
+@@ -1991,7 +1995,6 @@ ssize_t tls_sw_splice_read(struct socket *sock,  loff_t *ppos,
+       int err = 0;
+       long timeo;
+       int chunk;
+-      bool zc = false;
+       lock_sock(sk);
+@@ -2001,12 +2004,14 @@ ssize_t tls_sw_splice_read(struct socket *sock,  loff_t *ppos,
+       if (from_queue) {
+               skb = __skb_dequeue(&ctx->rx_list);
+       } else {
++              struct tls_decrypt_arg darg = {};
++
+               skb = tls_wait_data(sk, NULL, flags & SPLICE_F_NONBLOCK, timeo,
+                                   &err);
+               if (!skb)
+                       goto splice_read_end;
+-              err = decrypt_skb_update(sk, skb, NULL, &zc, false);
++              err = decrypt_skb_update(sk, skb, NULL, &darg);
+               if (err < 0) {
+                       tls_err_abort(sk, -EBADMSG);
+                       goto splice_read_end;
+-- 
+2.43.0
+
diff --git a/queue-5.15/tun-fix-xdp_rxq_info-s-queue_index-when-detaching.patch b/queue-5.15/tun-fix-xdp_rxq_info-s-queue_index-when-detaching.patch
new file mode 100644 (file)
index 0000000..0feb97a
--- /dev/null
@@ -0,0 +1,36 @@
+From ae5b910252b82a29df0eb2f8a1196cd113446330 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 20 Feb 2024 11:12:07 +0800
+Subject: tun: Fix xdp_rxq_info's queue_index when detaching
+
+From: Yunjian Wang <wangyunjian@huawei.com>
+
+[ Upstream commit 2a770cdc4382b457ca3d43d03f0f0064f905a0d0 ]
+
+When a queue(tfile) is detached, we only update tfile's queue_index,
+but do not update xdp_rxq_info's queue_index. This patch fixes it.
+
+Fixes: 8bf5c4ee1889 ("tun: setup xdp_rxq_info")
+Signed-off-by: Yunjian Wang <wangyunjian@huawei.com>
+Link: https://lore.kernel.org/r/1708398727-46308-1-git-send-email-wangyunjian@huawei.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/tun.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/tun.c b/drivers/net/tun.c
+index 603530e6cd7b9..42bf0a3ec632e 100644
+--- a/drivers/net/tun.c
++++ b/drivers/net/tun.c
+@@ -654,6 +654,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
+                                  tun->tfiles[tun->numqueues - 1]);
+               ntfile = rtnl_dereference(tun->tfiles[index]);
+               ntfile->queue_index = index;
++              ntfile->xdp_rxq.queue_index = index;
+               rcu_assign_pointer(tun->tfiles[tun->numqueues - 1],
+                                  NULL);
+-- 
+2.43.0
+
diff --git a/queue-5.15/uapi-in6-replace-temporary-label-with-rfc9486.patch b/queue-5.15/uapi-in6-replace-temporary-label-with-rfc9486.patch
new file mode 100644 (file)
index 0000000..b17bbd4
--- /dev/null
@@ -0,0 +1,39 @@
+From 29b0929be27d9f67e2bba4ba7a07aa11a6e6cd64 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 26 Feb 2024 13:49:21 +0100
+Subject: uapi: in6: replace temporary label with rfc9486
+
+From: Justin Iurman <justin.iurman@uliege.be>
+
+[ Upstream commit 6a2008641920a9c6fe1abbeb9acbec463215d505 ]
+
+Not really a fix per se, but IPV6_TLV_IOAM is still tagged as "TEMPORARY
+IANA allocation for IOAM", while RFC 9486 is available for some time
+now. Just update the reference.
+
+Fixes: 9ee11f0fff20 ("ipv6: ioam: Data plane support for Pre-allocated Trace")
+Signed-off-by: Justin Iurman <justin.iurman@uliege.be>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://lore.kernel.org/r/20240226124921.9097-1-justin.iurman@uliege.be
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/uapi/linux/in6.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h
+index c4c53a9ab9595..ff8d21f9e95b7 100644
+--- a/include/uapi/linux/in6.h
++++ b/include/uapi/linux/in6.h
+@@ -145,7 +145,7 @@ struct in6_flowlabel_req {
+ #define IPV6_TLV_PADN         1
+ #define IPV6_TLV_ROUTERALERT  5
+ #define IPV6_TLV_CALIPSO      7       /* RFC 5570 */
+-#define IPV6_TLV_IOAM         49      /* TEMPORARY IANA allocation for IOAM */
++#define IPV6_TLV_IOAM         49      /* RFC 9486 */
+ #define IPV6_TLV_JUMBO                194
+ #define IPV6_TLV_HAO          201     /* home address option */
+-- 
+2.43.0
+
diff --git a/queue-5.15/veth-try-harder-when-allocating-queue-memory.patch b/queue-5.15/veth-try-harder-when-allocating-queue-memory.patch
new file mode 100644 (file)
index 0000000..47db731
--- /dev/null
@@ -0,0 +1,55 @@
+From 0fef6a2d8b391073a15483c69a6a0b378aac6569 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 23 Feb 2024 15:59:08 -0800
+Subject: veth: try harder when allocating queue memory
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit 1ce7d306ea63f3e379557c79abd88052e0483813 ]
+
+struct veth_rq is pretty large, 832B total without debug
+options enabled. Since commit under Fixes we try to pre-allocate
+enough queues for every possible CPU. Miao Wang reports that
+this may lead to order-5 allocations which will fail in production.
+
+Let the allocation fallback to vmalloc() and try harder.
+These are the same flags we pass to netdev queue allocation.
+
+Reported-and-tested-by: Miao Wang <shankerwangmiao@gmail.com>
+Fixes: 9d3684c24a52 ("veth: create by default nr_possible_cpus queues")
+Link: https://lore.kernel.org/all/5F52CAE2-2FB7-4712-95F1-3312FBBFA8DD@gmail.com/
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20240223235908.693010-1-kuba@kernel.org
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/veth.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/veth.c b/drivers/net/veth.c
+index 87cee614618ca..0102f86d48676 100644
+--- a/drivers/net/veth.c
++++ b/drivers/net/veth.c
+@@ -1303,7 +1303,8 @@ static int veth_alloc_queues(struct net_device *dev)
+       struct veth_priv *priv = netdev_priv(dev);
+       int i;
+-      priv->rq = kcalloc(dev->num_rx_queues, sizeof(*priv->rq), GFP_KERNEL_ACCOUNT);
++      priv->rq = kvcalloc(dev->num_rx_queues, sizeof(*priv->rq),
++                          GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
+       if (!priv->rq)
+               return -ENOMEM;
+@@ -1319,7 +1320,7 @@ static void veth_free_queues(struct net_device *dev)
+ {
+       struct veth_priv *priv = netdev_priv(dev);
+-      kfree(priv->rq);
++      kvfree(priv->rq);
+ }
+ static int veth_dev_init(struct net_device *dev)
+-- 
+2.43.0
+