Fixes for 6.10

author Sasha Levin <sashal@kernel.org>

Mon, 19 Aug 2024 14:19:29 +0000 (10:19 -0400)

committer Sasha Levin <sashal@kernel.org>

Mon, 19 Aug 2024 14:19:29 +0000 (10:19 -0400)
author Sasha Levin <sashal@kernel.org>
Mon, 19 Aug 2024 14:19:29 +0000 (10:19 -0400)
committer Sasha Levin <sashal@kernel.org>
Mon, 19 Aug 2024 14:19:29 +0000 (10:19 -0400)
diff --git a/queue-6.10/9p-fix-dio-read-through-netfs.patch b/queue-6.10/9p-fix-dio-read-through-netfs.patch

new file mode 100644 (file)

index 0000000..6ea918b
--- /dev/null
+++ b/queue-6.10/9p-fix-dio-read-through-netfs.patch
@@ -0,0 +1,193 @@
+From 56fca566d91b2a1e56ab124811765dddbbddaf71 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Aug 2024 14:29:38 +0100
+Subject: 9p: Fix DIO read through netfs
+
+From: Dominique Martinet <asmadeus@codewreck.org>
+
+[ Upstream commit e3786b29c54cdae3490b07180a54e2461f42144c ]
+
+If a program is watching a file on a 9p mount, it won't see any change in
+size if the file being exported by the server is changed directly in the
+source filesystem, presumably because 9p doesn't have change notifications,
+and because netfs skips the reads if the file is empty.
+
+Fix this by attempting to read the full size specified when a DIO read is
+requested (such as when 9p is operating in unbuffered mode) and dealing
+with a short read if the EOF was less than the expected read.
+
+To make this work, filesystems using netfslib must not set
+NETFS_SREQ_CLEAR_TAIL if performing a DIO read where that read hit the EOF.
+I don't want to mandatorily clear this flag in netfslib for DIO because,
+say, ceph might make a read from an object that is not completely filled,
+but does not reside at the end of file - and so we need to clear the
+excess.
+
+This can be tested by watching an empty file over 9p within a VM (such as
+in the ktest framework):
+
+        while true; do read content; if [ -n "$content" ]; then echo $content; break; fi; done < /host/tmp/foo
+
+then writing something into the empty file.  The watcher should immediately
+display the file content and break out of the loop.  Without this fix, it
+remains in the loop indefinitely.
+
+Fixes: 80105ed2fd27 ("9p: Use netfslib read/write_iter")
+Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218916
+Signed-off-by: David Howells <dhowells@redhat.com>
+Link: https://lore.kernel.org/r/1229195.1723211769@warthog.procyon.org.uk
+cc: Eric Van Hensbergen <ericvh@kernel.org>
+cc: Latchesar Ionkov <lucho@ionkov.net>
+cc: Christian Schoenebeck <linux_oss@crudebyte.com>
+cc: Marc Dionne <marc.dionne@auristor.com>
+cc: Ilya Dryomov <idryomov@gmail.com>
+cc: Steve French <sfrench@samba.org>
+cc: Paulo Alcantara <pc@manguebit.com>
+cc: Trond Myklebust <trond.myklebust@hammerspace.com>
+cc: v9fs@lists.linux.dev
+cc: linux-afs@lists.infradead.org
+cc: ceph-devel@vger.kernel.org
+cc: linux-cifs@vger.kernel.org
+cc: linux-nfs@vger.kernel.org
+cc: netfs@lists.linux.dev
+cc: linux-fsdevel@vger.kernel.org
+Signed-off-by: Dominique Martinet <asmadeus@codewreck.org>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/9p/vfs_addr.c     |  3 ++-
+ fs/afs/file.c        |  3 ++-
+ fs/ceph/addr.c       |  6 ++++--
+ fs/netfs/io.c        | 17 +++++++++++------
+ fs/nfs/fscache.c     |  3 ++-
+ fs/smb/client/file.c |  3 ++-
+ 6 files changed, 23 insertions(+), 12 deletions(-)
+
+diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
+index a97ceb105cd8d..24fdc74caeba4 100644
+--- a/fs/9p/vfs_addr.c
++++ b/fs/9p/vfs_addr.c
+@@ -75,7 +75,8 @@ static void v9fs_issue_read(struct netfs_io_subrequest *subreq)
+ 
+       /* if we just extended the file size, any portion not in
+        * cache won't be on server and is zeroes */
+-      __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
++      if (subreq->rreq->origin != NETFS_DIO_READ)
++              __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
+ 
+       netfs_subreq_terminated(subreq, err ?: total, false);
+ }
+diff --git a/fs/afs/file.c b/fs/afs/file.c
+index c3f0c45ae9a9b..ec1be0091fdb5 100644
+--- a/fs/afs/file.c
++++ b/fs/afs/file.c
+@@ -242,7 +242,8 @@ static void afs_fetch_data_notify(struct afs_operation *op)
+ 
+       req->error = error;
+       if (subreq) {
+-              __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
++              if (subreq->rreq->origin != NETFS_DIO_READ)
++                      __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
+               netfs_subreq_terminated(subreq, error ?: req->actual_len, false);
+               req->subreq = NULL;
+       } else if (req->done) {
+diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
+index 73b5a07bf94de..d2194022132ec 100644
+--- a/fs/ceph/addr.c
++++ b/fs/ceph/addr.c
+@@ -246,7 +246,8 @@ static void finish_netfs_read(struct ceph_osd_request *req)
+       if (err >= 0) {
+               if (sparse && err > 0)
+                       err = ceph_sparse_ext_map_end(op);
+-              if (err < subreq->len)
++              if (err < subreq->len &&
++                  subreq->rreq->origin != NETFS_DIO_READ)
+                       __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
+               if (IS_ENCRYPTED(inode) && err > 0) {
+                       err = ceph_fscrypt_decrypt_extents(inode,
+@@ -282,7 +283,8 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq)
+       size_t len;
+       int mode;
+ 
+-      __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
++      if (rreq->origin != NETFS_DIO_READ)
++              __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
+       __clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
+ 
+       if (subreq->start >= inode->i_size)
+diff --git a/fs/netfs/io.c b/fs/netfs/io.c
+index f3abc5dfdbc0c..19ec6990dc91e 100644
+--- a/fs/netfs/io.c
++++ b/fs/netfs/io.c
+@@ -530,7 +530,8 @@ void netfs_subreq_terminated(struct netfs_io_subrequest *subreq,
+ 
+       if (transferred_or_error == 0) {
+               if (__test_and_set_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags)) {
+-                      subreq->error = -ENODATA;
++                      if (rreq->origin != NETFS_DIO_READ)
++                              subreq->error = -ENODATA;
+                       goto failed;
+               }
+       } else {
+@@ -601,9 +602,14 @@ netfs_rreq_prepare_read(struct netfs_io_request *rreq,
+                       }
+                       if (subreq->len > ictx->zero_point - subreq->start)
+                               subreq->len = ictx->zero_point - subreq->start;
++
++                      /* We limit buffered reads to the EOF, but let the
++                       * server deal with larger-than-EOF DIO/unbuffered
++                       * reads.
++                       */
++                      if (subreq->len > rreq->i_size - subreq->start)
++                              subreq->len = rreq->i_size - subreq->start;
+               }
+-              if (subreq->len > rreq->i_size - subreq->start)
+-                      subreq->len = rreq->i_size - subreq->start;
+               if (rreq->rsize && subreq->len > rreq->rsize)
+                       subreq->len = rreq->rsize;
+ 
+@@ -739,11 +745,10 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync)
+       do {
+               kdebug("submit %llx + %llx >= %llx",
+                      rreq->start, rreq->submitted, rreq->i_size);
+-              if (rreq->origin == NETFS_DIO_READ &&
+-                  rreq->start + rreq->submitted >= rreq->i_size)
+-                      break;
+               if (!netfs_rreq_submit_slice(rreq, &io_iter))
+                       break;
++              if (test_bit(NETFS_SREQ_NO_PROGRESS, &rreq->flags))
++                      break;
+               if (test_bit(NETFS_RREQ_BLOCKED, &rreq->flags) &&
+                   test_bit(NETFS_RREQ_NONBLOCK, &rreq->flags))
+                       break;
+diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
+index ddc1ee0319554..bc20ba50283c8 100644
+--- a/fs/nfs/fscache.c
++++ b/fs/nfs/fscache.c
+@@ -361,7 +361,8 @@ void nfs_netfs_read_completion(struct nfs_pgio_header *hdr)
+               return;
+ 
+       sreq = netfs->sreq;
+-      if (test_bit(NFS_IOHDR_EOF, &hdr->flags))
++      if (test_bit(NFS_IOHDR_EOF, &hdr->flags) &&
++          sreq->rreq->origin != NETFS_DIO_READ)
+               __set_bit(NETFS_SREQ_CLEAR_TAIL, &sreq->flags);
+ 
+       if (hdr->error)
+diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
+index 2e3c4d0277dbb..9e4f4e67768b9 100644
+--- a/fs/smb/client/file.c
++++ b/fs/smb/client/file.c
+@@ -196,7 +196,8 @@ static void cifs_req_issue_read(struct netfs_io_subrequest *subreq)
+                       goto out;
+       }
+ 
+-      __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
++      if (subreq->rreq->origin != NETFS_DIO_READ)
++              __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
+ 
+       rc = rdata->server->ops->async_readv(rdata);
+ out:
+-- 
+2.43.0
+
diff --git a/queue-6.10/alsa-hda-realtek-fix-noise-from-speakers-on-lenovo-i.patch b/queue-6.10/alsa-hda-realtek-fix-noise-from-speakers-on-lenovo-i.patch

new file mode 100644 (file)

index 0000000..df4d6ed
--- /dev/null
+++ b/queue-6.10/alsa-hda-realtek-fix-noise-from-speakers-on-lenovo-i.patch
@@ -0,0 +1,39 @@
+From 9d501005a9abce37e6a9a2de0ee653952fc36dae Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 10 Aug 2024 18:39:06 +0330
+Subject: ALSA: hda/realtek: Fix noise from speakers on Lenovo IdeaPad 3 15IAU7
+
+From: Parsa Poorshikhian <parsa.poorsh@gmail.com>
+
+[ Upstream commit ef9718b3d54e822de294351251f3a574f8a082ce ]
+
+Fix noise from speakers connected to AUX port when no sound is playing.
+The problem occurs because the `alc_shutup_pins` function includes
+a 0x10ec0257 vendor ID, which causes noise on Lenovo IdeaPad 3 15IAU7 with
+Realtek ALC257 codec when no sound is playing.
+Removing this vendor ID from the function fixes the bug.
+
+Fixes: 70794b9563fe ("ALSA: hda/realtek: Add more codec ID to no shutup pins list")
+Signed-off-by: Parsa Poorshikhian <parsa.poorsh@gmail.com>
+Link: https://patch.msgid.link/20240810150939.330693-1-parsa.poorsh@gmail.com
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/pci/hda/patch_realtek.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
+index 3840565ef8b02..c9d76bca99232 100644
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -583,7 +583,6 @@ static void alc_shutup_pins(struct hda_codec *codec)
+       switch (codec->core.vendor_id) {
+       case 0x10ec0236:
+       case 0x10ec0256:
+-      case 0x10ec0257:
+       case 0x19e58326:
+       case 0x10ec0283:
+       case 0x10ec0285:
+-- 
+2.43.0
+
diff --git a/queue-6.10/atm-idt77252-prevent-use-after-free-in-dequeue_rx.patch b/queue-6.10/atm-idt77252-prevent-use-after-free-in-dequeue_rx.patch

new file mode 100644 (file)

index 0000000..52fa831
--- /dev/null
+++ b/queue-6.10/atm-idt77252-prevent-use-after-free-in-dequeue_rx.patch
@@ -0,0 +1,56 @@
+From 3a77dad0589719ee3296fd7330be4440936b379e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 9 Aug 2024 15:28:19 +0300
+Subject: atm: idt77252: prevent use after free in dequeue_rx()
+
+From: Dan Carpenter <dan.carpenter@linaro.org>
+
+[ Upstream commit a9a18e8f770c9b0703dab93580d0b02e199a4c79 ]
+
+We can't dereference "skb" after calling vcc->push() because the skb
+is released.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/atm/idt77252.c | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c
+index e7f713cd70d3f..a876024d8a05f 100644
+--- a/drivers/atm/idt77252.c
++++ b/drivers/atm/idt77252.c
+@@ -1118,8 +1118,8 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe)
+       rpp->len += skb->len;
+ 
+       if (stat & SAR_RSQE_EPDU) {
++              unsigned int len, truesize;
+               unsigned char *l1l2;
+-              unsigned int len;
+ 
+               l1l2 = (unsigned char *) ((unsigned long) skb->data + skb->len - 6);
+ 
+@@ -1189,14 +1189,15 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe)
+               ATM_SKB(skb)->vcc = vcc;
+               __net_timestamp(skb);
+ 
++              truesize = skb->truesize;
+               vcc->push(vcc, skb);
+               atomic_inc(&vcc->stats->rx);
+ 
+-              if (skb->truesize > SAR_FB_SIZE_3)
++              if (truesize > SAR_FB_SIZE_3)
+                       add_rx_skb(card, 3, SAR_FB_SIZE_3, 1);
+-              else if (skb->truesize > SAR_FB_SIZE_2)
++              else if (truesize > SAR_FB_SIZE_2)
+                       add_rx_skb(card, 2, SAR_FB_SIZE_2, 1);
+-              else if (skb->truesize > SAR_FB_SIZE_1)
++              else if (truesize > SAR_FB_SIZE_1)
+                       add_rx_skb(card, 1, SAR_FB_SIZE_1, 1);
+               else
+                       add_rx_skb(card, 0, SAR_FB_SIZE_0, 1);
+-- 
+2.43.0
+
diff --git a/queue-6.10/bpf-fix-a-kernel-verifier-crash-in-stacksafe.patch b/queue-6.10/bpf-fix-a-kernel-verifier-crash-in-stacksafe.patch

new file mode 100644 (file)

index 0000000..6cba3ad
--- /dev/null
+++ b/queue-6.10/bpf-fix-a-kernel-verifier-crash-in-stacksafe.patch
@@ -0,0 +1,57 @@
+From 99ba64bd8f67a115a02c06973299a90bfdb91ca3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 12 Aug 2024 14:48:47 -0700
+Subject: bpf: Fix a kernel verifier crash in stacksafe()
+
+From: Yonghong Song <yonghong.song@linux.dev>
+
+[ Upstream commit bed2eb964c70b780fb55925892a74f26cb590b25 ]
+
+Daniel Hodges reported a kernel verifier crash when playing with sched-ext.
+Further investigation shows that the crash is due to invalid memory access
+in stacksafe(). More specifically, it is the following code:
+
+    if (exact != NOT_EXACT &&
+        old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
+        cur->stack[spi].slot_type[i % BPF_REG_SIZE])
+            return false;
+
+The index 'i' iterates over old->allocated_stack.
+If cur->allocated_stack < old->allocated_stack, an out-of-bounds
+access will happen.
+
+To fix the issue, add an 'i >= cur->allocated_stack' check so that
+stacksafe() fails if the condition is true. Otherwise, the
+cur->stack[spi].slot_type[i % BPF_REG_SIZE] memory access is legal.
+
+Fixes: 2793a8b015f7 ("bpf: exact states comparison for iterator convergence checks")
+Cc: Eduard Zingerman <eddyz87@gmail.com>
+Reported-by: Daniel Hodges <hodgesd@meta.com>
+Acked-by: Eduard Zingerman <eddyz87@gmail.com>
+Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
+Link: https://lore.kernel.org/r/20240812214847.213612-1-yonghong.song@linux.dev
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/verifier.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
+index a8845cc299fec..521bd7efae038 100644
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -16881,8 +16881,9 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
+               spi = i / BPF_REG_SIZE;
+ 
+               if (exact != NOT_EXACT &&
+-                  old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
+-                  cur->stack[spi].slot_type[i % BPF_REG_SIZE])
++                  (i >= cur->allocated_stack ||
++                   old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
++                   cur->stack[spi].slot_type[i % BPF_REG_SIZE]))
+                       return false;
+ 
+               if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)
+-- 
+2.43.0
+
diff --git a/queue-6.10/bpf-fix-updating-attached-freplace-prog-in-prog_arra.patch b/queue-6.10/bpf-fix-updating-attached-freplace-prog-in-prog_arra.patch

new file mode 100644 (file)

index 0000000..ffcb5b3
--- /dev/null
+++ b/queue-6.10/bpf-fix-updating-attached-freplace-prog-in-prog_arra.patch
@@ -0,0 +1,61 @@
+From d9ce49336ae46f6842b6ee16d1cf68bbb34e3144 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 28 Jul 2024 19:46:11 +0800
+Subject: bpf: Fix updating attached freplace prog in prog_array map
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Leon Hwang <leon.hwang@linux.dev>
+
+[ Upstream commit fdad456cbcca739bae1849549c7a999857c56f88 ]
+
+The commit f7866c358733 ("bpf: Fix null pointer dereference in resolve_prog_type() for BPF_PROG_TYPE_EXT")
+fixed a NULL pointer dereference panic, but didn't fix the failure to
+update an attached freplace prog in a prog_array map.
+
+Since commit 1c123c567fb1 ("bpf: Resolve fext program type when checking map compatibility"),
+freplace prog and its target prog are able to tail call each other.
+
+And the commit 3aac1ead5eb6 ("bpf: Move prog->aux->linked_prog and trampoline into bpf_link on attach")
+sets prog->aux->dst_prog as NULL after attaching freplace prog to its
+target prog.
+
+After loading freplace the prog_array's owner type is BPF_PROG_TYPE_SCHED_CLS.
+Then, after attaching freplace its prog->aux->dst_prog is NULL.
+Then, while updating freplace in prog_array the bpf_prog_map_compatible()
+incorrectly returns false because resolve_prog_type() returns
+BPF_PROG_TYPE_EXT instead of BPF_PROG_TYPE_SCHED_CLS.
+After this patch the resolve_prog_type() returns BPF_PROG_TYPE_SCHED_CLS
+and update to prog_array can succeed.
+
+Fixes: f7866c358733 ("bpf: Fix null pointer dereference in resolve_prog_type() for BPF_PROG_TYPE_EXT")
+Cc: Toke Høiland-Jørgensen <toke@redhat.com>
+Cc: Martin KaFai Lau <martin.lau@kernel.org>
+Acked-by: Yonghong Song <yonghong.song@linux.dev>
+Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
+Link: https://lore.kernel.org/r/20240728114612.48486-2-leon.hwang@linux.dev
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/bpf_verifier.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
+index ff2a6cdb1fa3f..5db4a3f354804 100644
+--- a/include/linux/bpf_verifier.h
++++ b/include/linux/bpf_verifier.h
+@@ -846,8 +846,8 @@ static inline u32 type_flag(u32 type)
+ /* only use after check_attach_btf_id() */
+ static inline enum bpf_prog_type resolve_prog_type(const struct bpf_prog *prog)
+ {
+-      return (prog->type == BPF_PROG_TYPE_EXT && prog->aux->dst_prog) ?
+-              prog->aux->dst_prog->type : prog->type;
++      return (prog->type == BPF_PROG_TYPE_EXT && prog->aux->saved_dst_prog_type) ?
++              prog->aux->saved_dst_prog_type : prog->type;
+ }
+ 
+ static inline bool bpf_prog_check_recur(const struct bpf_prog *prog)
+-- 
+2.43.0
+
diff --git a/queue-6.10/btrfs-fix-invalid-mapping-of-extent-xarray-state.patch b/queue-6.10/btrfs-fix-invalid-mapping-of-extent-xarray-state.patch

new file mode 100644 (file)

index 0000000..1e43a80
--- /dev/null
+++ b/queue-6.10/btrfs-fix-invalid-mapping-of-extent-xarray-state.patch
@@ -0,0 +1,209 @@
+From 3382b9609fd61bb4733a9dd89b9904a7ceb10d0b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 9 Aug 2024 16:54:22 +0900
+Subject: btrfs: fix invalid mapping of extent xarray state
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+[ Upstream commit 6252690f7e1b173b86a4c27dfc046b351ab423e7 ]
+
+In __extent_writepage_io(), we call btrfs_set_range_writeback() ->
+folio_start_writeback(), which clears PAGECACHE_TAG_DIRTY mark from the
+mapping xarray if the folio is not dirty. This worked fine before commit
+97713b1a2ced ("btrfs: do not clear page dirty inside
+extent_write_locked_range()").
+
+After the commit, however, the folio is still dirty at this point, so the
+mapping DIRTY tag is not cleared anymore. Then, __extent_writepage_io()
+calls btrfs_folio_clear_dirty() to clear the folio's dirty flag. That
+results in the page being unlocked with a "strange" state. The page is not
+PageDirty, but the mapping tag is set as PAGECACHE_TAG_DIRTY.
+
+This strange state appears to cause a hang, with the call trace below, when
+running fstests generic/091 on a null_blk device. The task is waiting for a
+folio lock.
+
+While I don't have an exact relation between this hang and the strange
+state, fixing the state also fixes the hang. And, that state is worth
+fixing anyway.
+
+This commit reorders btrfs_folio_clear_dirty() and
+btrfs_set_range_writeback() in __extent_writepage_io(), so that the
+PAGECACHE_TAG_DIRTY tag is properly removed from the xarray.
+
+  [464.274] task:fsx             state:D stack:0     pid:3034  tgid:3034  ppid:2853   flags:0x00004002
+  [464.286] Call Trace:
+  [464.291]  <TASK>
+  [464.295]  __schedule+0x10ed/0x6260
+  [464.301]  ? __pfx___blk_flush_plug+0x10/0x10
+  [464.308]  ? __submit_bio+0x37c/0x450
+  [464.314]  ? __pfx___schedule+0x10/0x10
+  [464.321]  ? lock_release+0x567/0x790
+  [464.327]  ? __pfx_lock_acquire+0x10/0x10
+  [464.334]  ? __pfx_lock_release+0x10/0x10
+  [464.340]  ? __pfx_lock_acquire+0x10/0x10
+  [464.347]  ? __pfx_lock_release+0x10/0x10
+  [464.353]  ? do_raw_spin_lock+0x12e/0x270
+  [464.360]  schedule+0xdf/0x3b0
+  [464.365]  io_schedule+0x8f/0xf0
+  [464.371]  folio_wait_bit_common+0x2ca/0x6d0
+  [464.378]  ? folio_wait_bit_common+0x1cc/0x6d0
+  [464.385]  ? __pfx_folio_wait_bit_common+0x10/0x10
+  [464.392]  ? __pfx_filemap_get_folios_tag+0x10/0x10
+  [464.400]  ? __pfx_wake_page_function+0x10/0x10
+  [464.407]  ? __pfx___might_resched+0x10/0x10
+  [464.414]  ? do_raw_spin_unlock+0x58/0x1f0
+  [464.420]  extent_write_cache_pages+0xe49/0x1620 [btrfs]
+  [464.428]  ? lock_acquire+0x435/0x500
+  [464.435]  ? __pfx_extent_write_cache_pages+0x10/0x10 [btrfs]
+  [464.443]  ? btrfs_do_write_iter+0x493/0x640 [btrfs]
+  [464.451]  ? orc_find.part.0+0x1d4/0x380
+  [464.457]  ? __pfx_lock_release+0x10/0x10
+  [464.464]  ? __pfx_lock_release+0x10/0x10
+  [464.471]  ? btrfs_do_write_iter+0x493/0x640 [btrfs]
+  [464.478]  btrfs_writepages+0x1cc/0x460 [btrfs]
+  [464.485]  ? __pfx_btrfs_writepages+0x10/0x10 [btrfs]
+  [464.493]  ? is_bpf_text_address+0x6e/0x100
+  [464.500]  ? kernel_text_address+0x145/0x160
+  [464.507]  ? unwind_get_return_address+0x5e/0xa0
+  [464.514]  ? arch_stack_walk+0xac/0x100
+  [464.521]  do_writepages+0x176/0x780
+  [464.527]  ? lock_release+0x567/0x790
+  [464.533]  ? __pfx_do_writepages+0x10/0x10
+  [464.540]  ? __pfx_lock_acquire+0x10/0x10
+  [464.546]  ? __pfx_stack_trace_save+0x10/0x10
+  [464.553]  ? do_raw_spin_lock+0x12e/0x270
+  [464.560]  ? do_raw_spin_unlock+0x58/0x1f0
+  [464.566]  ? _raw_spin_unlock+0x23/0x40
+  [464.573]  ? wbc_attach_and_unlock_inode+0x3da/0x7d0
+  [464.580]  filemap_fdatawrite_wbc+0x113/0x180
+  [464.587]  ? prepare_pages.constprop.0+0x13c/0x5c0 [btrfs]
+  [464.596]  __filemap_fdatawrite_range+0xaf/0xf0
+  [464.603]  ? __pfx___filemap_fdatawrite_range+0x10/0x10
+  [464.611]  ? trace_irq_enable.constprop.0+0xce/0x110
+  [464.618]  ? kasan_quarantine_put+0xd7/0x1e0
+  [464.625]  btrfs_start_ordered_extent+0x46f/0x570 [btrfs]
+  [464.633]  ? __pfx_btrfs_start_ordered_extent+0x10/0x10 [btrfs]
+  [464.642]  ? __clear_extent_bit+0x2c0/0x9d0 [btrfs]
+  [464.650]  btrfs_lock_and_flush_ordered_range+0xc6/0x180 [btrfs]
+  [464.659]  ? __pfx_btrfs_lock_and_flush_ordered_range+0x10/0x10 [btrfs]
+  [464.669]  btrfs_read_folio+0x12a/0x1d0 [btrfs]
+  [464.676]  ? __pfx_btrfs_read_folio+0x10/0x10 [btrfs]
+  [464.684]  ? __pfx_filemap_add_folio+0x10/0x10
+  [464.691]  ? __pfx___might_resched+0x10/0x10
+  [464.698]  ? __filemap_get_folio+0x1c5/0x450
+  [464.705]  prepare_uptodate_page+0x12e/0x4d0 [btrfs]
+  [464.713]  prepare_pages.constprop.0+0x13c/0x5c0 [btrfs]
+  [464.721]  ? fault_in_iov_iter_readable+0xd2/0x240
+  [464.729]  btrfs_buffered_write+0x5bd/0x12f0 [btrfs]
+  [464.737]  ? __pfx_btrfs_buffered_write+0x10/0x10 [btrfs]
+  [464.745]  ? __pfx_lock_release+0x10/0x10
+  [464.752]  ? generic_write_checks+0x275/0x400
+  [464.759]  ? down_write+0x118/0x1f0
+  [464.765]  ? up_write+0x19b/0x500
+  [464.770]  btrfs_direct_write+0x731/0xba0 [btrfs]
+  [464.778]  ? __pfx_btrfs_direct_write+0x10/0x10 [btrfs]
+  [464.785]  ? __pfx___might_resched+0x10/0x10
+  [464.792]  ? lock_acquire+0x435/0x500
+  [464.798]  ? lock_acquire+0x435/0x500
+  [464.804]  btrfs_do_write_iter+0x494/0x640 [btrfs]
+  [464.811]  ? __pfx_btrfs_do_write_iter+0x10/0x10 [btrfs]
+  [464.819]  ? __pfx___might_resched+0x10/0x10
+  [464.825]  ? rw_verify_area+0x6d/0x590
+  [464.831]  vfs_write+0x5d7/0xf50
+  [464.837]  ? __might_fault+0x9d/0x120
+  [464.843]  ? __pfx_vfs_write+0x10/0x10
+  [464.849]  ? btrfs_file_llseek+0xb1/0xfb0 [btrfs]
+  [464.856]  ? lock_release+0x567/0x790
+  [464.862]  ksys_write+0xfb/0x1d0
+  [464.867]  ? __pfx_ksys_write+0x10/0x10
+  [464.873]  ? _raw_spin_unlock+0x23/0x40
+  [464.879]  ? btrfs_getattr+0x4af/0x670 [btrfs]
+  [464.886]  ? vfs_getattr_nosec+0x79/0x340
+  [464.892]  do_syscall_64+0x95/0x180
+  [464.898]  ? __do_sys_newfstat+0xde/0xf0
+  [464.904]  ? __pfx___do_sys_newfstat+0x10/0x10
+  [464.911]  ? trace_irq_enable.constprop.0+0xce/0x110
+  [464.918]  ? syscall_exit_to_user_mode+0xac/0x2a0
+  [464.925]  ? do_syscall_64+0xa1/0x180
+  [464.931]  ? trace_irq_enable.constprop.0+0xce/0x110
+  [464.939]  ? trace_irq_enable.constprop.0+0xce/0x110
+  [464.946]  ? syscall_exit_to_user_mode+0xac/0x2a0
+  [464.953]  ? btrfs_file_llseek+0xb1/0xfb0 [btrfs]
+  [464.960]  ? do_syscall_64+0xa1/0x180
+  [464.966]  ? btrfs_file_llseek+0xb1/0xfb0 [btrfs]
+  [464.973]  ? trace_irq_enable.constprop.0+0xce/0x110
+  [464.980]  ? syscall_exit_to_user_mode+0xac/0x2a0
+  [464.987]  ? __pfx_btrfs_file_llseek+0x10/0x10 [btrfs]
+  [464.995]  ? trace_irq_enable.constprop.0+0xce/0x110
+  [465.002]  ? __pfx_btrfs_file_llseek+0x10/0x10 [btrfs]
+  [465.010]  ? do_syscall_64+0xa1/0x180
+  [465.016]  ? lock_release+0x567/0x790
+  [465.022]  ? __pfx_lock_acquire+0x10/0x10
+  [465.028]  ? __pfx_lock_release+0x10/0x10
+  [465.034]  ? trace_irq_enable.constprop.0+0xce/0x110
+  [465.042]  ? syscall_exit_to_user_mode+0xac/0x2a0
+  [465.049]  ? do_syscall_64+0xa1/0x180
+  [465.055]  ? syscall_exit_to_user_mode+0xac/0x2a0
+  [465.062]  ? do_syscall_64+0xa1/0x180
+  [465.068]  ? syscall_exit_to_user_mode+0xac/0x2a0
+  [465.075]  ? do_syscall_64+0xa1/0x180
+  [465.081]  ? clear_bhb_loop+0x25/0x80
+  [465.087]  ? clear_bhb_loop+0x25/0x80
+  [465.093]  ? clear_bhb_loop+0x25/0x80
+  [465.099]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
+  [465.106] RIP: 0033:0x7f093b8ee784
+  [465.111] RSP: 002b:00007ffc29d31b28 EFLAGS: 00000202 ORIG_RAX: 0000000000000001
+  [465.122] RAX: ffffffffffffffda RBX: 0000000000006000 RCX: 00007f093b8ee784
+  [465.131] RDX: 000000000001de00 RSI: 00007f093b6ed200 RDI: 0000000000000003
+  [465.141] RBP: 000000000001de00 R08: 0000000000006000 R09: 0000000000000000
+  [465.150] R10: 0000000000023e00 R11: 0000000000000202 R12: 0000000000006000
+  [465.160] R13: 0000000000023e00 R14: 0000000000023e00 R15: 0000000000000001
+  [465.170]  </TASK>
+  [465.174] INFO: lockdep is turned off.
+
+Reported-by: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+Fixes: 97713b1a2ced ("btrfs: do not clear page dirty inside extent_write_locked_range()")
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/extent_io.c | 14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
+index 0486b1f911248..3bad7c0be1f10 100644
+--- a/fs/btrfs/extent_io.c
++++ b/fs/btrfs/extent_io.c
+@@ -1420,6 +1420,13 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
+               free_extent_map(em);
+               em = NULL;
+ 
++              /*
++               * Although the PageDirty bit might be cleared before entering
++               * this function, subpage dirty bit is not cleared.
++               * So clear subpage dirty bit here so next time we won't submit
++               * page for range already written to disk.
++               */
++              btrfs_folio_clear_dirty(fs_info, page_folio(page), cur, iosize);
+               btrfs_set_range_writeback(inode, cur, cur + iosize - 1);
+               if (!PageWriteback(page)) {
+                       btrfs_err(inode->root->fs_info,
+@@ -1427,13 +1434,6 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
+                              page->index, cur, end);
+               }
+ 
+-              /*
+-               * Although the PageDirty bit is cleared before entering this
+-               * function, subpage dirty bit is not cleared.
+-               * So clear subpage dirty bit here so next time we won't submit
+-               * page for range already written to disk.
+-               */
+-              btrfs_folio_clear_dirty(fs_info, page_folio(page), cur, iosize);
+ 
+               submit_extent_page(bio_ctrl, disk_bytenr, page, iosize,
+                                  cur - page_offset(page));
+-- 
+2.43.0
+
diff --git a/queue-6.10/drm-v3d-fix-out-of-bounds-read-in-v3d_csd_job_run.patch b/queue-6.10/drm-v3d-fix-out-of-bounds-read-in-v3d_csd_job_run.patch

new file mode 100644 (file)

index 0000000..e620fba
--- /dev/null
+++ b/queue-6.10/drm-v3d-fix-out-of-bounds-read-in-v3d_csd_job_run.patch
@@ -0,0 +1,88 @@
+From 40fcf9fc600b7a7604217aded770a8e6f611504e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 9 Aug 2024 12:18:45 -0300
+Subject: drm/v3d: Fix out-of-bounds read in `v3d_csd_job_run()`
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Maíra Canal <mcanal@igalia.com>
+
+[ Upstream commit 497d370a644d95a9f04271aa92cb96d32e84c770 ]
+
+When enabling UBSAN on Raspberry Pi 5, we get the following warning:
+
+[  387.894977] UBSAN: array-index-out-of-bounds in drivers/gpu/drm/v3d/v3d_sched.c:320:3
+[  387.903868] index 7 is out of range for type '__u32 [7]'
+[  387.909692] CPU: 0 PID: 1207 Comm: kworker/u16:2 Tainted: G        WC         6.10.3-v8-16k-numa #151
+[  387.919166] Hardware name: Raspberry Pi 5 Model B Rev 1.0 (DT)
+[  387.925961] Workqueue: v3d_csd drm_sched_run_job_work [gpu_sched]
+[  387.932525] Call trace:
+[  387.935296]  dump_backtrace+0x170/0x1b8
+[  387.939403]  show_stack+0x20/0x38
+[  387.942907]  dump_stack_lvl+0x90/0xd0
+[  387.946785]  dump_stack+0x18/0x28
+[  387.950301]  __ubsan_handle_out_of_bounds+0x98/0xd0
+[  387.955383]  v3d_csd_job_run+0x3a8/0x438 [v3d]
+[  387.960707]  drm_sched_run_job_work+0x520/0x6d0 [gpu_sched]
+[  387.966862]  process_one_work+0x62c/0xb48
+[  387.971296]  worker_thread+0x468/0x5b0
+[  387.975317]  kthread+0x1c4/0x1e0
+[  387.978818]  ret_from_fork+0x10/0x20
+[  387.983014] ---[ end trace ]---
+
+This happens because the UAPI provides only seven configuration
+registers and we are reading the eighth position of this u32 array.
+
+Therefore, fix the out-of-bounds read in `v3d_csd_job_run()` by
+accessing only seven positions of the '__u32 [7]' array. The eighth
+register does indeed exist on V3D 7.1, but it isn't currently used. That
+being so, let's guarantee that it remains unused and add a note that it
+could be set in a future patch.
+
+Fixes: 0ad5bc1ce463 ("drm/v3d: fix up register addresses for V3D 7.x")
+Reported-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
+Signed-off-by: Maíra Canal <mcanal@igalia.com>
+Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20240809152001.668314-1-mcanal@igalia.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/v3d/v3d_sched.c | 14 +++++++++++---
+ 1 file changed, 11 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
+index 30d5366d62883..0132403b8159f 100644
+--- a/drivers/gpu/drm/v3d/v3d_sched.c
++++ b/drivers/gpu/drm/v3d/v3d_sched.c
+@@ -315,7 +315,7 @@ v3d_csd_job_run(struct drm_sched_job *sched_job)
+       struct v3d_dev *v3d = job->base.v3d;
+       struct drm_device *dev = &v3d->drm;
+       struct dma_fence *fence;
+-      int i, csd_cfg0_reg, csd_cfg_reg_count;
++      int i, csd_cfg0_reg;
+ 
+       v3d->csd_job = job;
+ 
+@@ -335,9 +335,17 @@ v3d_csd_job_run(struct drm_sched_job *sched_job)
+       v3d_switch_perfmon(v3d, &job->base);
+ 
+       csd_cfg0_reg = V3D_CSD_QUEUED_CFG0(v3d->ver);
+-      csd_cfg_reg_count = v3d->ver < 71 ? 6 : 7;
+-      for (i = 1; i <= csd_cfg_reg_count; i++)
++      for (i = 1; i <= 6; i++)
+               V3D_CORE_WRITE(0, csd_cfg0_reg + 4 * i, job->args.cfg[i]);
++
++      /* Although V3D 7.1 has an eighth configuration register, we are not
++       * using it. Therefore, make sure it remains unused.
++       *
++       * XXX: Set the CFG7 register
++       */
++      if (v3d->ver >= 71)
++              V3D_CORE_WRITE(0, V3D_V7_CSD_QUEUED_CFG7, 0);
++
+       /* CFG0 write kicks off the job. */
+       V3D_CORE_WRITE(0, csd_cfg0_reg, job->args.cfg[0]);
+ 
+-- 
+2.43.0
+
diff --git a/queue-6.10/filelock-fix-name-of-file_lease-slab-cache.patch b/queue-6.10/filelock-fix-name-of-file_lease-slab-cache.patch

new file mode 100644 (file)

index 0000000..03cc579
--- /dev/null
+++ b/queue-6.10/filelock-fix-name-of-file_lease-slab-cache.patch
@@ -0,0 +1,43 @@
+From aad802c664867c00812d0e0166b0327fde317344 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Jul 2024 15:48:12 -0700
+Subject: filelock: fix name of file_lease slab cache
+
+From: Omar Sandoval <osandov@fb.com>
+
+[ Upstream commit 3f65f3c099bcb27949e712f39ba836f21785924a ]
+
+When struct file_lease was split out from struct file_lock, the name of
+the file_lock slab cache was copied to the new slab cache for
+file_lease. This name conflict causes confusion in /proc/slabinfo and
+/sys/kernel/slab. In particular, it caused failures in drgn's test case
+for slab cache merging.
+
+Link: https://github.com/osandov/drgn/blob/9ad29fd86499eb32847473e928b6540872d3d59a/tests/linux_kernel/helpers/test_slab.py#L81
+Fixes: c69ff4071935 ("filelock: split leases out of struct file_lock")
+Signed-off-by: Omar Sandoval <osandov@fb.com>
+Link: https://lore.kernel.org/r/2d1d053da1cafb3e7940c4f25952da4f0af34e38.1722293276.git.osandov@fb.com
+Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/locks.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/locks.c b/fs/locks.c
+index 9afb16e0683ff..e45cad40f8b6b 100644
+--- a/fs/locks.c
++++ b/fs/locks.c
+@@ -2984,7 +2984,7 @@ static int __init filelock_init(void)
+       filelock_cache = kmem_cache_create("file_lock_cache",
+                       sizeof(struct file_lock), 0, SLAB_PANIC, NULL);
+ 
+-      filelease_cache = kmem_cache_create("file_lock_cache",
++      filelease_cache = kmem_cache_create("file_lease_cache",
+                       sizeof(struct file_lease), 0, SLAB_PANIC, NULL);
+ 
+       for_each_possible_cpu(i) {
+-- 
+2.43.0
+
diff --git a/queue-6.10/gpio-mlxbf3-support-shutdown-function.patch b/queue-6.10/gpio-mlxbf3-support-shutdown-function.patch

new file mode 100644 (file)

index 0000000..7679e83
--- /dev/null
+++ b/queue-6.10/gpio-mlxbf3-support-shutdown-function.patch
@@ -0,0 +1,91 @@
+From bb7c217df1a5999ebc098b285de9a55b943dd880 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 11 Jun 2024 13:15:09 -0400
+Subject: gpio: mlxbf3: Support shutdown() function
+
+From: Asmaa Mnebhi <asmaa@nvidia.com>
+
+[ Upstream commit aad41832326723627ad8ac9ee8a543b6dca4454d ]
+
+During Linux graceful reboot, the GPIO interrupts are not disabled.
+Since the drivers are not removed during graceful reboot,
+the logic to call mlxbf3_gpio_irq_disable() is not triggered.
+Interrupts that remain enabled can cause issues on subsequent boots.
+
+For example, the mlxbf-gige driver contains PHY logic to bring up the link.
+If the gpio-mlxbf3 driver loads first, the mlxbf-gige driver
+will use a GPIO interrupt to bring up the link.
+Otherwise, it will use polling.
+The next time Linux boots and loads the drivers in this order, we encounter the issue:
+- mlxbf-gige loads first and uses polling while the GPIO10
+  interrupt is still enabled from the previous boot. So if
+  the interrupt triggers, there is nothing to clear it.
+- gpio-mlxbf3 loads.
+- i2c-mlxbf loads. The interrupt doesn't trigger for I2C
+  because it is shared with the GPIO interrupt line which
+  was not cleared.
+
+The solution is to add a shutdown function to the GPIO driver to clear and disable
+all interrupts. Also clear the interrupt after disabling it in mlxbf3_gpio_irq_disable().
+
+Fixes: 38a700efc510 ("gpio: mlxbf3: Add gpio driver support")
+Signed-off-by: Asmaa Mnebhi <asmaa@nvidia.com>
+Reviewed-by: David Thompson <davthompson@nvidia.com>
+Reviewed-by: Andy Shevchenko <andy@kernel.org>
+Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
+Link: https://lore.kernel.org/r/20240611171509.22151-1-asmaa@nvidia.com
+Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpio/gpio-mlxbf3.c | 14 ++++++++++++++
+ 1 file changed, 14 insertions(+)
+
+diff --git a/drivers/gpio/gpio-mlxbf3.c b/drivers/gpio/gpio-mlxbf3.c
+index d5906d419b0ab..10ea71273c891 100644
+--- a/drivers/gpio/gpio-mlxbf3.c
++++ b/drivers/gpio/gpio-mlxbf3.c
+@@ -39,6 +39,8 @@
+ #define MLXBF_GPIO_CAUSE_OR_EVTEN0        0x14
+ #define MLXBF_GPIO_CAUSE_OR_CLRCAUSE      0x18
+ 
++#define MLXBF_GPIO_CLR_ALL_INTS           GENMASK(31, 0)
++
+ struct mlxbf3_gpio_context {
+       struct gpio_chip gc;
+ 
+@@ -82,6 +84,8 @@ static void mlxbf3_gpio_irq_disable(struct irq_data *irqd)
+       val = readl(gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_EVTEN0);
+       val &= ~BIT(offset);
+       writel(val, gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_EVTEN0);
++
++      writel(BIT(offset), gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_CLRCAUSE);
+       raw_spin_unlock_irqrestore(&gs->gc.bgpio_lock, flags);
+ 
+       gpiochip_disable_irq(gc, offset);
+@@ -253,6 +257,15 @@ static int mlxbf3_gpio_probe(struct platform_device *pdev)
+       return 0;
+ }
+ 
++static void mlxbf3_gpio_shutdown(struct platform_device *pdev)
++{
++      struct mlxbf3_gpio_context *gs = platform_get_drvdata(pdev);
++
++      /* Disable and clear all interrupts */
++      writel(0, gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_EVTEN0);
++      writel(MLXBF_GPIO_CLR_ALL_INTS, gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_CLRCAUSE);
++}
++
+ static const struct acpi_device_id mlxbf3_gpio_acpi_match[] = {
+       { "MLNXBF33", 0 },
+       {}
+@@ -265,6 +278,7 @@ static struct platform_driver mlxbf3_gpio_driver = {
+               .acpi_match_table = mlxbf3_gpio_acpi_match,
+       },
+       .probe    = mlxbf3_gpio_probe,
++      .shutdown = mlxbf3_gpio_shutdown,
+ };
+ module_platform_driver(mlxbf3_gpio_driver);
+ 
+-- 
+2.43.0
+
diff --git a/queue-6.10/gtp-pull-network-headers-in-gtp_dev_xmit.patch b/queue-6.10/gtp-pull-network-headers-in-gtp_dev_xmit.patch

new file mode 100644 (file)

index 0000000..c56f36a
--- /dev/null
+++ b/queue-6.10/gtp-pull-network-headers-in-gtp_dev_xmit.patch
@@ -0,0 +1,98 @@
+From 70c490935879f95a7d81403d107e7aa9a0bd7b31 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Aug 2024 13:24:55 +0000
+Subject: gtp: pull network headers in gtp_dev_xmit()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 3a3be7ff9224f424e485287b54be00d2c6bd9c40 ]
+
+syzbot/KMSAN reported use of uninit-value in gtp_dev_xmit() [1]
+
+We must make sure the IPv4 or IPv6 header is pulled in skb->head
+before accessing fields in them.
+
+Use pskb_inet_may_pull() to fix this issue.
+
+[1]
+BUG: KMSAN: uninit-value in ipv6_pdp_find drivers/net/gtp.c:220 [inline]
+ BUG: KMSAN: uninit-value in gtp_build_skb_ip6 drivers/net/gtp.c:1229 [inline]
+ BUG: KMSAN: uninit-value in gtp_dev_xmit+0x1424/0x2540 drivers/net/gtp.c:1281
+  ipv6_pdp_find drivers/net/gtp.c:220 [inline]
+  gtp_build_skb_ip6 drivers/net/gtp.c:1229 [inline]
+  gtp_dev_xmit+0x1424/0x2540 drivers/net/gtp.c:1281
+  __netdev_start_xmit include/linux/netdevice.h:4913 [inline]
+  netdev_start_xmit include/linux/netdevice.h:4922 [inline]
+  xmit_one net/core/dev.c:3580 [inline]
+  dev_hard_start_xmit+0x247/0xa20 net/core/dev.c:3596
+  __dev_queue_xmit+0x358c/0x5610 net/core/dev.c:4423
+  dev_queue_xmit include/linux/netdevice.h:3105 [inline]
+  packet_xmit+0x9c/0x6c0 net/packet/af_packet.c:276
+  packet_snd net/packet/af_packet.c:3145 [inline]
+  packet_sendmsg+0x90e3/0xa3a0 net/packet/af_packet.c:3177
+  sock_sendmsg_nosec net/socket.c:730 [inline]
+  __sock_sendmsg+0x30f/0x380 net/socket.c:745
+  __sys_sendto+0x685/0x830 net/socket.c:2204
+  __do_sys_sendto net/socket.c:2216 [inline]
+  __se_sys_sendto net/socket.c:2212 [inline]
+  __x64_sys_sendto+0x125/0x1d0 net/socket.c:2212
+  x64_sys_call+0x3799/0x3c10 arch/x86/include/generated/asm/syscalls_64.h:45
+  do_syscall_x64 arch/x86/entry/common.c:52 [inline]
+  do_syscall_64+0xcd/0x1e0 arch/x86/entry/common.c:83
+ entry_SYSCALL_64_after_hwframe+0x77/0x7f
+
+Uninit was created at:
+  slab_post_alloc_hook mm/slub.c:3994 [inline]
+  slab_alloc_node mm/slub.c:4037 [inline]
+  kmem_cache_alloc_node_noprof+0x6bf/0xb80 mm/slub.c:4080
+  kmalloc_reserve+0x13d/0x4a0 net/core/skbuff.c:583
+  __alloc_skb+0x363/0x7b0 net/core/skbuff.c:674
+  alloc_skb include/linux/skbuff.h:1320 [inline]
+  alloc_skb_with_frags+0xc8/0xbf0 net/core/skbuff.c:6526
+  sock_alloc_send_pskb+0xa81/0xbf0 net/core/sock.c:2815
+  packet_alloc_skb net/packet/af_packet.c:2994 [inline]
+  packet_snd net/packet/af_packet.c:3088 [inline]
+  packet_sendmsg+0x749c/0xa3a0 net/packet/af_packet.c:3177
+  sock_sendmsg_nosec net/socket.c:730 [inline]
+  __sock_sendmsg+0x30f/0x380 net/socket.c:745
+  __sys_sendto+0x685/0x830 net/socket.c:2204
+  __do_sys_sendto net/socket.c:2216 [inline]
+  __se_sys_sendto net/socket.c:2212 [inline]
+  __x64_sys_sendto+0x125/0x1d0 net/socket.c:2212
+  x64_sys_call+0x3799/0x3c10 arch/x86/include/generated/asm/syscalls_64.h:45
+  do_syscall_x64 arch/x86/entry/common.c:52 [inline]
+  do_syscall_64+0xcd/0x1e0 arch/x86/entry/common.c:83
+ entry_SYSCALL_64_after_hwframe+0x77/0x7f
+
+CPU: 0 UID: 0 PID: 7115 Comm: syz.1.515 Not tainted 6.11.0-rc1-syzkaller-00043-g94ede2a3e913 #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 06/27/2024
+
+Fixes: 999cb275c807 ("gtp: add IPv6 support")
+Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Harald Welte <laforge@gnumonks.org>
+Reviewed-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Link: https://patch.msgid.link/20240808132455.3413916-1-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/gtp.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
+index 427b91aca50d3..0696faf60013e 100644
+--- a/drivers/net/gtp.c
++++ b/drivers/net/gtp.c
+@@ -1269,6 +1269,9 @@ static netdev_tx_t gtp_dev_xmit(struct sk_buff *skb, struct net_device *dev)
+       if (skb_cow_head(skb, dev->needed_headroom))
+               goto tx_err;
+ 
++      if (!pskb_inet_may_pull(skb))
++              goto tx_err;
++
+       skb_reset_inner_headers(skb);
+ 
+       /* PDP context lookups in gtp_build_skb_*() need rcu read-side lock. */
+-- 
+2.43.0
+
diff --git a/queue-6.10/igc-fix-packet-still-tx-after-gate-close-by-reducing.patch b/queue-6.10/igc-fix-packet-still-tx-after-gate-close-by-reducing.patch

new file mode 100644 (file)

index 0000000..603f74d
--- /dev/null
+++ b/queue-6.10/igc-fix-packet-still-tx-after-gate-close-by-reducing.patch
@@ -0,0 +1,149 @@
+From d8ac02f9fbb34a64d0dfd6255aa6e86a2a94be2c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 6 Jul 2024 11:38:07 -0400
+Subject: igc: Fix packet still tx after gate close by reducing i226 MAC retry
+ buffer
+
+From: Faizal Rahim <faizal.abdul.rahim@linux.intel.com>
+
+[ Upstream commit e037a26ead187901f83cad9c503ccece5ff6817a ]
+
+Testing uncovered that even when the taprio gate is closed, some packets
+still transmit.
+
+According to i225/6 hardware errata [1], traffic might overflow the
+planned QBV window. This happens because MAC maintains an internal buffer,
+primarily for supporting half duplex retries. Therefore, even when the
+gate closes, residual MAC data in the buffer may still transmit.
+
+To mitigate this for i226, reduce the MAC's internal buffer from 192 bytes
+to the recommended 88 bytes by modifying the RETX_CTL register value.
+
+This follows guidelines from:
+[1] Ethernet Controller I225/I226 Spec Update Rev 2.1 Errata Item 9:
+    TSN: Packet Transmission Might Cross Qbv Window
+[2] I225/6 SW User Manual Rev 1.2.4: Section 8.11.5 Retry Buffer Control
+
+Note that the RETX_CTL register can't be used in TSN mode because half
+duplex feature cannot coexist with TSN.
+
+Test Steps:
+1.  Send taprio cmd to board A:
+    tc qdisc replace dev enp1s0 parent root handle 100 taprio \
+    num_tc 4 \
+    map 3 2 1 0 3 3 3 3 3 3 3 3 3 3 3 3 \
+    queues 1@0 1@1 1@2 1@3 \
+    base-time 0 \
+    sched-entry S 0x07 500000 \
+    sched-entry S 0x0f 500000 \
+    flags 0x2 \
+    txtime-delay 0
+
+    Note that for TC3, gate should open for 500us and close for another
+    500us.
+
+3.  Take tcpdump log on Board B.
+
+4.  Send udp packets via UDP tai app from Board A to Board B.
+
+5.  Analyze tcpdump log via wireshark log on Board B. Ensure that the
+    total time from the first to the last packet received during one cycle
+    for TC3 does not exceed 500us.
+
+Fixes: 43546211738e ("igc: Add new device ID's")
+Signed-off-by: Faizal Rahim <faizal.abdul.rahim@linux.intel.com>
+Acked-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
+Tested-by: Mor Bar-Gabay <morx.bar.gabay@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igc/igc_defines.h |  6 ++++
+ drivers/net/ethernet/intel/igc/igc_tsn.c     | 34 ++++++++++++++++++++
+ 2 files changed, 40 insertions(+)
+
+diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
+index 5f92b3c7c3d4a..511384f3ec5cb 100644
+--- a/drivers/net/ethernet/intel/igc/igc_defines.h
++++ b/drivers/net/ethernet/intel/igc/igc_defines.h
+@@ -404,6 +404,12 @@
+ #define IGC_DTXMXPKTSZ_TSN    0x19 /* 1600 bytes of max TX DMA packet size */
+ #define IGC_DTXMXPKTSZ_DEFAULT        0x98 /* 9728-byte Jumbo frames */
+ 
++/* Retry Buffer Control */
++#define IGC_RETX_CTL                  0x041C
++#define IGC_RETX_CTL_WATERMARK_MASK   0xF
++#define IGC_RETX_CTL_QBVFULLTH_SHIFT  8 /* QBV Retry Buffer Full Threshold */
++#define IGC_RETX_CTL_QBVFULLEN        0x1000 /* Enable QBV Retry Buffer Full Threshold */
++
+ /* Transmit Scheduling Latency */
+ /* Latency between transmission scheduling (LaunchTime) and the time
+  * the packet is transmitted to the network in nanosecond.
+diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c
+index 22cefb1eeedfa..46d4c3275bbb5 100644
+--- a/drivers/net/ethernet/intel/igc/igc_tsn.c
++++ b/drivers/net/ethernet/intel/igc/igc_tsn.c
+@@ -78,6 +78,15 @@ void igc_tsn_adjust_txtime_offset(struct igc_adapter *adapter)
+       wr32(IGC_GTXOFFSET, txoffset);
+ }
+ 
++static void igc_tsn_restore_retx_default(struct igc_adapter *adapter)
++{
++      struct igc_hw *hw = &adapter->hw;
++      u32 retxctl;
++
++      retxctl = rd32(IGC_RETX_CTL) & IGC_RETX_CTL_WATERMARK_MASK;
++      wr32(IGC_RETX_CTL, retxctl);
++}
++
+ /* Returns the TSN specific registers to their default values after
+  * the adapter is reset.
+  */
+@@ -91,6 +100,9 @@ static int igc_tsn_disable_offload(struct igc_adapter *adapter)
+       wr32(IGC_TXPBS, I225_TXPBSIZE_DEFAULT);
+       wr32(IGC_DTXMXPKTSZ, IGC_DTXMXPKTSZ_DEFAULT);
+ 
++      if (igc_is_device_id_i226(hw))
++              igc_tsn_restore_retx_default(adapter);
++
+       tqavctrl = rd32(IGC_TQAVCTRL);
+       tqavctrl &= ~(IGC_TQAVCTRL_TRANSMIT_MODE_TSN |
+                     IGC_TQAVCTRL_ENHANCED_QAV | IGC_TQAVCTRL_FUTSCDDIS);
+@@ -111,6 +123,25 @@ static int igc_tsn_disable_offload(struct igc_adapter *adapter)
+       return 0;
+ }
+ 
++/* To partially fix i226 HW errata, reduce MAC internal buffering from 192 Bytes
++ * to 88 Bytes by setting RETX_CTL register using the recommendation from:
++ * a) Ethernet Controller I225/I226 Specification Update Rev 2.1
++ *    Item 9: TSN: Packet Transmission Might Cross the Qbv Window
++ * b) I225/6 SW User Manual Rev 1.2.4: Section 8.11.5 Retry Buffer Control
++ */
++static void igc_tsn_set_retx_qbvfullthreshold(struct igc_adapter *adapter)
++{
++      struct igc_hw *hw = &adapter->hw;
++      u32 retxctl, watermark;
++
++      retxctl = rd32(IGC_RETX_CTL);
++      watermark = retxctl & IGC_RETX_CTL_WATERMARK_MASK;
++      /* Set QBVFULLTH value using watermark and set QBVFULLEN */
++      retxctl |= (watermark << IGC_RETX_CTL_QBVFULLTH_SHIFT) |
++                 IGC_RETX_CTL_QBVFULLEN;
++      wr32(IGC_RETX_CTL, retxctl);
++}
++
+ static int igc_tsn_enable_offload(struct igc_adapter *adapter)
+ {
+       struct igc_hw *hw = &adapter->hw;
+@@ -123,6 +154,9 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter)
+       wr32(IGC_DTXMXPKTSZ, IGC_DTXMXPKTSZ_TSN);
+       wr32(IGC_TXPBS, IGC_TXPBSIZE_TSN);
+ 
++      if (igc_is_device_id_i226(hw))
++              igc_tsn_set_retx_qbvfullthreshold(adapter);
++
+       for (i = 0; i < adapter->num_tx_queues; i++) {
+               struct igc_ring *ring = adapter->tx_ring[i];
+               u32 txqctl = 0;
+-- 
+2.43.0
+
diff --git a/queue-6.10/igc-fix-qbv-tx-latency-by-setting-gtxoffset.patch b/queue-6.10/igc-fix-qbv-tx-latency-by-setting-gtxoffset.patch

new file mode 100644 (file)

index 0000000..9496483
--- /dev/null
+++ b/queue-6.10/igc-fix-qbv-tx-latency-by-setting-gtxoffset.patch
@@ -0,0 +1,95 @@
+From 465add7f58f182e53575b01715c43e5c118688f7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 7 Jul 2024 08:53:18 -0400
+Subject: igc: Fix qbv tx latency by setting gtxoffset
+
+From: Faizal Rahim <faizal.abdul.rahim@linux.intel.com>
+
+[ Upstream commit 6c3fc0b1c3d073bd6fc3bf43dbd0e64240537464 ]
+
+A large tx latency issue was discovered during testing when only QBV was
+enabled. The issue occurs because gtxoffset was not set when QBV is
+active; it was only set when launch time is active.
+
+The patch "igc: Correct the launchtime offset" only sets gtxoffset when
+the launchtime_enable field is set by the user. Enabling launchtime_enable
+ultimately sets the register IGC_TXQCTL_QUEUE_MODE_LAUNCHT (referred to as
+LaunchT in the SW user manual).
+
+Section 7.5.2.6 of the IGC i225/6 SW User Manual Rev 1.2.4 states:
+"The latency between transmission scheduling (launch time) and the
+time the packet is transmitted to the network is listed in Table 7-61."
+
+However, the patch misinterprets the phrase "launch time" in that section
+by assuming it specifically refers to the LaunchT register, whereas it
+actually denotes the generic term for when a packet is released from the
+internal buffer to the MAC transmit logic.
+
+This launch time, as per that section, also implicitly refers to the QBV
+gate open time, where a packet waits in the buffer for the QBV gate to
+open. Therefore, latency applies whenever QBV is in use. TSN features such
+as QBU and QAV reuse QBV, making the latency universal to TSN features.
+
+Discussed with i226 HW owner (Shalev, Avi) and we were in agreement that
+the term "launch time" used in Section 7.5.2.6 is not clear and can be
+easily misinterpreted. Avi will update this section to:
+"When TQAVCTRL.TRANSMIT_MODE = TSN, the latency between transmission
+scheduling and the time the packet is transmitted to the network is listed
+in Table 7-61."
+
+Fix this issue by using igc_tsn_is_tx_mode_in_tsn() as a condition to
+write to gtxoffset, aligning with the newly updated SW User Manual.
+
+Tested:
+1. Enrol taprio on talker board
+   base-time 0
+   cycle-time 1000000
+   flags 0x2
+   index 0 cmd S gatemask 0x1 interval1
+   index 0 cmd S gatemask 0x1 interval2
+
+   Note:
+   interval1 = interval for a 64 bytes packet to go through
+   interval2 = cycle-time - interval1
+
+2. Take tcpdump on listener board
+
+3. Use udp tai app on talker to send packets to listener
+
+4. Check the timestamp on listener via wireshark
+
+Test Result:
+100 Mbps: 113 ~193 ns
+1000 Mbps: 52 ~ 84 ns
+2500 Mbps: 95 ~ 223 ns
+
+Note that the test result is similar to the patch "igc: Correct the
+launchtime offset".
+
+Fixes: 790835fcc0cb ("igc: Correct the launchtime offset")
+Signed-off-by: Faizal Rahim <faizal.abdul.rahim@linux.intel.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Acked-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
+Tested-by: Mor Bar-Gabay <morx.bar.gabay@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igc/igc_tsn.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c
+index ada7514305171..d68fa7f3d5f07 100644
+--- a/drivers/net/ethernet/intel/igc/igc_tsn.c
++++ b/drivers/net/ethernet/intel/igc/igc_tsn.c
+@@ -61,7 +61,7 @@ void igc_tsn_adjust_txtime_offset(struct igc_adapter *adapter)
+       struct igc_hw *hw = &adapter->hw;
+       u16 txoffset;
+ 
+-      if (!is_any_launchtime(adapter))
++      if (!igc_tsn_is_tx_mode_in_tsn(adapter))
+               return;
+ 
+       switch (adapter->link_speed) {
+-- 
+2.43.0
+
diff --git a/queue-6.10/igc-fix-qbv_config_change_errors-logics.patch b/queue-6.10/igc-fix-qbv_config_change_errors-logics.patch

new file mode 100644 (file)

index 0000000..d8eb4ff
--- /dev/null
+++ b/queue-6.10/igc-fix-qbv_config_change_errors-logics.patch
@@ -0,0 +1,131 @@
+From 864befb514567764e30a68754bc9df8f3bd7b969 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 7 Jul 2024 08:53:16 -0400
+Subject: igc: Fix qbv_config_change_errors logics
+
+From: Faizal Rahim <faizal.abdul.rahim@linux.intel.com>
+
+[ Upstream commit f8d6acaee9d35cbff3c3cfad94641666c596f8da ]
+
+When user issues these cmds:
+1. Either a) or b)
+   a) mqprio with hardware offload disabled
+   b) taprio with txtime-assist feature enabled
+2. etf
+3. tc qdisc delete
+4. taprio with base time in the past
+
+At step 4, qbv_config_change_errors wrongly increased by 1.
+
+Excerpt from IEEE 802.1Q-2018 8.6.9.3.1:
+"If AdminBaseTime specifies a time in the past, and the current schedule
+is running, then: Increment ConfigChangeError counter"
+
+qbv_config_change_errors should only increase if base time is in the past
+and taprio is active. From the user's perspective, taprio was not active when
+first triggered at step 4. However, i225/6 reuses qbv for etf, so qbv is
+enabled with a dummy schedule at step 2 where it enters
+igc_tsn_enable_offload() and qbv_count got incremented to 1. At step 4, it
+enters igc_tsn_enable_offload() again, qbv_count is incremented to 2.
+Because taprio is running, tc_setup_type is TC_SETUP_QDISC_ETF and
+qbv_count > 1, qbv_config_change_errors value got incremented.
+
+This issue happens due to reliance on qbv_count field where a non-zero
+value indicates that taprio is running. But qbv_count increases
+regardless if taprio is triggered by user or by other tsn feature. It does
+not align with qbv_config_change_errors expectation where it is only
+concerned with taprio triggered by user.
+
+Fix this by relocating the qbv_config_change_errors logic to
+igc_save_qbv_schedule(), eliminating reliance on qbv_count and its
+inaccuracies from i225/6's multiple uses of qbv feature for other TSN
+features.
+
+The new function created: igc_tsn_is_taprio_activated_by_user() uses
+taprio_offload_enable field to indicate that the current running taprio
+was triggered by user, instead of triggered by non-qbv feature like etf.
+
+Fixes: ae4fe4698300 ("igc: Add qbv_config_change_errors counter")
+Signed-off-by: Faizal Rahim <faizal.abdul.rahim@linux.intel.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Acked-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
+Tested-by: Mor Bar-Gabay <morx.bar.gabay@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igc/igc_main.c |  8 ++++++--
+ drivers/net/ethernet/intel/igc/igc_tsn.c  | 16 ++++++++--------
+ drivers/net/ethernet/intel/igc/igc_tsn.h  |  1 +
+ 3 files changed, 15 insertions(+), 10 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
+index 33069880c86c0..3041f8142324f 100644
+--- a/drivers/net/ethernet/intel/igc/igc_main.c
++++ b/drivers/net/ethernet/intel/igc/igc_main.c
+@@ -6319,12 +6319,16 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter,
+       if (!validate_schedule(adapter, qopt))
+               return -EINVAL;
+ 
++      igc_ptp_read(adapter, &now);
++
++      if (igc_tsn_is_taprio_activated_by_user(adapter) &&
++          is_base_time_past(qopt->base_time, &now))
++              adapter->qbv_config_change_errors++;
++
+       adapter->cycle_time = qopt->cycle_time;
+       adapter->base_time = qopt->base_time;
+       adapter->taprio_offload_enable = true;
+ 
+-      igc_ptp_read(adapter, &now);
+-
+       for (n = 0; n < qopt->num_entries; n++) {
+               struct tc_taprio_sched_entry *e = &qopt->entries[n];
+ 
+diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c
+index 46d4c3275bbb5..8ed7b965484da 100644
+--- a/drivers/net/ethernet/intel/igc/igc_tsn.c
++++ b/drivers/net/ethernet/intel/igc/igc_tsn.c
+@@ -87,6 +87,14 @@ static void igc_tsn_restore_retx_default(struct igc_adapter *adapter)
+       wr32(IGC_RETX_CTL, retxctl);
+ }
+ 
++bool igc_tsn_is_taprio_activated_by_user(struct igc_adapter *adapter)
++{
++      struct igc_hw *hw = &adapter->hw;
++
++      return (rd32(IGC_BASET_H) || rd32(IGC_BASET_L)) &&
++              adapter->taprio_offload_enable;
++}
++
+ /* Returns the TSN specific registers to their default values after
+  * the adapter is reset.
+  */
+@@ -296,14 +304,6 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter)
+               s64 n = div64_s64(ktime_sub_ns(systim, base_time), cycle);
+ 
+               base_time = ktime_add_ns(base_time, (n + 1) * cycle);
+-
+-              /* Increase the counter if scheduling into the past while
+-               * Gate Control List (GCL) is running.
+-               */
+-              if ((rd32(IGC_BASET_H) || rd32(IGC_BASET_L)) &&
+-                  (adapter->tc_setup_type == TC_SETUP_QDISC_TAPRIO) &&
+-                  (adapter->qbv_count > 1))
+-                      adapter->qbv_config_change_errors++;
+       } else {
+               if (igc_is_device_id_i226(hw)) {
+                       ktime_t adjust_time, expires_time;
+diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.h b/drivers/net/ethernet/intel/igc/igc_tsn.h
+index b53e6af560b73..98ec845a86bf0 100644
+--- a/drivers/net/ethernet/intel/igc/igc_tsn.h
++++ b/drivers/net/ethernet/intel/igc/igc_tsn.h
+@@ -7,5 +7,6 @@
+ int igc_tsn_offload_apply(struct igc_adapter *adapter);
+ int igc_tsn_reset(struct igc_adapter *adapter);
+ void igc_tsn_adjust_txtime_offset(struct igc_adapter *adapter);
++bool igc_tsn_is_taprio_activated_by_user(struct igc_adapter *adapter);
+ 
+ #endif /* _IGC_BASE_H */
+-- 
+2.43.0
+
diff --git a/queue-6.10/igc-fix-reset-adapter-logics-when-tx-mode-change.patch b/queue-6.10/igc-fix-reset-adapter-logics-when-tx-mode-change.patch

new file mode 100644 (file)

index 0000000..7b12a6a
--- /dev/null
+++ b/queue-6.10/igc-fix-reset-adapter-logics-when-tx-mode-change.patch
@@ -0,0 +1,105 @@
+From deed0e3f930a0a9c82e35e8e530aaceab7f466f3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 7 Jul 2024 08:53:17 -0400
+Subject: igc: Fix reset adapter logics when tx mode change
+
+From: Faizal Rahim <faizal.abdul.rahim@linux.intel.com>
+
+[ Upstream commit 0afeaeb5dae86aceded0d5f0c3a54d27858c0c6f ]
+
+Following the "igc: Fix TX Hang issue when QBV Gate is closed" changes,
+remaining issues with the reset adapter logic in igc_tsn_offload_apply()
+have been observed:
+
+1. The reset adapter logics for i225 and i226 differ, although they should
+   be the same according to the guidelines in I225/6 HW Design Section
+   7.5.2.1 on software initialization during tx mode changes.
+2. The i225 resets adapter every time, even though tx mode doesn't change.
+   This occurs solely based on the condition igc_is_device_id_i225() when
+   calling schedule_work().
+3. i226 doesn't reset adapter for tsn->legacy tx mode changes. It only
+   resets adapter for legacy->tsn tx mode transitions.
+4. qbv_count introduced in the patch is actually not needed; in this
+   context, a non-zero value of qbv_count is used to indicate if tx mode
+   was unconditionally set to tsn in igc_tsn_enable_offload(). This could
+   be replaced by checking the existing register
+   IGC_TQAVCTRL_TRANSMIT_MODE_TSN bit.
+
+This patch resolves all issues and enters schedule_work() to reset the
+adapter only when changing tx mode. It also removes reliance on qbv_count.
+
+qbv_count field will be removed in a future patch.
+
+Test ran:
+
+1. Verify reset adapter behaviour in i225/6:
+   a) Enrol a new GCL
+      Reset adapter observed (tx mode change legacy->tsn)
+   b) Enrol a new GCL without deleting qdisc
+      No reset adapter observed (tx mode remain tsn->tsn)
+   c) Delete qdisc
+      Reset adapter observed (tx mode change tsn->legacy)
+
+2. Tested scenario from "igc: Fix TX Hang issue when QBV Gate is closed"
+   to confirm it remains resolved.
+
+Fixes: 175c241288c0 ("igc: Fix TX Hang issue when QBV Gate is closed")
+Signed-off-by: Faizal Rahim <faizal.abdul.rahim@linux.intel.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Acked-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
+Tested-by: Mor Bar-Gabay <morx.bar.gabay@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igc/igc_tsn.c | 24 +++++++++++++++++++-----
+ 1 file changed, 19 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c
+index 8ed7b965484da..ada7514305171 100644
+--- a/drivers/net/ethernet/intel/igc/igc_tsn.c
++++ b/drivers/net/ethernet/intel/igc/igc_tsn.c
+@@ -49,6 +49,13 @@ static unsigned int igc_tsn_new_flags(struct igc_adapter *adapter)
+       return new_flags;
+ }
+ 
++static bool igc_tsn_is_tx_mode_in_tsn(struct igc_adapter *adapter)
++{
++      struct igc_hw *hw = &adapter->hw;
++
++      return !!(rd32(IGC_TQAVCTRL) & IGC_TQAVCTRL_TRANSMIT_MODE_TSN);
++}
++
+ void igc_tsn_adjust_txtime_offset(struct igc_adapter *adapter)
+ {
+       struct igc_hw *hw = &adapter->hw;
+@@ -365,15 +372,22 @@ int igc_tsn_reset(struct igc_adapter *adapter)
+       return err;
+ }
+ 
+-int igc_tsn_offload_apply(struct igc_adapter *adapter)
++static bool igc_tsn_will_tx_mode_change(struct igc_adapter *adapter)
+ {
+-      struct igc_hw *hw = &adapter->hw;
++      bool any_tsn_enabled = !!(igc_tsn_new_flags(adapter) &
++                                IGC_FLAG_TSN_ANY_ENABLED);
++
++      return (any_tsn_enabled && !igc_tsn_is_tx_mode_in_tsn(adapter)) ||
++             (!any_tsn_enabled && igc_tsn_is_tx_mode_in_tsn(adapter));
++}
+ 
+-      /* Per I225/6 HW Design Section 7.5.2.1, transmit mode
+-       * cannot be changed dynamically. Require reset the adapter.
++int igc_tsn_offload_apply(struct igc_adapter *adapter)
++{
++      /* Per I225/6 HW Design Section 7.5.2.1 guideline, if tx mode change
++       * from legacy->tsn or tsn->legacy, then reset adapter is needed.
+        */
+       if (netif_running(adapter->netdev) &&
+-          (igc_is_device_id_i225(hw) || !adapter->qbv_count)) {
++          igc_tsn_will_tx_mode_change(adapter)) {
+               schedule_work(&adapter->reset_task);
+               return 0;
+       }
+-- 
+2.43.0
+
diff --git a/queue-6.10/iommu-restore-lost-return-in-iommu_report_device_fau.patch b/queue-6.10/iommu-restore-lost-return-in-iommu_report_device_fau.patch

new file mode 100644 (file)

index 0000000..0b8f92a
--- /dev/null
+++ b/queue-6.10/iommu-restore-lost-return-in-iommu_report_device_fau.patch
@@ -0,0 +1,43 @@
+From f14cc7fb69bb4d79d9dfee9ab4678b11ea2cdc84 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 1 Aug 2024 09:26:04 -0300
+Subject: iommu: Restore lost return in iommu_report_device_fault()
+
+From: Barak Biber <bbiber@nvidia.com>
+
+[ Upstream commit fca5b78511e98bdff2cdd55c172b23200a7b3404 ]
+
+When iommu_report_device_fault gets called with a partial fault it is
+supposed to collect the fault into the group and then return.
+
+Instead the return was accidentally deleted, which results in trying to
+process the fault and an eventual crash.
+
+Deleting the return was a typo, put it back.
+
+Fixes: 3dfa64aecbaf ("iommu: Make iommu_report_device_fault() return void")
+Signed-off-by: Barak Biber <bbiber@nvidia.com>
+Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
+Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
+Link: https://lore.kernel.org/r/0-v1-e7153d9c8cee+1c6-iommu_fault_fix_jgg@nvidia.com
+Signed-off-by: Joerg Roedel <jroedel@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/iommu/io-pgfault.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c
+index 06d78fcc79fdb..f2c87c695a17c 100644
+--- a/drivers/iommu/io-pgfault.c
++++ b/drivers/iommu/io-pgfault.c
+@@ -192,6 +192,7 @@ void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt)
+               report_partial_fault(iopf_param, fault);
+               iopf_put_dev_fault_param(iopf_param);
+               /* A request that is not the last does not need to be ack'd */
++              return;
+       }
+ 
+       /*
+-- 
+2.43.0
+
diff --git a/queue-6.10/kallsyms-do-not-cleanup-.llvm.-hash-suffix-before-so.patch b/queue-6.10/kallsyms-do-not-cleanup-.llvm.-hash-suffix-before-so.patch

new file mode 100644 (file)

index 0000000..c9fa7f7
--- /dev/null
+++ b/queue-6.10/kallsyms-do-not-cleanup-.llvm.-hash-suffix-before-so.patch
@@ -0,0 +1,122 @@
+From b3885e22f2cd2170fd9ff6dea79005cae96cbfe0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 7 Aug 2024 15:05:12 -0700
+Subject: kallsyms: Do not cleanup .llvm.<hash> suffix before sorting symbols
+
+From: Song Liu <song@kernel.org>
+
+[ Upstream commit 020925ce92990c3bf59ab2cde386ac6d9ec734ff ]
+
+Cleaning up the symbols causes various issues afterwards. Let's sort
+the list based on original name.
+
+Signed-off-by: Song Liu <song@kernel.org>
+Fixes: 8cc32a9bbf29 ("kallsyms: strip LTO-only suffixes from promoted global functions")
+Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Tested-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Acked-by: Petr Mladek <pmladek@suse.com>
+Reviewed-by: Sami Tolvanen <samitolvanen@google.com>
+Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
+Link: https://lore.kernel.org/r/20240807220513.3100483-2-song@kernel.org
+Signed-off-by: Kees Cook <kees@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ scripts/kallsyms.c      | 31 ++-----------------------------
+ scripts/link-vmlinux.sh |  4 ----
+ 2 files changed, 2 insertions(+), 33 deletions(-)
+
+diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c
+index 55a423519f2e5..839d9c49f28ce 100644
+--- a/scripts/kallsyms.c
++++ b/scripts/kallsyms.c
+@@ -5,8 +5,7 @@
+  * This software may be used and distributed according to the terms
+  * of the GNU General Public License, incorporated herein by reference.
+  *
+- * Usage: kallsyms [--all-symbols] [--absolute-percpu]
+- *                         [--lto-clang] in.map > out.S
++ * Usage: kallsyms [--all-symbols] [--absolute-percpu]  in.map > out.S
+  *
+  *      Table compression uses all the unused char codes on the symbols and
+  *  maps these to the most used substrings (tokens). For instance, it might
+@@ -63,7 +62,6 @@ static struct sym_entry **table;
+ static unsigned int table_size, table_cnt;
+ static int all_symbols;
+ static int absolute_percpu;
+-static int lto_clang;
+ 
+ static int token_profit[0x10000];
+ 
+@@ -74,8 +72,7 @@ static unsigned char best_table_len[256];
+ 
+ static void usage(void)
+ {
+-      fprintf(stderr, "Usage: kallsyms [--all-symbols] [--absolute-percpu] "
+-                      "[--lto-clang] in.map > out.S\n");
++      fprintf(stderr, "Usage: kallsyms [--all-symbols] [--absolute-percpu] in.map > out.S\n");
+       exit(1);
+ }
+ 
+@@ -345,25 +342,6 @@ static int symbol_absolute(const struct sym_entry *s)
+       return s->percpu_absolute;
+ }
+ 
+-static void cleanup_symbol_name(char *s)
+-{
+-      char *p;
+-
+-      /*
+-       * ASCII[.]   = 2e
+-       * ASCII[0-9] = 30,39
+-       * ASCII[A-Z] = 41,5a
+-       * ASCII[_]   = 5f
+-       * ASCII[a-z] = 61,7a
+-       *
+-       * As above, replacing the first '.' in ".llvm." with '\0' does not
+-       * affect the main sorting, but it helps us with subsorting.
+-       */
+-      p = strstr(s, ".llvm.");
+-      if (p)
+-              *p = '\0';
+-}
+-
+ static int compare_names(const void *a, const void *b)
+ {
+       int ret;
+@@ -528,10 +506,6 @@ static void write_src(void)
+       output_address(relative_base);
+       printf("\n");
+ 
+-      if (lto_clang)
+-              for (i = 0; i < table_cnt; i++)
+-                      cleanup_symbol_name((char *)table[i]->sym);
+-
+       sort_symbols_by_name();
+       output_label("kallsyms_seqs_of_names");
+       for (i = 0; i < table_cnt; i++)
+@@ -808,7 +782,6 @@ int main(int argc, char **argv)
+               static const struct option long_options[] = {
+                       {"all-symbols",     no_argument, &all_symbols,     1},
+                       {"absolute-percpu", no_argument, &absolute_percpu, 1},
+-                      {"lto-clang",       no_argument, &lto_clang,       1},
+                       {},
+               };
+ 
+diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
+index 31581504489ef..1e41b330550e6 100755
+--- a/scripts/link-vmlinux.sh
++++ b/scripts/link-vmlinux.sh
+@@ -159,10 +159,6 @@ kallsyms()
+               kallsymopt="${kallsymopt} --absolute-percpu"
+       fi
+ 
+-      if is_enabled CONFIG_LTO_CLANG; then
+-              kallsymopt="${kallsymopt} --lto-clang"
+-      fi
+-
+       info KSYMS "${2}.S"
+       scripts/kallsyms ${kallsymopt} "${1}" > "${2}.S"
+ 
+-- 
+2.43.0
+
diff --git a/queue-6.10/kallsyms-get-rid-of-code-for-absolute-kallsyms.patch b/queue-6.10/kallsyms-get-rid-of-code-for-absolute-kallsyms.patch

new file mode 100644 (file)

index 0000000..913cce7
--- /dev/null
+++ b/queue-6.10/kallsyms-get-rid-of-code-for-absolute-kallsyms.patch
@@ -0,0 +1,272 @@
+From e484fe8c045b9ea970cc63e49fa02f31da968a1f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 21 Feb 2024 21:26:53 +0100
+Subject: kallsyms: get rid of code for absolute kallsyms
+
+From: Jann Horn <jannh@google.com>
+
+[ Upstream commit 64e166099b69bfc09f667253358a15160b86ea43 ]
+
+Commit cf8e8658100d ("arch: Remove Itanium (IA-64) architecture")
+removed the last use of the absolute kallsyms.
+
+Signed-off-by: Jann Horn <jannh@google.com>
+Acked-by: Arnd Bergmann <arnd@arndb.de>
+Link: https://lore.kernel.org/all/20240221202655.2423854-1-jannh@google.com/
+[masahiroy@kernel.org: rebase the code and reword the commit description]
+Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
+Stable-dep-of: 020925ce9299 ("kallsyms: Do not cleanup .llvm.<hash> suffix before sorting symbols")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ init/Kconfig                        | 18 -------
+ kernel/kallsyms.c                   |  5 +-
+ kernel/kallsyms_internal.h          |  1 -
+ kernel/vmcore_info.c                |  4 --
+ scripts/kallsyms.c                  | 78 ++++++++++++-----------------
+ scripts/link-vmlinux.sh             |  4 --
+ tools/perf/tests/vmlinux-kallsyms.c |  1 -
+ 7 files changed, 33 insertions(+), 78 deletions(-)
+
+diff --git a/init/Kconfig b/init/Kconfig
+index d8a971b804d32..6e97693b675f2 100644
+--- a/init/Kconfig
++++ b/init/Kconfig
+@@ -1789,24 +1789,6 @@ config KALLSYMS_ABSOLUTE_PERCPU
+       depends on KALLSYMS
+       default X86_64 && SMP
+ 
+-config KALLSYMS_BASE_RELATIVE
+-      bool
+-      depends on KALLSYMS
+-      default y
+-      help
+-        Instead of emitting them as absolute values in the native word size,
+-        emit the symbol references in the kallsyms table as 32-bit entries,
+-        each containing a relative value in the range [base, base + U32_MAX]
+-        or, when KALLSYMS_ABSOLUTE_PERCPU is in effect, each containing either
+-        an absolute value in the range [0, S32_MAX] or a relative value in the
+-        range [base, base + S32_MAX], where base is the lowest relative symbol
+-        address encountered in the image.
+-
+-        On 64-bit builds, this reduces the size of the address table by 50%,
+-        but more importantly, it results in entries whose values are build
+-        time constants, and no relocation pass is required at runtime to fix
+-        up the entries based on the runtime load address of the kernel.
+-
+ # end of the "standard kernel features (expert users)" menu
+ 
+ config ARCH_HAS_MEMBARRIER_CALLBACKS
+diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
+index 98b9622d372e4..fb2c77368d187 100644
+--- a/kernel/kallsyms.c
++++ b/kernel/kallsyms.c
+@@ -148,9 +148,6 @@ static unsigned int get_symbol_offset(unsigned long pos)
+ 
+ unsigned long kallsyms_sym_address(int idx)
+ {
+-      if (!IS_ENABLED(CONFIG_KALLSYMS_BASE_RELATIVE))
+-              return kallsyms_addresses[idx];
+-
+       /* values are unsigned offsets if --absolute-percpu is not in effect */
+       if (!IS_ENABLED(CONFIG_KALLSYMS_ABSOLUTE_PERCPU))
+               return kallsyms_relative_base + (u32)kallsyms_offsets[idx];
+@@ -325,7 +322,7 @@ static unsigned long get_symbol_pos(unsigned long addr,
+       unsigned long symbol_start = 0, symbol_end = 0;
+       unsigned long i, low, high, mid;
+ 
+-      /* Do a binary search on the sorted kallsyms_addresses array. */
++      /* Do a binary search on the sorted kallsyms_offsets array. */
+       low = 0;
+       high = kallsyms_num_syms;
+ 
+diff --git a/kernel/kallsyms_internal.h b/kernel/kallsyms_internal.h
+index 925f2ab22639a..9633782f82500 100644
+--- a/kernel/kallsyms_internal.h
++++ b/kernel/kallsyms_internal.h
+@@ -4,7 +4,6 @@
+ 
+ #include <linux/types.h>
+ 
+-extern const unsigned long kallsyms_addresses[];
+ extern const int kallsyms_offsets[];
+ extern const u8 kallsyms_names[];
+ 
+diff --git a/kernel/vmcore_info.c b/kernel/vmcore_info.c
+index 1d5eadd9dd61c..8b4f8cc2e0ec0 100644
+--- a/kernel/vmcore_info.c
++++ b/kernel/vmcore_info.c
+@@ -216,12 +216,8 @@ static int __init crash_save_vmcoreinfo_init(void)
+       VMCOREINFO_SYMBOL(kallsyms_num_syms);
+       VMCOREINFO_SYMBOL(kallsyms_token_table);
+       VMCOREINFO_SYMBOL(kallsyms_token_index);
+-#ifdef CONFIG_KALLSYMS_BASE_RELATIVE
+       VMCOREINFO_SYMBOL(kallsyms_offsets);
+       VMCOREINFO_SYMBOL(kallsyms_relative_base);
+-#else
+-      VMCOREINFO_SYMBOL(kallsyms_addresses);
+-#endif /* CONFIG_KALLSYMS_BASE_RELATIVE */
+ #endif /* CONFIG_KALLSYMS */
+ 
+       arch_crash_save_vmcoreinfo();
+diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c
+index fa53b5eef5530..55a423519f2e5 100644
+--- a/scripts/kallsyms.c
++++ b/scripts/kallsyms.c
+@@ -6,7 +6,7 @@
+  * of the GNU General Public License, incorporated herein by reference.
+  *
+  * Usage: kallsyms [--all-symbols] [--absolute-percpu]
+- *                         [--base-relative] [--lto-clang] in.map > out.S
++ *                         [--lto-clang] in.map > out.S
+  *
+  *      Table compression uses all the unused char codes on the symbols and
+  *  maps these to the most used substrings (tokens). For instance, it might
+@@ -63,7 +63,6 @@ static struct sym_entry **table;
+ static unsigned int table_size, table_cnt;
+ static int all_symbols;
+ static int absolute_percpu;
+-static int base_relative;
+ static int lto_clang;
+ 
+ static int token_profit[0x10000];
+@@ -76,7 +75,7 @@ static unsigned char best_table_len[256];
+ static void usage(void)
+ {
+       fprintf(stderr, "Usage: kallsyms [--all-symbols] [--absolute-percpu] "
+-                      "[--base-relative] [--lto-clang] in.map > out.S\n");
++                      "[--lto-clang] in.map > out.S\n");
+       exit(1);
+ }
+ 
+@@ -491,54 +490,43 @@ static void write_src(void)
+               printf("\t.short\t%d\n", best_idx[i]);
+       printf("\n");
+ 
+-      if (!base_relative)
+-              output_label("kallsyms_addresses");
+-      else
+-              output_label("kallsyms_offsets");
++      output_label("kallsyms_offsets");
+ 
+       for (i = 0; i < table_cnt; i++) {
+-              if (base_relative) {
+-                      /*
+-                       * Use the offset relative to the lowest value
+-                       * encountered of all relative symbols, and emit
+-                       * non-relocatable fixed offsets that will be fixed
+-                       * up at runtime.
+-                       */
++              /*
++               * Use the offset relative to the lowest value
++               * encountered of all relative symbols, and emit
++               * non-relocatable fixed offsets that will be fixed
++               * up at runtime.
++               */
+ 
+-                      long long offset;
+-                      int overflow;
+-
+-                      if (!absolute_percpu) {
+-                              offset = table[i]->addr - relative_base;
+-                              overflow = (offset < 0 || offset > UINT_MAX);
+-                      } else if (symbol_absolute(table[i])) {
+-                              offset = table[i]->addr;
+-                              overflow = (offset < 0 || offset > INT_MAX);
+-                      } else {
+-                              offset = relative_base - table[i]->addr - 1;
+-                              overflow = (offset < INT_MIN || offset >= 0);
+-                      }
+-                      if (overflow) {
+-                              fprintf(stderr, "kallsyms failure: "
+-                                      "%s symbol value %#llx out of range in relative mode\n",
+-                                      symbol_absolute(table[i]) ? "absolute" : "relative",
+-                                      table[i]->addr);
+-                              exit(EXIT_FAILURE);
+-                      }
+-                      printf("\t.long\t%#x    /* %s */\n", (int)offset, table[i]->sym);
+-              } else if (!symbol_absolute(table[i])) {
+-                      output_address(table[i]->addr);
++              long long offset;
++              int overflow;
++
++              if (!absolute_percpu) {
++                      offset = table[i]->addr - relative_base;
++                      overflow = (offset < 0 || offset > UINT_MAX);
++              } else if (symbol_absolute(table[i])) {
++                      offset = table[i]->addr;
++                      overflow = (offset < 0 || offset > INT_MAX);
+               } else {
+-                      printf("\tPTR\t%#llx\n", table[i]->addr);
++                      offset = relative_base - table[i]->addr - 1;
++                      overflow = (offset < INT_MIN || offset >= 0);
++              }
++              if (overflow) {
++                      fprintf(stderr, "kallsyms failure: "
++                              "%s symbol value %#llx out of range in relative mode\n",
++                              symbol_absolute(table[i]) ? "absolute" : "relative",
++                              table[i]->addr);
++                      exit(EXIT_FAILURE);
+               }
++              printf("\t.long\t%#x    /* %s */\n", (int)offset, table[i]->sym);
+       }
+       printf("\n");
+ 
+-      if (base_relative) {
+-              output_label("kallsyms_relative_base");
+-              output_address(relative_base);
+-              printf("\n");
+-      }
++      output_label("kallsyms_relative_base");
++      output_address(relative_base);
++      printf("\n");
+ 
+       if (lto_clang)
+               for (i = 0; i < table_cnt; i++)
+@@ -820,7 +808,6 @@ int main(int argc, char **argv)
+               static const struct option long_options[] = {
+                       {"all-symbols",     no_argument, &all_symbols,     1},
+                       {"absolute-percpu", no_argument, &absolute_percpu, 1},
+-                      {"base-relative",   no_argument, &base_relative,   1},
+                       {"lto-clang",       no_argument, &lto_clang,       1},
+                       {},
+               };
+@@ -841,8 +828,7 @@ int main(int argc, char **argv)
+       if (absolute_percpu)
+               make_percpus_absolute();
+       sort_symbols();
+-      if (base_relative)
+-              record_relative_base();
++      record_relative_base();
+       optimize_token_table();
+       write_src();
+ 
+diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
+index 83d605ba7241a..31581504489ef 100755
+--- a/scripts/link-vmlinux.sh
++++ b/scripts/link-vmlinux.sh
+@@ -159,10 +159,6 @@ kallsyms()
+               kallsymopt="${kallsymopt} --absolute-percpu"
+       fi
+ 
+-      if is_enabled CONFIG_KALLSYMS_BASE_RELATIVE; then
+-              kallsymopt="${kallsymopt} --base-relative"
+-      fi
+-
+       if is_enabled CONFIG_LTO_CLANG; then
+               kallsymopt="${kallsymopt} --lto-clang"
+       fi
+diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c
+index e30fd55f8e51d..cd3b480d20bd6 100644
+--- a/tools/perf/tests/vmlinux-kallsyms.c
++++ b/tools/perf/tests/vmlinux-kallsyms.c
+@@ -26,7 +26,6 @@ static bool is_ignored_symbol(const char *name, char type)
+                * when --all-symbols is specified so exclude them to get a
+                * stable symbol list.
+                */
+-              "kallsyms_addresses",
+               "kallsyms_offsets",
+               "kallsyms_relative_base",
+               "kallsyms_num_syms",
+-- 
+2.43.0
+
diff --git a/queue-6.10/kallsyms-match-symbols-exactly-with-config_lto_clang.patch b/queue-6.10/kallsyms-match-symbols-exactly-with-config_lto_clang.patch

new file mode 100644 (file)

index 0000000..789d7cc
--- /dev/null
+++ b/queue-6.10/kallsyms-match-symbols-exactly-with-config_lto_clang.patch
@@ -0,0 +1,192 @@
+From 19aa6d51bd08efbfe9778b3d7883fcf31ec900cd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 7 Aug 2024 15:05:13 -0700
+Subject: kallsyms: Match symbols exactly with CONFIG_LTO_CLANG
+
+From: Song Liu <song@kernel.org>
+
+[ Upstream commit fb6a421fb6153d97cf3058f9bd550b377b76a490 ]
+
+With CONFIG_LTO_CLANG=y, the compiler may add .llvm.<hash> suffix to
+function names to avoid duplication. APIs like kallsyms_lookup_name()
+and kallsyms_on_each_match_symbol() try to match these symbol names
+without the .llvm.<hash> suffix, e.g., match "c_stop" with symbol
+c_stop.llvm.17132674095431275852. This turned out to be problematic
+for use cases that require exact match, for example, livepatch.
+
+Fix this by making the APIs match symbols exactly.
+
+Also clean up kallsyms_selftest accordingly.
+
+Signed-off-by: Song Liu <song@kernel.org>
+Fixes: 8cc32a9bbf29 ("kallsyms: strip LTO-only suffixes from promoted global functions")
+Tested-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Acked-by: Petr Mladek <pmladek@suse.com>
+Reviewed-by: Sami Tolvanen <samitolvanen@google.com>
+Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
+Link: https://lore.kernel.org/r/20240807220513.3100483-3-song@kernel.org
+Signed-off-by: Kees Cook <kees@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/kallsyms.c          | 55 +++++---------------------------------
+ kernel/kallsyms_selftest.c | 22 +--------------
+ 2 files changed, 7 insertions(+), 70 deletions(-)
+
+diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
+index fb2c77368d187..a9a0ca605d4a8 100644
+--- a/kernel/kallsyms.c
++++ b/kernel/kallsyms.c
+@@ -160,38 +160,6 @@ unsigned long kallsyms_sym_address(int idx)
+       return kallsyms_relative_base - 1 - kallsyms_offsets[idx];
+ }
+ 
+-static void cleanup_symbol_name(char *s)
+-{
+-      char *res;
+-
+-      if (!IS_ENABLED(CONFIG_LTO_CLANG))
+-              return;
+-
+-      /*
+-       * LLVM appends various suffixes for local functions and variables that
+-       * must be promoted to global scope as part of LTO.  This can break
+-       * hooking of static functions with kprobes. '.' is not a valid
+-       * character in an identifier in C. Suffixes only in LLVM LTO observed:
+-       * - foo.llvm.[0-9a-f]+
+-       */
+-      res = strstr(s, ".llvm.");
+-      if (res)
+-              *res = '\0';
+-
+-      return;
+-}
+-
+-static int compare_symbol_name(const char *name, char *namebuf)
+-{
+-      /* The kallsyms_seqs_of_names is sorted based on names after
+-       * cleanup_symbol_name() (see scripts/kallsyms.c) if clang lto is enabled.
+-       * To ensure correct bisection in kallsyms_lookup_names(), do
+-       * cleanup_symbol_name(namebuf) before comparing name and namebuf.
+-       */
+-      cleanup_symbol_name(namebuf);
+-      return strcmp(name, namebuf);
+-}
+-
+ static unsigned int get_symbol_seq(int index)
+ {
+       unsigned int i, seq = 0;
+@@ -219,7 +187,7 @@ static int kallsyms_lookup_names(const char *name,
+               seq = get_symbol_seq(mid);
+               off = get_symbol_offset(seq);
+               kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf));
+-              ret = compare_symbol_name(name, namebuf);
++              ret = strcmp(name, namebuf);
+               if (ret > 0)
+                       low = mid + 1;
+               else if (ret < 0)
+@@ -236,7 +204,7 @@ static int kallsyms_lookup_names(const char *name,
+               seq = get_symbol_seq(low - 1);
+               off = get_symbol_offset(seq);
+               kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf));
+-              if (compare_symbol_name(name, namebuf))
++              if (strcmp(name, namebuf))
+                       break;
+               low--;
+       }
+@@ -248,7 +216,7 @@ static int kallsyms_lookup_names(const char *name,
+                       seq = get_symbol_seq(high + 1);
+                       off = get_symbol_offset(seq);
+                       kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf));
+-                      if (compare_symbol_name(name, namebuf))
++                      if (strcmp(name, namebuf))
+                               break;
+                       high++;
+               }
+@@ -407,8 +375,7 @@ static int kallsyms_lookup_buildid(unsigned long addr,
+               if (modbuildid)
+                       *modbuildid = NULL;
+ 
+-              ret = strlen(namebuf);
+-              goto found;
++              return strlen(namebuf);
+       }
+ 
+       /* See if it's in a module or a BPF JITed image. */
+@@ -422,8 +389,6 @@ static int kallsyms_lookup_buildid(unsigned long addr,
+               ret = ftrace_mod_address_lookup(addr, symbolsize,
+                                               offset, modname, namebuf);
+ 
+-found:
+-      cleanup_symbol_name(namebuf);
+       return ret;
+ }
+ 
+@@ -450,8 +415,6 @@ const char *kallsyms_lookup(unsigned long addr,
+ 
+ int lookup_symbol_name(unsigned long addr, char *symname)
+ {
+-      int res;
+-
+       symname[0] = '\0';
+       symname[KSYM_NAME_LEN - 1] = '\0';
+ 
+@@ -462,16 +425,10 @@ int lookup_symbol_name(unsigned long addr, char *symname)
+               /* Grab name */
+               kallsyms_expand_symbol(get_symbol_offset(pos),
+                                      symname, KSYM_NAME_LEN);
+-              goto found;
++              return 0;
+       }
+       /* See if it's in a module. */
+-      res = lookup_module_symbol_name(addr, symname);
+-      if (res)
+-              return res;
+-
+-found:
+-      cleanup_symbol_name(symname);
+-      return 0;
++      return lookup_module_symbol_name(addr, symname);
+ }
+ 
+ /* Look up a kernel symbol and return it in a text buffer. */
+diff --git a/kernel/kallsyms_selftest.c b/kernel/kallsyms_selftest.c
+index 2f84896a7bcbd..873f7c445488c 100644
+--- a/kernel/kallsyms_selftest.c
++++ b/kernel/kallsyms_selftest.c
+@@ -187,31 +187,11 @@ static void test_perf_kallsyms_lookup_name(void)
+               stat.min, stat.max, div_u64(stat.sum, stat.real_cnt));
+ }
+ 
+-static bool match_cleanup_name(const char *s, const char *name)
+-{
+-      char *p;
+-      int len;
+-
+-      if (!IS_ENABLED(CONFIG_LTO_CLANG))
+-              return false;
+-
+-      p = strstr(s, ".llvm.");
+-      if (!p)
+-              return false;
+-
+-      len = strlen(name);
+-      if (p - s != len)
+-              return false;
+-
+-      return !strncmp(s, name, len);
+-}
+-
+ static int find_symbol(void *data, const char *name, unsigned long addr)
+ {
+       struct test_stat *stat = (struct test_stat *)data;
+ 
+-      if (strcmp(name, stat->name) == 0 ||
+-          (!stat->perf && match_cleanup_name(name, stat->name))) {
++      if (!strcmp(name, stat->name)) {
+               stat->real_cnt++;
+               stat->addr = addr;
+ 
+-- 
+2.43.0
+
diff --git a/queue-6.10/kbuild-refactor-variables-in-scripts-link-vmlinux.sh.patch b/queue-6.10/kbuild-refactor-variables-in-scripts-link-vmlinux.sh.patch

new file mode 100644 (file)

index 0000000..a125d37
--- /dev/null
+++ b/queue-6.10/kbuild-refactor-variables-in-scripts-link-vmlinux.sh.patch
@@ -0,0 +1,196 @@
+From 32e9b203f158fc276d49ce06d40cd9123d0169ab Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 10 Jun 2024 20:25:16 +0900
+Subject: kbuild: refactor variables in scripts/link-vmlinux.sh
+
+From: Masahiro Yamada <masahiroy@kernel.org>
+
+[ Upstream commit ddf41329839f49dadf26973cd845ea160ac1784d ]
+
+Clean up the variables in scripts/link-vmlinux.sh
+
+ - Specify the extra objects directly in vmlinux_link()
+ - Move the AS rule to kallsyms()
+ - Set kallsymso and btf_vmlinux_bin_o where they are generated
+ - Remove unneeded variable, kallsymso_prev
+ - Introduce the btf_data variable
+ - Introduce the strip_debug flag instead of checking the output name
+
+No functional change intended.
+
+Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
+Reviewed-by: Nicolas Schier <nicolas@fjasle.eu>
+Stable-dep-of: 020925ce9299 ("kallsyms: Do not cleanup .llvm.<hash> suffix before sorting symbols")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ scripts/link-vmlinux.sh | 65 +++++++++++++++++++++--------------------
+ 1 file changed, 34 insertions(+), 31 deletions(-)
+
+diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
+index 518c70b8db507..3d9d7257143a0 100755
+--- a/scripts/link-vmlinux.sh
++++ b/scripts/link-vmlinux.sh
+@@ -45,7 +45,6 @@ info()
+ 
+ # Link of vmlinux
+ # ${1} - output file
+-# ${2}, ${3}, ... - optional extra .o files
+ vmlinux_link()
+ {
+       local output=${1}
+@@ -90,7 +89,7 @@ vmlinux_link()
+       ldflags="${ldflags} ${wl}--script=${objtree}/${KBUILD_LDS}"
+ 
+       # The kallsyms linking does not need debug symbols included.
+-      if [ "$output" != "${output#.tmp_vmlinux.kallsyms}" ] ; then
++      if [ -n "${strip_debug}" ] ; then
+               ldflags="${ldflags} ${wl}--strip-debug"
+       fi
+ 
+@@ -101,7 +100,7 @@ vmlinux_link()
+       ${ld} ${ldflags} -o ${output}                                   \
+               ${wl}--whole-archive ${objs} ${wl}--no-whole-archive    \
+               ${wl}--start-group ${libs} ${wl}--end-group             \
+-              $@ ${ldlibs}
++              ${kallsymso} ${btf_vmlinux_bin_o} ${ldlibs}
+ }
+ 
+ # generate .BTF typeinfo from DWARF debuginfo
+@@ -110,6 +109,7 @@ vmlinux_link()
+ gen_btf()
+ {
+       local pahole_ver
++      local btf_data=${2}
+ 
+       if ! [ -x "$(command -v ${PAHOLE})" ]; then
+               echo >&2 "BTF: ${1}: pahole (${PAHOLE}) is not available"
+@@ -124,16 +124,16 @@ gen_btf()
+ 
+       vmlinux_link ${1}
+ 
+-      info "BTF" ${2}
++      info BTF "${btf_data}"
+       LLVM_OBJCOPY="${OBJCOPY}" ${PAHOLE} -J ${PAHOLE_FLAGS} ${1}
+ 
+-      # Create ${2} which contains just .BTF section but no symbols. Add
++      # Create ${btf_data} which contains just .BTF section but no symbols. Add
+       # SHF_ALLOC because .BTF will be part of the vmlinux image. --strip-all
+       # deletes all symbols including __start_BTF and __stop_BTF, which will
+       # be redefined in the linker script. Add 2>/dev/null to suppress GNU
+       # objcopy warnings: "empty loadable segment detected at ..."
+       ${OBJCOPY} --only-section=.BTF --set-section-flags .BTF=alloc,readonly \
+-              --strip-all ${1} ${2} 2>/dev/null
++              --strip-all ${1} "${btf_data}" 2>/dev/null
+       # Change e_type to ET_REL so that it can be used to link final vmlinux.
+       # GNU ld 2.35+ and lld do not allow an ET_EXEC input.
+       if is_enabled CONFIG_CPU_BIG_ENDIAN; then
+@@ -141,10 +141,12 @@ gen_btf()
+       else
+               et_rel='\1\0'
+       fi
+-      printf "${et_rel}" | dd of=${2} conv=notrunc bs=1 seek=16 status=none
++      printf "${et_rel}" | dd of="${btf_data}" conv=notrunc bs=1 seek=16 status=none
++
++      btf_vmlinux_bin_o=${btf_data}
+ }
+ 
+-# Create ${2} .S file with all symbols from the ${1} object file
++# Create ${2}.o file with all symbols from the ${1} object file
+ kallsyms()
+ {
+       local kallsymopt;
+@@ -165,27 +167,25 @@ kallsyms()
+               kallsymopt="${kallsymopt} --lto-clang"
+       fi
+ 
+-      info KSYMS ${2}
+-      scripts/kallsyms ${kallsymopt} ${1} > ${2}
++      info KSYMS "${2}.S"
++      scripts/kallsyms ${kallsymopt} "${1}" > "${2}.S"
++
++      info AS "${2}.o"
++      ${CC} ${NOSTDINC_FLAGS} ${LINUXINCLUDE} ${KBUILD_CPPFLAGS} \
++            ${KBUILD_AFLAGS} ${KBUILD_AFLAGS_KERNEL} -c -o "${2}.o" "${2}.S"
++
++      kallsymso=${2}.o
+ }
+ 
+ # Perform one step in kallsyms generation, including temporary linking of
+ # vmlinux.
+ kallsyms_step()
+ {
+-      kallsymso_prev=${kallsymso}
+       kallsyms_vmlinux=.tmp_vmlinux.kallsyms${1}
+-      kallsymso=${kallsyms_vmlinux}.o
+-      kallsyms_S=${kallsyms_vmlinux}.S
+-
+-      vmlinux_link ${kallsyms_vmlinux} "${kallsymso_prev}" ${btf_vmlinux_bin_o}
+-      mksysmap ${kallsyms_vmlinux} ${kallsyms_vmlinux}.syms
+-      kallsyms ${kallsyms_vmlinux}.syms ${kallsyms_S}
+ 
+-      info AS ${kallsymso}
+-      ${CC} ${NOSTDINC_FLAGS} ${LINUXINCLUDE} ${KBUILD_CPPFLAGS} \
+-            ${KBUILD_AFLAGS} ${KBUILD_AFLAGS_KERNEL} \
+-            -c -o ${kallsymso} ${kallsyms_S}
++      vmlinux_link "${kallsyms_vmlinux}"
++      mksysmap "${kallsyms_vmlinux}" "${kallsyms_vmlinux}.syms"
++      kallsyms "${kallsyms_vmlinux}.syms" "${kallsyms_vmlinux}"
+ }
+ 
+ # Create map file with all symbols from ${1}
+@@ -223,19 +223,18 @@ fi
+ 
+ ${MAKE} -f "${srctree}/scripts/Makefile.build" obj=init init/version-timestamp.o
+ 
+-btf_vmlinux_bin_o=""
++btf_vmlinux_bin_o=
++kallsymso=
++strip_debug=
++
+ if is_enabled CONFIG_DEBUG_INFO_BTF; then
+-      btf_vmlinux_bin_o=.btf.vmlinux.bin.o
+-      if ! gen_btf .tmp_vmlinux.btf $btf_vmlinux_bin_o ; then
++      if ! gen_btf .tmp_vmlinux.btf .btf.vmlinux.bin.o ; then
+               echo >&2 "Failed to generate BTF for vmlinux"
+               echo >&2 "Try to disable CONFIG_DEBUG_INFO_BTF"
+               exit 1
+       fi
+ fi
+ 
+-kallsymso=""
+-kallsymso_prev=""
+-kallsyms_vmlinux=""
+ if is_enabled CONFIG_KALLSYMS; then
+ 
+       # kallsyms support
+@@ -261,11 +260,13 @@ if is_enabled CONFIG_KALLSYMS; then
+       # a)  Verify that the System.map from vmlinux matches the map from
+       #     ${kallsymso}.
+ 
++      # The kallsyms linking does not need debug symbols included.
++      strip_debug=1
++
+       kallsyms_step 1
+-      kallsyms_step 2
++      size1=$(${CONFIG_SHELL} "${srctree}/scripts/file-size.sh" ${kallsymso})
+ 
+-      # step 3
+-      size1=$(${CONFIG_SHELL} "${srctree}/scripts/file-size.sh" ${kallsymso_prev})
++      kallsyms_step 2
+       size2=$(${CONFIG_SHELL} "${srctree}/scripts/file-size.sh" ${kallsymso})
+ 
+       if [ $size1 -ne $size2 ] || [ -n "${KALLSYMS_EXTRA_PASS}" ]; then
+@@ -273,7 +274,9 @@ if is_enabled CONFIG_KALLSYMS; then
+       fi
+ fi
+ 
+-vmlinux_link vmlinux "${kallsymso}" ${btf_vmlinux_bin_o}
++strip_debug=
++
++vmlinux_link vmlinux
+ 
+ # fill in BTF IDs
+ if is_enabled CONFIG_DEBUG_INFO_BTF && is_enabled CONFIG_BPF; then
+-- 
+2.43.0
+
diff --git a/queue-6.10/kbuild-remove-provide-for-kallsyms-symbols.patch b/queue-6.10/kbuild-remove-provide-for-kallsyms-symbols.patch

new file mode 100644 (file)

index 0000000..c64a091
--- /dev/null
+++ b/queue-6.10/kbuild-remove-provide-for-kallsyms-symbols.patch
@@ -0,0 +1,144 @@
+From c0ce80a692c87f6c45846f6a109c8e6fbe16207c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 10 Jun 2024 20:25:17 +0900
+Subject: kbuild: remove PROVIDE() for kallsyms symbols
+
+From: Masahiro Yamada <masahiroy@kernel.org>
+
+[ Upstream commit c442db3f49f27e5a60a641b2ac9a3c6320796ed6 ]
+
+This reimplements commit 951bcae6c5a0 ("kallsyms: Avoid weak references
+for kallsyms symbols") because I am not a big fan of PROVIDE().
+
+As an alternative solution, this commit prepends one more kallsyms step.
+
+    KSYMS   .tmp_vmlinux.kallsyms0.S          # added
+    AS      .tmp_vmlinux.kallsyms0.o          # added
+    LD      .tmp_vmlinux.btf
+    BTF     .btf.vmlinux.bin.o
+    LD      .tmp_vmlinux.kallsyms1
+    NM      .tmp_vmlinux.kallsyms1.syms
+    KSYMS   .tmp_vmlinux.kallsyms1.S
+    AS      .tmp_vmlinux.kallsyms1.o
+    LD      .tmp_vmlinux.kallsyms2
+    NM      .tmp_vmlinux.kallsyms2.syms
+    KSYMS   .tmp_vmlinux.kallsyms2.S
+    AS      .tmp_vmlinux.kallsyms2.o
+    LD      vmlinux
+
+Step 0 takes /dev/null as input, and generates .tmp_vmlinux.kallsyms0.o,
+which has a valid kallsyms format with the empty symbol list, and can be
+linked to vmlinux. Since it is really small, the added compile-time cost
+is negligible.
+
+Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
+Acked-by: Ard Biesheuvel <ardb@kernel.org>
+Reviewed-by: Nicolas Schier <nicolas@fjasle.eu>
+Stable-dep-of: 020925ce9299 ("kallsyms: Do not cleanup .llvm.<hash> suffix before sorting symbols")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/asm-generic/vmlinux.lds.h | 19 -------------------
+ kernel/kallsyms_internal.h        |  5 -----
+ scripts/kallsyms.c                |  6 ------
+ scripts/link-vmlinux.sh           |  9 +++++++--
+ 4 files changed, 7 insertions(+), 32 deletions(-)
+
+diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
+index 70bf1004076b2..f00a8e18f389f 100644
+--- a/include/asm-generic/vmlinux.lds.h
++++ b/include/asm-generic/vmlinux.lds.h
+@@ -451,30 +451,11 @@
+ #endif
+ #endif
+ 
+-/*
+- * Some symbol definitions will not exist yet during the first pass of the
+- * link, but are guaranteed to exist in the final link. Provide preliminary
+- * definitions that will be superseded in the final link to avoid having to
+- * rely on weak external linkage, which requires a GOT when used in position
+- * independent code.
+- */
+-#define PRELIMINARY_SYMBOL_DEFINITIONS                                        \
+-      PROVIDE(kallsyms_addresses = .);                                \
+-      PROVIDE(kallsyms_offsets = .);                                  \
+-      PROVIDE(kallsyms_names = .);                                    \
+-      PROVIDE(kallsyms_num_syms = .);                                 \
+-      PROVIDE(kallsyms_relative_base = .);                            \
+-      PROVIDE(kallsyms_token_table = .);                              \
+-      PROVIDE(kallsyms_token_index = .);                              \
+-      PROVIDE(kallsyms_markers = .);                                  \
+-      PROVIDE(kallsyms_seqs_of_names = .);
+-
+ /*
+  * Read only Data
+  */
+ #define RO_DATA(align)                                                        \
+       . = ALIGN((align));                                             \
+-      PRELIMINARY_SYMBOL_DEFINITIONS                                  \
+       .rodata           : AT(ADDR(.rodata) - LOAD_OFFSET) {           \
+               __start_rodata = .;                                     \
+               *(.rodata) *(.rodata.*)                                 \
+diff --git a/kernel/kallsyms_internal.h b/kernel/kallsyms_internal.h
+index 85480274fc8fb..925f2ab22639a 100644
+--- a/kernel/kallsyms_internal.h
++++ b/kernel/kallsyms_internal.h
+@@ -4,11 +4,6 @@
+ 
+ #include <linux/types.h>
+ 
+-/*
+- * These will be re-linked against their real values during the second link
+- * stage. Preliminary values must be provided in the linker script using the
+- * PROVIDE() directive so that the first link stage can complete successfully.
+- */
+ extern const unsigned long kallsyms_addresses[];
+ extern const int kallsyms_offsets[];
+ extern const u8 kallsyms_names[];
+diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c
+index 47978efe4797c..fa53b5eef5530 100644
+--- a/scripts/kallsyms.c
++++ b/scripts/kallsyms.c
+@@ -259,12 +259,6 @@ static void shrink_table(void)
+               }
+       }
+       table_cnt = pos;
+-
+-      /* When valid symbol is not registered, exit to error */
+-      if (!table_cnt) {
+-              fprintf(stderr, "No valid symbol.\n");
+-              exit(1);
+-      }
+ }
+ 
+ static void read_map(const char *in)
+diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
+index 3d9d7257143a0..83d605ba7241a 100755
+--- a/scripts/link-vmlinux.sh
++++ b/scripts/link-vmlinux.sh
+@@ -227,6 +227,10 @@ btf_vmlinux_bin_o=
+ kallsymso=
+ strip_debug=
+ 
++if is_enabled CONFIG_KALLSYMS; then
++      kallsyms /dev/null .tmp_vmlinux.kallsyms0
++fi
++
+ if is_enabled CONFIG_DEBUG_INFO_BTF; then
+       if ! gen_btf .tmp_vmlinux.btf .btf.vmlinux.bin.o ; then
+               echo >&2 "Failed to generate BTF for vmlinux"
+@@ -239,9 +243,10 @@ if is_enabled CONFIG_KALLSYMS; then
+ 
+       # kallsyms support
+       # Generate section listing all symbols and add it into vmlinux
+-      # It's a three step process:
++      # It's a four step process:
++      # 0)  Generate a dummy __kallsyms with empty symbol list.
+       # 1)  Link .tmp_vmlinux.kallsyms1 so it has all symbols and sections,
+-      #     but __kallsyms is empty.
++      #     with a dummy __kallsyms.
+       #     Running kallsyms on that gives us .tmp_kallsyms1.o with
+       #     the right size
+       # 2)  Link .tmp_vmlinux.kallsyms2 so it now has a __kallsyms section of
+-- 
+2.43.0
+
diff --git a/queue-6.10/libfs-fix-infinite-directory-reads-for-offset-dir.patch b/queue-6.10/libfs-fix-infinite-directory-reads-for-offset-dir.patch

new file mode 100644 (file)

index 0000000..9b9e81d
--- /dev/null
+++ b/queue-6.10/libfs-fix-infinite-directory-reads-for-offset-dir.patch
@@ -0,0 +1,142 @@
+From f44ea14a6756ace7fb956da1afcfd997f83ae2ed Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 31 Jul 2024 12:38:35 +0800
+Subject: libfs: fix infinite directory reads for offset dir
+
+From: yangerkun <yangerkun@huawei.com>
+
+[ Upstream commit 64a7ce76fb901bf9f9c36cf5d681328fc0fd4b5a ]
+
+After we switched tmpfs dir operations from simple_dir_operations to
+simple_offset_dir_operations, every rename inserts the new dentry into
+the dest dir's maple tree (&SHMEM_I(inode)->dir_offsets->mt) with a free
+key starting at octx->next_offset, and then sets next_offset to that
+free key + 1. A readdir running concurrently with renames can therefore
+loop forever, which makes generic/736 in xfstests fail (details shown
+below).
+
+1. create 5000 files (1 2 3...) under one dir
+2. call readdir (man 3 readdir) once, and get one entry
+3. rename(entry, "TEMPFILE"), then rename("TEMPFILE", entry)
+4. loop over 2~3 until readdir returns nothing or we have looped too many
+   times (tmpfs fails the test via the second condition)
+
+We fix it with the same logic as commit 9b378f6ad48cf ("btrfs: fix infinite
+directory reads"): record the last_index when the dir is opened, and do
+not emit any entry whose index >= last_index. The file->private_data
+already used by offset dirs can hold this value directly, and we also
+update the last_index when we llseek the dir file.
+
+Fixes: a2e459555c5f ("shmem: stable directory offsets")
+Signed-off-by: yangerkun <yangerkun@huawei.com>
+Link: https://lore.kernel.org/r/20240731043835.1828697-1-yangerkun@huawei.com
+Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
+[brauner: only update last_index after seek when offset is zero like Jan suggested]
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/libfs.c | 35 ++++++++++++++++++++++++-----------
+ 1 file changed, 24 insertions(+), 11 deletions(-)
+
+diff --git a/fs/libfs.c b/fs/libfs.c
+index b635ee5adbcce..65279e53fbf27 100644
+--- a/fs/libfs.c
++++ b/fs/libfs.c
+@@ -450,6 +450,14 @@ void simple_offset_destroy(struct offset_ctx *octx)
+       mtree_destroy(&octx->mt);
+ }
+ 
++static int offset_dir_open(struct inode *inode, struct file *file)
++{
++      struct offset_ctx *ctx = inode->i_op->get_offset_ctx(inode);
++
++      file->private_data = (void *)ctx->next_offset;
++      return 0;
++}
++
+ /**
+  * offset_dir_llseek - Advance the read position of a directory descriptor
+  * @file: an open directory whose position is to be updated
+@@ -463,6 +471,9 @@ void simple_offset_destroy(struct offset_ctx *octx)
+  */
+ static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence)
+ {
++      struct inode *inode = file->f_inode;
++      struct offset_ctx *ctx = inode->i_op->get_offset_ctx(inode);
++
+       switch (whence) {
+       case SEEK_CUR:
+               offset += file->f_pos;
+@@ -476,7 +487,8 @@ static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence)
+       }
+ 
+       /* In this case, ->private_data is protected by f_pos_lock */
+-      file->private_data = NULL;
++      if (!offset)
++              file->private_data = (void *)ctx->next_offset;
+       return vfs_setpos(file, offset, LONG_MAX);
+ }
+ 
+@@ -507,7 +519,7 @@ static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry)
+                         inode->i_ino, fs_umode_to_dtype(inode->i_mode));
+ }
+ 
+-static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
++static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx, long last_index)
+ {
+       struct offset_ctx *octx = inode->i_op->get_offset_ctx(inode);
+       struct dentry *dentry;
+@@ -515,17 +527,21 @@ static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
+       while (true) {
+               dentry = offset_find_next(octx, ctx->pos);
+               if (!dentry)
+-                      return ERR_PTR(-ENOENT);
++                      return;
++
++              if (dentry2offset(dentry) >= last_index) {
++                      dput(dentry);
++                      return;
++              }
+ 
+               if (!offset_dir_emit(ctx, dentry)) {
+                       dput(dentry);
+-                      break;
++                      return;
+               }
+ 
+               ctx->pos = dentry2offset(dentry) + 1;
+               dput(dentry);
+       }
+-      return NULL;
+ }
+ 
+ /**
+@@ -552,22 +568,19 @@ static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
+ static int offset_readdir(struct file *file, struct dir_context *ctx)
+ {
+       struct dentry *dir = file->f_path.dentry;
++      long last_index = (long)file->private_data;
+ 
+       lockdep_assert_held(&d_inode(dir)->i_rwsem);
+ 
+       if (!dir_emit_dots(file, ctx))
+               return 0;
+ 
+-      /* In this case, ->private_data is protected by f_pos_lock */
+-      if (ctx->pos == DIR_OFFSET_MIN)
+-              file->private_data = NULL;
+-      else if (file->private_data == ERR_PTR(-ENOENT))
+-              return 0;
+-      file->private_data = offset_iterate_dir(d_inode(dir), ctx);
++      offset_iterate_dir(d_inode(dir), ctx, last_index);
+       return 0;
+ }
+ 
+ const struct file_operations simple_offset_dir_operations = {
++      .open           = offset_dir_open,
+       .llseek         = offset_dir_llseek,
+       .iterate_shared = offset_readdir,
+       .read           = generic_read_dir,
+-- 
+2.43.0
+
diff --git a/queue-6.10/mlxbf_gige-disable-rx-filters-until-rx-path-initiali.patch b/queue-6.10/mlxbf_gige-disable-rx-filters-until-rx-path-initiali.patch

new file mode 100644 (file)

index 0000000..dd37bae
--- /dev/null
+++ b/queue-6.10/mlxbf_gige-disable-rx-filters-until-rx-path-initiali.patch
@@ -0,0 +1,188 @@
+From eec23a2e5804f96448d65ae9c0e7ca8844e0d975 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 9 Aug 2024 12:36:12 -0400
+Subject: mlxbf_gige: disable RX filters until RX path initialized
+
+From: David Thompson <davthompson@nvidia.com>
+
+[ Upstream commit df934abb185c71c9f2fa07a5013672d0cbd36560 ]
+
+A recent change to the driver exposed a bug where the MAC RX
+filters (unicast MAC, broadcast MAC, and multicast MAC) are
+configured and enabled before the RX path is fully initialized.
+The result of this bug is that after the PHY is started packets
+that match these MAC RX filters start to flow into the RX FIFO.
+And then, after rx_init() is completed, these packets will go
+into the driver RX ring as well. If enough packets are received
+to fill the RX ring (default size is 128 packets) before the call
+to request_irq() completes, the driver RX function becomes stuck.
+
+This bug is intermittent but is most likely to be seen where the
+oob_net0 interface is connected to a busy network with lots of
+broadcast and multicast traffic.
+
+All the MAC RX filters must be disabled until the RX path is ready,
+i.e. all initialization is done and all the IRQs are installed.
+
+Fixes: f7442a634ac0 ("mlxbf_gige: call request_irq() after NAPI initialized")
+Reviewed-by: Asmaa Mnebhi <asmaa@nvidia.com>
+Signed-off-by: David Thompson <davthompson@nvidia.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://patch.msgid.link/20240809163612.12852-1-davthompson@nvidia.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../ethernet/mellanox/mlxbf_gige/mlxbf_gige.h |  8 +++
+ .../mellanox/mlxbf_gige/mlxbf_gige_main.c     | 10 ++++
+ .../mellanox/mlxbf_gige/mlxbf_gige_regs.h     |  2 +
+ .../mellanox/mlxbf_gige/mlxbf_gige_rx.c       | 50 ++++++++++++++++---
+ 4 files changed, 64 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h
+index bc94e75a7aebd..e7777700ee18a 100644
+--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h
++++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h
+@@ -40,6 +40,7 @@
+  */
+ #define MLXBF_GIGE_BCAST_MAC_FILTER_IDX 0
+ #define MLXBF_GIGE_LOCAL_MAC_FILTER_IDX 1
++#define MLXBF_GIGE_MAX_FILTER_IDX       3
+ 
+ /* Define for broadcast MAC literal */
+ #define BCAST_MAC_ADDR 0xFFFFFFFFFFFF
+@@ -175,6 +176,13 @@ enum mlxbf_gige_res {
+ int mlxbf_gige_mdio_probe(struct platform_device *pdev,
+                         struct mlxbf_gige *priv);
+ void mlxbf_gige_mdio_remove(struct mlxbf_gige *priv);
++
++void mlxbf_gige_enable_multicast_rx(struct mlxbf_gige *priv);
++void mlxbf_gige_disable_multicast_rx(struct mlxbf_gige *priv);
++void mlxbf_gige_enable_mac_rx_filter(struct mlxbf_gige *priv,
++                                   unsigned int index);
++void mlxbf_gige_disable_mac_rx_filter(struct mlxbf_gige *priv,
++                                    unsigned int index);
+ void mlxbf_gige_set_mac_rx_filter(struct mlxbf_gige *priv,
+                                 unsigned int index, u64 dmac);
+ void mlxbf_gige_get_mac_rx_filter(struct mlxbf_gige *priv,
+diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
+index b157f0f1c5a88..385a56ac73481 100644
+--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
++++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
+@@ -168,6 +168,10 @@ static int mlxbf_gige_open(struct net_device *netdev)
+       if (err)
+               goto napi_deinit;
+ 
++      mlxbf_gige_enable_mac_rx_filter(priv, MLXBF_GIGE_BCAST_MAC_FILTER_IDX);
++      mlxbf_gige_enable_mac_rx_filter(priv, MLXBF_GIGE_LOCAL_MAC_FILTER_IDX);
++      mlxbf_gige_enable_multicast_rx(priv);
++
+       /* Set bits in INT_EN that we care about */
+       int_en = MLXBF_GIGE_INT_EN_HW_ACCESS_ERROR |
+                MLXBF_GIGE_INT_EN_TX_CHECKSUM_INPUTS |
+@@ -379,6 +383,7 @@ static int mlxbf_gige_probe(struct platform_device *pdev)
+       void __iomem *plu_base;
+       void __iomem *base;
+       int addr, phy_irq;
++      unsigned int i;
+       int err;
+ 
+       base = devm_platform_ioremap_resource(pdev, MLXBF_GIGE_RES_MAC);
+@@ -423,6 +428,11 @@ static int mlxbf_gige_probe(struct platform_device *pdev)
+       priv->rx_q_entries = MLXBF_GIGE_DEFAULT_RXQ_SZ;
+       priv->tx_q_entries = MLXBF_GIGE_DEFAULT_TXQ_SZ;
+ 
++      for (i = 0; i <= MLXBF_GIGE_MAX_FILTER_IDX; i++)
++              mlxbf_gige_disable_mac_rx_filter(priv, i);
++      mlxbf_gige_disable_multicast_rx(priv);
++      mlxbf_gige_disable_promisc(priv);
++
+       /* Write initial MAC address to hardware */
+       mlxbf_gige_initial_mac(priv);
+ 
+diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h
+index 98a8681c21b9c..4d14cb13fd64e 100644
+--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h
++++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h
+@@ -62,6 +62,8 @@
+ #define MLXBF_GIGE_TX_STATUS_DATA_FIFO_FULL           BIT(1)
+ #define MLXBF_GIGE_RX_MAC_FILTER_DMAC_RANGE_START     0x0520
+ #define MLXBF_GIGE_RX_MAC_FILTER_DMAC_RANGE_END       0x0528
++#define MLXBF_GIGE_RX_MAC_FILTER_GENERAL              0x0530
++#define MLXBF_GIGE_RX_MAC_FILTER_EN_MULTICAST         BIT(1)
+ #define MLXBF_GIGE_RX_MAC_FILTER_COUNT_DISC           0x0540
+ #define MLXBF_GIGE_RX_MAC_FILTER_COUNT_DISC_EN        BIT(0)
+ #define MLXBF_GIGE_RX_MAC_FILTER_COUNT_PASS           0x0548
+diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c
+index 6999843584934..eb62620b63c7f 100644
+--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c
++++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c
+@@ -11,15 +11,31 @@
+ #include "mlxbf_gige.h"
+ #include "mlxbf_gige_regs.h"
+ 
+-void mlxbf_gige_set_mac_rx_filter(struct mlxbf_gige *priv,
+-                                unsigned int index, u64 dmac)
++void mlxbf_gige_enable_multicast_rx(struct mlxbf_gige *priv)
+ {
+       void __iomem *base = priv->base;
+-      u64 control;
++      u64 data;
+ 
+-      /* Write destination MAC to specified MAC RX filter */
+-      writeq(dmac, base + MLXBF_GIGE_RX_MAC_FILTER +
+-             (index * MLXBF_GIGE_RX_MAC_FILTER_STRIDE));
++      data = readq(base + MLXBF_GIGE_RX_MAC_FILTER_GENERAL);
++      data |= MLXBF_GIGE_RX_MAC_FILTER_EN_MULTICAST;
++      writeq(data, base + MLXBF_GIGE_RX_MAC_FILTER_GENERAL);
++}
++
++void mlxbf_gige_disable_multicast_rx(struct mlxbf_gige *priv)
++{
++      void __iomem *base = priv->base;
++      u64 data;
++
++      data = readq(base + MLXBF_GIGE_RX_MAC_FILTER_GENERAL);
++      data &= ~MLXBF_GIGE_RX_MAC_FILTER_EN_MULTICAST;
++      writeq(data, base + MLXBF_GIGE_RX_MAC_FILTER_GENERAL);
++}
++
++void mlxbf_gige_enable_mac_rx_filter(struct mlxbf_gige *priv,
++                                   unsigned int index)
++{
++      void __iomem *base = priv->base;
++      u64 control;
+ 
+       /* Enable MAC receive filter mask for specified index */
+       control = readq(base + MLXBF_GIGE_CONTROL);
+@@ -27,6 +43,28 @@ void mlxbf_gige_set_mac_rx_filter(struct mlxbf_gige *priv,
+       writeq(control, base + MLXBF_GIGE_CONTROL);
+ }
+ 
++void mlxbf_gige_disable_mac_rx_filter(struct mlxbf_gige *priv,
++                                    unsigned int index)
++{
++      void __iomem *base = priv->base;
++      u64 control;
++
++      /* Disable MAC receive filter mask for specified index */
++      control = readq(base + MLXBF_GIGE_CONTROL);
++      control &= ~(MLXBF_GIGE_CONTROL_EN_SPECIFIC_MAC << index);
++      writeq(control, base + MLXBF_GIGE_CONTROL);
++}
++
++void mlxbf_gige_set_mac_rx_filter(struct mlxbf_gige *priv,
++                                unsigned int index, u64 dmac)
++{
++      void __iomem *base = priv->base;
++
++      /* Write destination MAC to specified MAC RX filter */
++      writeq(dmac, base + MLXBF_GIGE_RX_MAC_FILTER +
++             (index * MLXBF_GIGE_RX_MAC_FILTER_STRIDE));
++}
++
+ void mlxbf_gige_get_mac_rx_filter(struct mlxbf_gige *priv,
+                                 unsigned int index, u64 *dmac)
+ {
+-- 
+2.43.0
+
diff --git a/queue-6.10/mptcp-correct-mptcp_subflow_attr_ssn_offset-reserved.patch b/queue-6.10/mptcp-correct-mptcp_subflow_attr_ssn_offset-reserved.patch

new file mode 100644 (file)

index 0000000..a3091cd
--- /dev/null
+++ b/queue-6.10/mptcp-correct-mptcp_subflow_attr_ssn_offset-reserved.patch
@@ -0,0 +1,42 @@
+From ac74b8b5e6adf2d8be74b0589eaba091a06f29e2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 12 Aug 2024 08:51:23 +0200
+Subject: mptcp: correct MPTCP_SUBFLOW_ATTR_SSN_OFFSET reserved size
+
+From: Eugene Syromiatnikov <esyr@redhat.com>
+
+[ Upstream commit 655111b838cdabdb604f3625a9ff08c5eedb11da ]
+
+ssn_offset field is u32 and is placed into the netlink response with
+nla_put_u32(), but only 2 bytes are reserved for the attribute payload
+in subflow_get_info_size() (even though it makes no difference
+in the end, as it is aligned up to 4 bytes).  Supply the correct
+argument to the relevant nla_total_size() call to make it less
+confusing.
+
+Fixes: 5147dfb50832 ("mptcp: allow dumping subflow context to userspace")
+Signed-off-by: Eugene Syromiatnikov <esyr@redhat.com>
+Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://patch.msgid.link/20240812065024.GA19719@asgard.redhat.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mptcp/diag.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c
+index 3ae46b545d2c2..2d3efb405437d 100644
+--- a/net/mptcp/diag.c
++++ b/net/mptcp/diag.c
+@@ -94,7 +94,7 @@ static size_t subflow_get_info_size(const struct sock *sk)
+               nla_total_size(4) +     /* MPTCP_SUBFLOW_ATTR_RELWRITE_SEQ */
+               nla_total_size_64bit(8) +       /* MPTCP_SUBFLOW_ATTR_MAP_SEQ */
+               nla_total_size(4) +     /* MPTCP_SUBFLOW_ATTR_MAP_SFSEQ */
+-              nla_total_size(2) +     /* MPTCP_SUBFLOW_ATTR_SSN_OFFSET */
++              nla_total_size(4) +     /* MPTCP_SUBFLOW_ATTR_SSN_OFFSET */
+               nla_total_size(2) +     /* MPTCP_SUBFLOW_ATTR_MAP_DATALEN */
+               nla_total_size(4) +     /* MPTCP_SUBFLOW_ATTR_FLAGS */
+               nla_total_size(1) +     /* MPTCP_SUBFLOW_ATTR_ID_REM */
+-- 
+2.43.0
+
diff --git a/queue-6.10/net-axienet-fix-register-defines-comment-description.patch b/queue-6.10/net-axienet-fix-register-defines-comment-description.patch

new file mode 100644 (file)

index 0000000..98b64d9
--- /dev/null
+++ b/queue-6.10/net-axienet-fix-register-defines-comment-description.patch
@@ -0,0 +1,62 @@
+From 167ceebc2bbb391294019d9fe81b7252634acc22 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 9 Aug 2024 11:56:09 +0530
+Subject: net: axienet: Fix register defines comment description
+
+From: Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>
+
+[ Upstream commit 9ff2f816e2aa65ca9a1cdf0954842f8173c0f48d ]
+
+In the axiethernet header, fix the register defines' comment descriptions
+to be in line with the IP documentation. This updates the MAC configuration
+register, MDIO configuration register and frame filter control descriptions.
+
+Fixes: 8a3b7a252dca ("drivers/net/ethernet/xilinx: added Xilinx AXI Ethernet driver")
+Signed-off-by: Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/xilinx/xilinx_axienet.h | 16 ++++++++--------
+ 1 file changed, 8 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h
+index fa5500decc960..c7d9221fafdcb 100644
+--- a/drivers/net/ethernet/xilinx/xilinx_axienet.h
++++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h
+@@ -160,16 +160,16 @@
+ #define XAE_RCW1_OFFSET               0x00000404 /* Rx Configuration Word 1 */
+ #define XAE_TC_OFFSET         0x00000408 /* Tx Configuration */
+ #define XAE_FCC_OFFSET                0x0000040C /* Flow Control Configuration */
+-#define XAE_EMMC_OFFSET               0x00000410 /* EMAC mode configuration */
+-#define XAE_PHYC_OFFSET               0x00000414 /* RGMII/SGMII configuration */
++#define XAE_EMMC_OFFSET               0x00000410 /* MAC speed configuration */
++#define XAE_PHYC_OFFSET               0x00000414 /* RX Max Frame Configuration */
+ #define XAE_ID_OFFSET         0x000004F8 /* Identification register */
+-#define XAE_MDIO_MC_OFFSET    0x00000500 /* MII Management Config */
+-#define XAE_MDIO_MCR_OFFSET   0x00000504 /* MII Management Control */
+-#define XAE_MDIO_MWD_OFFSET   0x00000508 /* MII Management Write Data */
+-#define XAE_MDIO_MRD_OFFSET   0x0000050C /* MII Management Read Data */
++#define XAE_MDIO_MC_OFFSET    0x00000500 /* MDIO Setup */
++#define XAE_MDIO_MCR_OFFSET   0x00000504 /* MDIO Control */
++#define XAE_MDIO_MWD_OFFSET   0x00000508 /* MDIO Write Data */
++#define XAE_MDIO_MRD_OFFSET   0x0000050C /* MDIO Read Data */
+ #define XAE_UAW0_OFFSET               0x00000700 /* Unicast address word 0 */
+ #define XAE_UAW1_OFFSET               0x00000704 /* Unicast address word 1 */
+-#define XAE_FMI_OFFSET                0x00000708 /* Filter Mask Index */
++#define XAE_FMI_OFFSET                0x00000708 /* Frame Filter Control */
+ #define XAE_AF0_OFFSET                0x00000710 /* Address Filter 0 */
+ #define XAE_AF1_OFFSET                0x00000714 /* Address Filter 1 */
+ 
+@@ -308,7 +308,7 @@
+  */
+ #define XAE_UAW1_UNICASTADDR_MASK     0x0000FFFF
+ 
+-/* Bit masks for Axi Ethernet FMI register */
++/* Bit masks for Axi Ethernet FMC register */
+ #define XAE_FMI_PM_MASK                       0x80000000 /* Promis. mode enable */
+ #define XAE_FMI_IND_MASK              0x00000003 /* Index Mask */
+ 
+-- 
+2.43.0
+
diff --git a/queue-6.10/net-dsa-vsc73xx-check-busy-flag-in-mdio-operations.patch b/queue-6.10/net-dsa-vsc73xx-check-busy-flag-in-mdio-operations.patch

new file mode 100644 (file)

index 0000000..4c496e8
--- /dev/null
+++ b/queue-6.10/net-dsa-vsc73xx-check-busy-flag-in-mdio-operations.patch
@@ -0,0 +1,114 @@
+From 799d88331628b3ec322402dbad64edf1d89cf26a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 9 Aug 2024 21:38:04 +0200
+Subject: net: dsa: vsc73xx: check busy flag in MDIO operations
+
+From: Pawel Dembicki <paweldembicki@gmail.com>
+
+[ Upstream commit fa63c6434b6f6aaf9d8d599dc899bc0a074cc0ad ]
+
+The VSC73xx has a busy flag used during MDIO operations. It is raised
+when MDIO read/write operations are in progress. Without it, PHYs are
+misconfigured and bus operations do not work as expected.
+
+Fixes: 05bd97fc559d ("net: dsa: Add Vitesse VSC73xx DSA router driver")
+Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
+Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
+Signed-off-by: Pawel Dembicki <paweldembicki@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/vitesse-vsc73xx-core.c | 37 +++++++++++++++++++++++++-
+ 1 file changed, 36 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/dsa/vitesse-vsc73xx-core.c b/drivers/net/dsa/vitesse-vsc73xx-core.c
+index 2725541b3c36f..56bb77dbd28a2 100644
+--- a/drivers/net/dsa/vitesse-vsc73xx-core.c
++++ b/drivers/net/dsa/vitesse-vsc73xx-core.c
+@@ -38,6 +38,10 @@
+ #define VSC73XX_BLOCK_ARBITER 0x5 /* Only subblock 0 */
+ #define VSC73XX_BLOCK_SYSTEM  0x7 /* Only subblock 0 */
+ 
++/* MII Block subblock */
++#define VSC73XX_BLOCK_MII_INTERNAL    0x0 /* Internal MDIO subblock */
++#define VSC73XX_BLOCK_MII_EXTERNAL    0x1 /* External MDIO subblock */
++
+ #define CPU_PORT      6 /* CPU port */
+ 
+ /* MAC Block registers */
+@@ -196,6 +200,8 @@
+ #define VSC73XX_MII_CMD               0x1
+ #define VSC73XX_MII_DATA      0x2
+ 
++#define VSC73XX_MII_STAT_BUSY BIT(3)
++
+ /* Arbiter block 5 registers */
+ #define VSC73XX_ARBEMPTY              0x0c
+ #define VSC73XX_ARBDISC                       0x0e
+@@ -270,6 +276,7 @@
+ #define IS_739X(a) (IS_7395(a) || IS_7398(a))
+ 
+ #define VSC73XX_POLL_SLEEP_US         1000
++#define VSC73XX_MDIO_POLL_SLEEP_US    5
+ #define VSC73XX_POLL_TIMEOUT_US               10000
+ 
+ struct vsc73xx_counter {
+@@ -487,6 +494,22 @@ static int vsc73xx_detect(struct vsc73xx *vsc)
+       return 0;
+ }
+ 
++static int vsc73xx_mdio_busy_check(struct vsc73xx *vsc)
++{
++      int ret, err;
++      u32 val;
++
++      ret = read_poll_timeout(vsc73xx_read, err,
++                              err < 0 || !(val & VSC73XX_MII_STAT_BUSY),
++                              VSC73XX_MDIO_POLL_SLEEP_US,
++                              VSC73XX_POLL_TIMEOUT_US, false, vsc,
++                              VSC73XX_BLOCK_MII, VSC73XX_BLOCK_MII_INTERNAL,
++                              VSC73XX_MII_STAT, &val);
++      if (ret)
++              return ret;
++      return err;
++}
++
+ static int vsc73xx_phy_read(struct dsa_switch *ds, int phy, int regnum)
+ {
+       struct vsc73xx *vsc = ds->priv;
+@@ -494,12 +517,20 @@ static int vsc73xx_phy_read(struct dsa_switch *ds, int phy, int regnum)
+       u32 val;
+       int ret;
+ 
++      ret = vsc73xx_mdio_busy_check(vsc);
++      if (ret)
++              return ret;
++
+       /* Setting bit 26 means "read" */
+       cmd = BIT(26) | (phy << 21) | (regnum << 16);
+       ret = vsc73xx_write(vsc, VSC73XX_BLOCK_MII, 0, 1, cmd);
+       if (ret)
+               return ret;
+-      msleep(2);
++
++      ret = vsc73xx_mdio_busy_check(vsc);
++      if (ret)
++              return ret;
++
+       ret = vsc73xx_read(vsc, VSC73XX_BLOCK_MII, 0, 2, &val);
+       if (ret)
+               return ret;
+@@ -523,6 +554,10 @@ static int vsc73xx_phy_write(struct dsa_switch *ds, int phy, int regnum,
+       u32 cmd;
+       int ret;
+ 
++      ret = vsc73xx_mdio_busy_check(vsc);
++      if (ret)
++              return ret;
++
+       /* It was found through tedious experiments that this router
+        * chip really hates to have it's PHYs reset. They
+        * never recover if that happens: autonegotiation stops
+-- 
+2.43.0
+
diff --git a/queue-6.10/net-dsa-vsc73xx-fix-port-mac-configuration-in-full-d.patch b/queue-6.10/net-dsa-vsc73xx-fix-port-mac-configuration-in-full-d.patch

new file mode 100644 (file)

index 0000000..7113efd
--- /dev/null
+++ b/queue-6.10/net-dsa-vsc73xx-fix-port-mac-configuration-in-full-d.patch
@@ -0,0 +1,52 @@
+From f8bd473a5a4f853f32c103b20a4a6e8c7a1e267c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 9 Aug 2024 21:38:02 +0200
+Subject: net: dsa: vsc73xx: fix port MAC configuration in full duplex mode
+
+From: Pawel Dembicki <paweldembicki@gmail.com>
+
+[ Upstream commit 63796bc2e97cd5ebcef60bad4953259d4ad11cb4 ]
+
+According to the datasheet description ("Port Mode Procedure" in 5.6.2),
+the VSC73XX_MAC_CFG_WEXC_DIS bit is configured only for half duplex mode.
+
+The WEXC_DIS bit is responsible for MAC behavior after an excessive
+collision. Let's set it as described in the datasheet.
+
+Fixes: 05bd97fc559d ("net: dsa: Add Vitesse VSC73xx DSA router driver")
+Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
+Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
+Signed-off-by: Pawel Dembicki <paweldembicki@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/vitesse-vsc73xx-core.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/dsa/vitesse-vsc73xx-core.c b/drivers/net/dsa/vitesse-vsc73xx-core.c
+index 4b031fefcec68..fc4030976bdce 100644
+--- a/drivers/net/dsa/vitesse-vsc73xx-core.c
++++ b/drivers/net/dsa/vitesse-vsc73xx-core.c
+@@ -817,6 +817,11 @@ static void vsc73xx_mac_link_up(struct phylink_config *config,
+ 
+       if (duplex == DUPLEX_FULL)
+               val |= VSC73XX_MAC_CFG_FDX;
++      else
++              /* In datasheet description ("Port Mode Procedure" in 5.6.2)
++               * this bit is configured only for half duplex.
++               */
++              val |= VSC73XX_MAC_CFG_WEXC_DIS;
+ 
+       /* This routine is described in the datasheet (below ARBDISC register
+        * description)
+@@ -827,7 +832,6 @@ static void vsc73xx_mac_link_up(struct phylink_config *config,
+       get_random_bytes(&seed, 1);
+       val |= seed << VSC73XX_MAC_CFG_SEED_OFFSET;
+       val |= VSC73XX_MAC_CFG_SEED_LOAD;
+-      val |= VSC73XX_MAC_CFG_WEXC_DIS;
+       vsc73xx_write(vsc, VSC73XX_BLOCK_MAC, port, VSC73XX_MAC_CFG, val);
+ 
+       /* Flow control for the PHY facing ports:
+-- 
+2.43.0
+
diff --git a/queue-6.10/net-dsa-vsc73xx-pass-value-in-phy_write-operation.patch b/queue-6.10/net-dsa-vsc73xx-pass-value-in-phy_write-operation.patch

new file mode 100644 (file)

index 0000000..ed2b40c
--- /dev/null
+++ b/queue-6.10/net-dsa-vsc73xx-pass-value-in-phy_write-operation.patch
@@ -0,0 +1,40 @@
+From ec9a3bdf7e761ab4864089fa8d6547db95486571 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 9 Aug 2024 21:38:03 +0200
+Subject: net: dsa: vsc73xx: pass value in phy_write operation
+
+From: Pawel Dembicki <paweldembicki@gmail.com>
+
+[ Upstream commit 5b9eebc2c7a5f0cc7950d918c1e8a4ad4bed5010 ]
+
+In the 'vsc73xx_phy_write' function, the register value is missing,
+and the phy write operation always sends zeros.
+
+This commit passes the value variable into the proper register.
+
+Fixes: 05bd97fc559d ("net: dsa: Add Vitesse VSC73xx DSA router driver")
+Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
+Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
+Signed-off-by: Pawel Dembicki <paweldembicki@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/vitesse-vsc73xx-core.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/dsa/vitesse-vsc73xx-core.c b/drivers/net/dsa/vitesse-vsc73xx-core.c
+index fc4030976bdce..2725541b3c36f 100644
+--- a/drivers/net/dsa/vitesse-vsc73xx-core.c
++++ b/drivers/net/dsa/vitesse-vsc73xx-core.c
+@@ -534,7 +534,7 @@ static int vsc73xx_phy_write(struct dsa_switch *ds, int phy, int regnum,
+               return 0;
+       }
+ 
+-      cmd = (phy << 21) | (regnum << 16);
++      cmd = (phy << 21) | (regnum << 16) | val;
+       ret = vsc73xx_write(vsc, VSC73XX_BLOCK_MII, 0, 1, cmd);
+       if (ret)
+               return ret;
+-- 
+2.43.0
+
diff --git a/queue-6.10/net-ethernet-mtk_wed-fix-use-after-free-panic-in-mtk.patch b/queue-6.10/net-ethernet-mtk_wed-fix-use-after-free-panic-in-mtk.patch

new file mode 100644 (file)

index 0000000..1de4073
--- /dev/null
+++ b/queue-6.10/net-ethernet-mtk_wed-fix-use-after-free-panic-in-mtk.patch
@@ -0,0 +1,70 @@
+From 6a49ad7f37a1098946b3aa4d691ce540730f3ef6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 10 Aug 2024 13:26:51 +0800
+Subject: net: ethernet: mtk_wed: fix use-after-free panic in
+ mtk_wed_setup_tc_block_cb()
+
+From: Zheng Zhang <everything411@qq.com>
+
+[ Upstream commit db1b4bedb9b97c6d34b03d03815147c04fffe8b4 ]
+
+When there are multiple ap interfaces on one band and with WED on,
+turning the interface down will cause a kernel panic on MT798X.
+
+Previously, cb_priv was freed in mtk_wed_setup_tc_block() without
+marking it NULL, and mtk_wed_setup_tc_block_cb() didn't check the value either.
+
+Assign NULL after free cb_priv in mtk_wed_setup_tc_block() and check NULL
+in mtk_wed_setup_tc_block_cb().
+
+----------
+Unable to handle kernel paging request at virtual address 0072460bca32b4f5
+Call trace:
+ mtk_wed_setup_tc_block_cb+0x4/0x38
+ 0xffffffc0794084bc
+ tcf_block_playback_offloads+0x70/0x1e8
+ tcf_block_unbind+0x6c/0xc8
+...
+---------
+
+Fixes: 799684448e3e ("net: ethernet: mtk_wed: introduce wed wo support")
+Signed-off-by: Zheng Zhang <everything411@qq.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mediatek/mtk_wed.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c
+index 61334a71058c7..e212a4ba92751 100644
+--- a/drivers/net/ethernet/mediatek/mtk_wed.c
++++ b/drivers/net/ethernet/mediatek/mtk_wed.c
+@@ -2666,14 +2666,15 @@ mtk_wed_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_pri
+ {
+       struct mtk_wed_flow_block_priv *priv = cb_priv;
+       struct flow_cls_offload *cls = type_data;
+-      struct mtk_wed_hw *hw = priv->hw;
++      struct mtk_wed_hw *hw = NULL;
+ 
+-      if (!tc_can_offload(priv->dev))
++      if (!priv || !tc_can_offload(priv->dev))
+               return -EOPNOTSUPP;
+ 
+       if (type != TC_SETUP_CLSFLOWER)
+               return -EOPNOTSUPP;
+ 
++      hw = priv->hw;
+       return mtk_flow_offload_cmd(hw->eth, cls, hw->index);
+ }
+ 
+@@ -2729,6 +2730,7 @@ mtk_wed_setup_tc_block(struct mtk_wed_hw *hw, struct net_device *dev,
+                       flow_block_cb_remove(block_cb, f);
+                       list_del(&block_cb->driver_list);
+                       kfree(block_cb->cb_priv);
++                      block_cb->cb_priv = NULL;
+               }
+               return 0;
+       default:
+-- 
+2.43.0
+
diff --git a/queue-6.10/net-hns3-fix-a-deadlock-problem-when-config-tc-durin.patch b/queue-6.10/net-hns3-fix-a-deadlock-problem-when-config-tc-durin.patch

new file mode 100644 (file)

index 0000000..74e3497
--- /dev/null
+++ b/queue-6.10/net-hns3-fix-a-deadlock-problem-when-config-tc-durin.patch
@@ -0,0 +1,76 @@
+From 6c4fc5941352cd525220a47af9c1c1d90eba218d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 13 Aug 2024 22:10:22 +0800
+Subject: net: hns3: fix a deadlock problem when config TC during resetting
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Jie Wang <wangjie125@huawei.com>
+
+[ Upstream commit be5e816d00a506719e9dbb1a9c861c5ced30a109 ]
+
+Configuring TC during the reset process may cause a deadlock. The flow is
+as below:
+                             pf reset start
+                                 │
+                                 ▼
+                              ......
+setup tc                         │
+    │                            ▼
+    ▼                      DOWN: napi_disable()
+napi_disable()(skip)             │
+    │                            │
+    ▼                            ▼
+  ......                      ......
+    │                            │
+    ▼                            │
+napi_enable()                    │
+                                 ▼
+                           UINIT: netif_napi_del()
+                                 │
+                                 ▼
+                              ......
+                                 │
+                                 ▼
+                           INIT: netif_napi_add()
+                                 │
+                                 ▼
+                              ......                 global reset start
+                                 │                      │
+                                 ▼                      ▼
+                           UP: napi_enable()(skip)    ......
+                                 │                      │
+                                 ▼                      ▼
+                              ......                 napi_disable()
+
+In the reset process, the driver will DOWN the port and then UINIT. In this
+case, the setup tc process will UP the port before UINIT, which causes the
+problem. Add a DOWN process in UINIT to fix it.
+
+Fixes: bb6b94a896d4 ("net: hns3: Add reset interface implementation in client")
+Signed-off-by: Jie Wang <wangjie125@huawei.com>
+Signed-off-by: Jijie Shao <shaojijie@huawei.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+index a5fc0209d628e..4cbc4d069a1f3 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+@@ -5724,6 +5724,9 @@ static int hns3_reset_notify_uninit_enet(struct hnae3_handle *handle)
+       struct net_device *netdev = handle->kinfo.netdev;
+       struct hns3_nic_priv *priv = netdev_priv(netdev);
+ 
++      if (!test_bit(HNS3_NIC_STATE_DOWN, &priv->state))
++              hns3_nic_net_stop(netdev);
++
+       if (!test_and_clear_bit(HNS3_NIC_STATE_INITED, &priv->state)) {
+               netdev_warn(netdev, "already uninitialized\n");
+               return 0;
+-- 
+2.43.0
+
diff --git a/queue-6.10/net-hns3-fix-wrong-use-of-semaphore-up.patch b/queue-6.10/net-hns3-fix-wrong-use-of-semaphore-up.patch

new file mode 100644 (file)

index 0000000..debf98a
--- /dev/null
+++ b/queue-6.10/net-hns3-fix-wrong-use-of-semaphore-up.patch
@@ -0,0 +1,61 @@
+From b7a01e9ffdb755e1e887014a988724a0f2d564e7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 13 Aug 2024 22:10:20 +0800
+Subject: net: hns3: fix wrong use of semaphore up
+
+From: Jie Wang <wangjie125@huawei.com>
+
+[ Upstream commit 8445d9d3c03101859663d34fda747f6a50947556 ]
+
+Currently, if hns3 PF or VF FLR reset fails after five retries,
+the reset done process will directly release the semaphore
+which has already been released in hclge_reset_prepare_general.
+This will cause the down operation to fail.
+
+So this patch fixes it by adding reset state judgement. The up operation is
+only called after successful PF FLR reset.
+
+Fixes: 8627bdedc435 ("net: hns3: refactor the precedure of PF FLR")
+Fixes: f28368bb4542 ("net: hns3: refactor the procedure of VF FLR")
+Signed-off-by: Jie Wang <wangjie125@huawei.com>
+Signed-off-by: Jijie Shao <shaojijie@huawei.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c   | 4 ++--
+ drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 4 ++--
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+index 82574ce0194fb..125e04434611d 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+@@ -11516,8 +11516,8 @@ static void hclge_reset_done(struct hnae3_ae_dev *ae_dev)
+               dev_err(&hdev->pdev->dev, "fail to rebuild, ret=%d\n", ret);
+ 
+       hdev->reset_type = HNAE3_NONE_RESET;
+-      clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state);
+-      up(&hdev->reset_sem);
++      if (test_and_clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
++              up(&hdev->reset_sem);
+ }
+ 
+ static void hclge_clear_resetting_state(struct hclge_dev *hdev)
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+index 3735d2fed11f7..094a7c7b55921 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+@@ -1747,8 +1747,8 @@ static void hclgevf_reset_done(struct hnae3_ae_dev *ae_dev)
+                        ret);
+ 
+       hdev->reset_type = HNAE3_NONE_RESET;
+-      clear_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state);
+-      up(&hdev->reset_sem);
++      if (test_and_clear_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state))
++              up(&hdev->reset_sem);
+ }
+ 
+ static u32 hclgevf_get_fw_version(struct hnae3_handle *handle)
+-- 
+2.43.0
+
diff --git a/queue-6.10/net-hns3-use-the-user-s-cfg-after-reset.patch b/queue-6.10/net-hns3-use-the-user-s-cfg-after-reset.patch

new file mode 100644 (file)

index 0000000..e88f3e7
--- /dev/null
+++ b/queue-6.10/net-hns3-use-the-user-s-cfg-after-reset.patch
@@ -0,0 +1,122 @@
+From 074f87de7346705cb535970457c0c2aa573fbb08 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 13 Aug 2024 22:10:21 +0800
+Subject: net: hns3: use the user's cfg after reset
+
+From: Peiyang Wang <wangpeiyang1@huawei.com>
+
+[ Upstream commit 30545e17eac1f50c5ef49644daf6af205100a965 ]
+
+Consider the following case, in which the user changes the speed and resets
+the net interface. Before the hw changes speed successfully, the driver gets
+the old speed from hw via the timer task. After reset, the previous speed is
+configured to hw. As a result, the new speed is configured successfully but
+lost after PF reset. The following picture shows this more directly.
+
++------+              +----+                 +----+
+| USER |              | PF |                 | HW |
++---+--+              +-+--+                 +-+--+
+    |  ethtool -s 100G  |                      |
+    +------------------>|   set speed 100G     |
+    |                   +--------------------->|
+    |                   |  set successfully    |
+    |                   |<---------------------+---+
+    |                   |query cfg (timer task)|   |
+    |                   +--------------------->|   | handle speed
+    |                   |     return 200G      |   | changing event
+    |  ethtool --reset  |<---------------------+   | (100G)
+    +------------------>|  cfg previous speed  |<--+
+    |                   |  after reset (200G)  |
+    |                   +--------------------->|
+    |                   |                      +---+
+    |                   |query cfg (timer task)|   |
+    |                   +--------------------->|   | handle speed
+    |                   |     return 100G      |   | changing event
+    |                   |<---------------------+   | (200G)
+    |                   |                      |<--+
+    |                   |query cfg (timer task)|
+    |                   +--------------------->|
+    |                   |     return 200G      |
+    |                   |<---------------------+
+    |                   |                      |
+    v                   v                      v
+
+This patch saves the new speed if hw changes speed successfully, so that it
+can be used after a successful reset.
+
+Fixes: 2d03eacc0b7e ("net: hns3: Only update mac configuation when necessary")
+Signed-off-by: Peiyang Wang <wangpeiyang1@huawei.com>
+Signed-off-by: Jijie Shao <shaojijie@huawei.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../hisilicon/hns3/hns3pf/hclge_main.c        | 24 ++++++++++++++-----
+ .../hisilicon/hns3/hns3pf/hclge_mdio.c        |  3 +++
+ 2 files changed, 21 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+index 125e04434611d..465f0d5822837 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+@@ -2653,8 +2653,17 @@ static int hclge_cfg_mac_speed_dup_h(struct hnae3_handle *handle, int speed,
+ {
+       struct hclge_vport *vport = hclge_get_vport(handle);
+       struct hclge_dev *hdev = vport->back;
++      int ret;
++
++      ret = hclge_cfg_mac_speed_dup(hdev, speed, duplex, lane_num);
+ 
+-      return hclge_cfg_mac_speed_dup(hdev, speed, duplex, lane_num);
++      if (ret)
++              return ret;
++
++      hdev->hw.mac.req_speed = speed;
++      hdev->hw.mac.req_duplex = duplex;
++
++      return 0;
+ }
+ 
+ static int hclge_set_autoneg_en(struct hclge_dev *hdev, bool enable)
+@@ -2956,17 +2965,20 @@ static int hclge_mac_init(struct hclge_dev *hdev)
+       if (!test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
+               hdev->hw.mac.duplex = HCLGE_MAC_FULL;
+ 
+-      ret = hclge_cfg_mac_speed_dup_hw(hdev, hdev->hw.mac.speed,
+-                                       hdev->hw.mac.duplex, hdev->hw.mac.lane_num);
+-      if (ret)
+-              return ret;
+-
+       if (hdev->hw.mac.support_autoneg) {
+               ret = hclge_set_autoneg_en(hdev, hdev->hw.mac.autoneg);
+               if (ret)
+                       return ret;
+       }
+ 
++      if (!hdev->hw.mac.autoneg) {
++              ret = hclge_cfg_mac_speed_dup_hw(hdev, hdev->hw.mac.req_speed,
++                                               hdev->hw.mac.req_duplex,
++                                               hdev->hw.mac.lane_num);
++              if (ret)
++                      return ret;
++      }
++
+       mac->link = 0;
+ 
+       if (mac->user_fec_mode & BIT(HNAE3_FEC_USER_DEF)) {
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
+index 85fb11de43a12..80079657afebe 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
+@@ -191,6 +191,9 @@ static void hclge_mac_adjust_link(struct net_device *netdev)
+       if (ret)
+               netdev_err(netdev, "failed to adjust link.\n");
+ 
++      hdev->hw.mac.req_speed = (u32)speed;
++      hdev->hw.mac.req_duplex = (u8)duplex;
++
+       ret = hclge_cfg_flowctrl(hdev);
+       if (ret)
+               netdev_err(netdev, "failed to configure flow control.\n");
+-- 
+2.43.0
+
diff --git a/queue-6.10/net-mlx5-sd-do-not-query-mpir-register-if-no-sd_grou.patch b/queue-6.10/net-mlx5-sd-do-not-query-mpir-register-if-no-sd_grou.patch

new file mode 100644 (file)

index 0000000..e1e3ea2
--- /dev/null
+++ b/queue-6.10/net-mlx5-sd-do-not-query-mpir-register-if-no-sd_grou.patch
@@ -0,0 +1,86 @@
+From 2b5c17408c3737046493e5d6f4ed5f546de19b0e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Aug 2024 17:41:02 +0300
+Subject: net/mlx5: SD, Do not query MPIR register if no sd_group
+
+From: Tariq Toukan <tariqt@nvidia.com>
+
+[ Upstream commit c31fe2b5095d8c84562ce90db07600f7e9f318df ]
+
+Unconditionally calling the MPIR query on BF separate mode yields the FW
+syndrome below [1]. Do not call it unless admin clearly specified the SD
+group, i.e. expressing the intention of using the multi-PF netdev
+feature.
+
+This fix covers cases not covered in
+commit fca3b4791850 ("net/mlx5: Do not query MPIR on embedded CPU function").
+
+[1]
+mlx5_cmd_out_err:808:(pid 8267): ACCESS_REG(0x805) op_mod(0x1) failed,
+status bad system state(0x4), syndrome (0x685f19), err(-5)
+
+Fixes: 678eb448055a ("net/mlx5: SD, Implement basic query and instantiation")
+Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
+Reviewed-by: Gal Pressman <gal@nvidia.com>
+Link: https://patch.msgid.link/20240808144107.2095424-2-tariqt@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/mellanox/mlx5/core/lib/sd.c   | 18 +++++++++---------
+ 1 file changed, 9 insertions(+), 9 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
+index f6deb5a3f8202..eeb0b7ea05f12 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
+@@ -126,7 +126,7 @@ static bool mlx5_sd_is_supported(struct mlx5_core_dev *dev, u8 host_buses)
+ }
+ 
+ static int mlx5_query_sd(struct mlx5_core_dev *dev, bool *sdm,
+-                       u8 *host_buses, u8 *sd_group)
++                       u8 *host_buses)
+ {
+       u32 out[MLX5_ST_SZ_DW(mpir_reg)];
+       int err;
+@@ -135,10 +135,6 @@ static int mlx5_query_sd(struct mlx5_core_dev *dev, bool *sdm,
+       if (err)
+               return err;
+ 
+-      err = mlx5_query_nic_vport_sd_group(dev, sd_group);
+-      if (err)
+-              return err;
+-
+       *sdm = MLX5_GET(mpir_reg, out, sdm);
+       *host_buses = MLX5_GET(mpir_reg, out, host_buses);
+ 
+@@ -166,19 +162,23 @@ static int sd_init(struct mlx5_core_dev *dev)
+       if (mlx5_core_is_ecpf(dev))
+               return 0;
+ 
++      err = mlx5_query_nic_vport_sd_group(dev, &sd_group);
++      if (err)
++              return err;
++
++      if (!sd_group)
++              return 0;
++
+       if (!MLX5_CAP_MCAM_REG(dev, mpir))
+               return 0;
+ 
+-      err = mlx5_query_sd(dev, &sdm, &host_buses, &sd_group);
++      err = mlx5_query_sd(dev, &sdm, &host_buses);
+       if (err)
+               return err;
+ 
+       if (!sdm)
+               return 0;
+ 
+-      if (!sd_group)
+-              return 0;
+-
+       group_id = mlx5_sd_group_id(dev, sd_group);
+ 
+       if (!mlx5_sd_is_supported(dev, host_buses)) {
+-- 
+2.43.0
+
diff --git a/queue-6.10/net-mlx5e-correctly-report-errors-for-ethtool-rx-flo.patch b/queue-6.10/net-mlx5e-correctly-report-errors-for-ethtool-rx-flo.patch

new file mode 100644 (file)

index 0000000..de21f7d
--- /dev/null
+++ b/queue-6.10/net-mlx5e-correctly-report-errors-for-ethtool-rx-flo.patch
@@ -0,0 +1,46 @@
+From e48b38c18f1789697b6bb097bc62eda4bbd10f4c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Aug 2024 17:41:05 +0300
+Subject: net/mlx5e: Correctly report errors for ethtool rx flows
+
+From: Cosmin Ratiu <cratiu@nvidia.com>
+
+[ Upstream commit cbc796be1779c4dbc9a482c7233995e2a8b6bfb3 ]
+
+Previously, an ethtool rx flow with no attrs would not be added to the
+NIC as it has no rules to configure the hw with, but it would be
+reported as successful to the caller (return code 0). This is confusing
+for the user as ethtool then reports "Added rule $num", but no rule was
+actually added.
+
+This change corrects that by instead reporting these wrong rules as
+-EINVAL.
+
+Fixes: b29c61dac3a2 ("net/mlx5e: Ethtool steering flow validation refactoring")
+Signed-off-by: Cosmin Ratiu <cratiu@nvidia.com>
+Reviewed-by: Saeed Mahameed <saeedm@nvidia.com>
+Reviewed-by: Dragos Tatulea <dtatulea@nvidia.com>
+Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
+Link: https://patch.msgid.link/20240808144107.2095424-5-tariqt@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+index 3eccdadc03578..773624bb2c5d5 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+@@ -734,7 +734,7 @@ mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv,
+       if (num_tuples <= 0) {
+               netdev_warn(priv->netdev, "%s: flow is not valid %d\n",
+                           __func__, num_tuples);
+-              return num_tuples;
++              return num_tuples < 0 ? num_tuples : -EINVAL;
+       }
+ 
+       eth_ft = get_flow_table(priv, fs, num_tuples);
+-- 
+2.43.0
+
diff --git a/queue-6.10/net-mlx5e-take-state-lock-during-tx-timeout-reporter.patch b/queue-6.10/net-mlx5e-take-state-lock-during-tx-timeout-reporter.patch

new file mode 100644 (file)

index 0000000..c0889c2
--- /dev/null
+++ b/queue-6.10/net-mlx5e-take-state-lock-during-tx-timeout-reporter.patch
@@ -0,0 +1,45 @@
+From 15aa171207a5f42c11d7d1ede82bbe69a7929fa1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Aug 2024 17:41:04 +0300
+Subject: net/mlx5e: Take state lock during tx timeout reporter
+
+From: Dragos Tatulea <dtatulea@nvidia.com>
+
+[ Upstream commit e6b5afd30b99b43682a7764e1a74a42fe4d5f4b3 ]
+
+mlx5e_safe_reopen_channels() requires the state lock to be taken. The
+change referenced in the Fixes tag removed the lock to fix another
+issue. This patch adds it back but at a later point (when calling
+mlx5e_safe_reopen_channels()) to avoid the deadlock referenced in the
+Fixes tag.
+
+Fixes: eab0da38912e ("net/mlx5e: Fix possible deadlock on mlx5e_tx_timeout_work")
+Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
+Link: https://lore.kernel.org/all/ZplpKq8FKi3vwfxv@gmail.com/T/
+Reviewed-by: Breno Leitao <leitao@debian.org>
+Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
+Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
+Link: https://patch.msgid.link/20240808144107.2095424-4-tariqt@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+index 22918b2ef7f12..09433b91be176 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+@@ -146,7 +146,9 @@ static int mlx5e_tx_reporter_timeout_recover(void *ctx)
+               return err;
+       }
+ 
++      mutex_lock(&priv->state_lock);
+       err = mlx5e_safe_reopen_channels(priv);
++      mutex_unlock(&priv->state_lock);
+       if (!err) {
+               to_ctx->status = 1; /* all channels recovered */
+               return err;
+-- 
+2.43.0
+
diff --git a/queue-6.10/netfilter-allow-ipv6-fragments-to-arrive-on-differen.patch b/queue-6.10/netfilter-allow-ipv6-fragments-to-arrive-on-differen.patch

new file mode 100644 (file)

index 0000000..fe96907
--- /dev/null
+++ b/queue-6.10/netfilter-allow-ipv6-fragments-to-arrive-on-differen.patch
@@ -0,0 +1,46 @@
+From b351ec2203c8b888ec8128ee4556ba3f2c84d7b8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 6 Aug 2024 12:40:52 +0100
+Subject: netfilter: allow ipv6 fragments to arrive on different devices
+
+From: Tom Hughes <tom@compton.nu>
+
+[ Upstream commit 3cd740b985963f874a1a094f1969e998b9d05554 ]
+
+Commit 264640fc2c5f4 ("ipv6: distinguish frag queues by device
+for multicast and link-local packets") modified the ipv6 fragment
+reassembly logic to distinguish frag queues by device for multicast
+and link-local packets but in fact only the main reassembly code
+limits the use of the device to those address types and the netfilter
+reassembly code uses the device for all packets.
+
+This means that if fragments of a packet arrive on different interfaces
+then netfilter will fail to reassemble them and the fragments will be
+expired without going any further through the filters.
+
+Fixes: 648700f76b03 ("inet: frags: use rhashtables for reassembly units")
+Signed-off-by: Tom Hughes <tom@compton.nu>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv6/netfilter/nf_conntrack_reasm.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
+index 5e1b50c6a44d2..3e9779ed7daec 100644
+--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
++++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
+@@ -154,6 +154,10 @@ static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user,
+       };
+       struct inet_frag_queue *q;
+ 
++      if (!(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_MULTICAST |
++                                          IPV6_ADDR_LINKLOCAL)))
++              key.iif = 0;
++
+       q = inet_frag_find(nf_frag->fqdir, &key);
+       if (!q)
+               return NULL;
+-- 
+2.43.0
+
diff --git a/queue-6.10/netfilter-flowtable-initialise-extack-before-use.patch b/queue-6.10/netfilter-flowtable-initialise-extack-before-use.patch

new file mode 100644 (file)

index 0000000..b0b7465
--- /dev/null
+++ b/queue-6.10/netfilter-flowtable-initialise-extack-before-use.patch
@@ -0,0 +1,37 @@
+From 484e95440bd15e836f38eea9d289c106113af2df Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 6 Aug 2024 17:16:37 +0100
+Subject: netfilter: flowtable: initialise extack before use
+
+From: Donald Hunter <donald.hunter@gmail.com>
+
+[ Upstream commit e9767137308daf906496613fd879808a07f006a2 ]
+
+Fix missing initialisation of extack in flow offload.
+
+Fixes: c29f74e0df7a ("netfilter: nf_flow_table: hardware offload support")
+Signed-off-by: Donald Hunter <donald.hunter@gmail.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_flow_table_offload.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
+index a010b25076ca0..3d46372b538e5 100644
+--- a/net/netfilter/nf_flow_table_offload.c
++++ b/net/netfilter/nf_flow_table_offload.c
+@@ -841,8 +841,8 @@ static int nf_flow_offload_tuple(struct nf_flowtable *flowtable,
+                                struct list_head *block_cb_list)
+ {
+       struct flow_cls_offload cls_flow = {};
++      struct netlink_ext_ack extack = {};
+       struct flow_block_cb *block_cb;
+-      struct netlink_ext_ack extack;
+       __be16 proto = ETH_P_ALL;
+       int err, i = 0;
+ 
+-- 
+2.43.0
+
diff --git a/queue-6.10/netfilter-nf_queue-drop-packets-with-cloned-unconfir.patch b/queue-6.10/netfilter-nf_queue-drop-packets-with-cloned-unconfir.patch

new file mode 100644 (file)

index 0000000..3deb171
--- /dev/null
+++ b/queue-6.10/netfilter-nf_queue-drop-packets-with-cloned-unconfir.patch
@@ -0,0 +1,109 @@
+From e02f8cf97ff2a3b2031984fbc3096f6a626e600e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 7 Aug 2024 21:28:41 +0200
+Subject: netfilter: nf_queue: drop packets with cloned unconfirmed conntracks
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit 7d8dc1c7be8d3509e8f5164dd5df64c8e34d7eeb ]
+
+Conntrack assumes an unconfirmed entry (not yet committed to global hash
+table) has a refcount of 1 and is not visible to other cores.
+
+With multicast forwarding this assumption breaks down because such
+skbs get cloned after being picked up, i.e.  ct->use refcount is > 1.
+
+Likewise, bridge netfilter will clone broad/multicast frames and
+all frames in case they need to be flood-forwarded during learning
+phase.
+
+For ip multicast forwarding or plain bridge flood-forward this will
+"work" because packets don't leave softirq and are implicitly
+serialized.
+
+With nfqueue this no longer holds true, the packets get queued
+and can be reinjected in arbitrary ways.
+
+Disable this feature, I see no other solution.
+
+After this patch, nfqueue cannot queue packets except the last
+multicast/broadcast packet.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/bridge/br_netfilter_hooks.c |  6 +++++-
+ net/netfilter/nfnetlink_queue.c | 35 +++++++++++++++++++++++++++++++--
+ 2 files changed, 38 insertions(+), 3 deletions(-)
+
+diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
+index bf30c50b56895..a9e1b56f854d4 100644
+--- a/net/bridge/br_netfilter_hooks.c
++++ b/net/bridge/br_netfilter_hooks.c
+@@ -619,8 +619,12 @@ static unsigned int br_nf_local_in(void *priv,
+       if (likely(nf_ct_is_confirmed(ct)))
+               return NF_ACCEPT;
+ 
++      if (WARN_ON_ONCE(refcount_read(&nfct->use) != 1)) {
++              nf_reset_ct(skb);
++              return NF_ACCEPT;
++      }
++
+       WARN_ON_ONCE(skb_shared(skb));
+-      WARN_ON_ONCE(refcount_read(&nfct->use) != 1);
+ 
+       /* We can't call nf_confirm here, it would create a dependency
+        * on nf_conntrack module.
+diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
+index 55e28e1da66ec..e0716da256bf5 100644
+--- a/net/netfilter/nfnetlink_queue.c
++++ b/net/netfilter/nfnetlink_queue.c
+@@ -820,10 +820,41 @@ static bool nf_ct_drop_unconfirmed(const struct nf_queue_entry *entry)
+ {
+ #if IS_ENABLED(CONFIG_NF_CONNTRACK)
+       static const unsigned long flags = IPS_CONFIRMED | IPS_DYING;
+-      const struct nf_conn *ct = (void *)skb_nfct(entry->skb);
++      struct nf_conn *ct = (void *)skb_nfct(entry->skb);
++      unsigned long status;
++      unsigned int use;
+ 
+-      if (ct && ((ct->status & flags) == IPS_DYING))
++      if (!ct)
++              return false;
++
++      status = READ_ONCE(ct->status);
++      if ((status & flags) == IPS_DYING)
+               return true;
++
++      if (status & IPS_CONFIRMED)
++              return false;
++
++      /* in some cases skb_clone() can occur after initial conntrack
++       * pickup, but conntrack assumes exclusive skb->_nfct ownership for
++       * unconfirmed entries.
++       *
++       * This happens for br_netfilter and with ip multicast routing.
++       * We can't be solved with serialization here because one clone could
++       * have been queued for local delivery.
++       */
++      use = refcount_read(&ct->ct_general.use);
++      if (likely(use == 1))
++              return false;
++
++      /* Can't decrement further? Exclusive ownership. */
++      if (!refcount_dec_not_one(&ct->ct_general.use))
++              return false;
++
++      skb_set_nfct(entry->skb, 0);
++      /* No nf_ct_put(): we already decremented .use and it cannot
++       * drop down to 0.
++       */
++      return true;
+ #endif
+       return false;
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.10/netfilter-nf_tables-add-locking-for-nft_msg_getobj_r.patch b/queue-6.10/netfilter-nf_tables-add-locking-for-nft_msg_getobj_r.patch

new file mode 100644 (file)

index 0000000..c44c172
--- /dev/null
+++ b/queue-6.10/netfilter-nf_tables-add-locking-for-nft_msg_getobj_r.patch
@@ -0,0 +1,155 @@
+From 4d58707b0e869f87fa22432e6e382343ae4742ca Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 9 Aug 2024 15:07:32 +0200
+Subject: netfilter: nf_tables: Add locking for NFT_MSG_GETOBJ_RESET requests
+
+From: Phil Sutter <phil@nwl.cc>
+
+[ Upstream commit bd662c4218f9648e888bebde9468146965f3f8a0 ]
+
+Objects' dump callbacks are not concurrency-safe per-se with reset bit
+set. If two CPUs perform a reset at the same time, at least counter and
+quota objects suffer from value underrun.
+
+Prevent this by introducing dedicated locking callbacks for nfnetlink
+and the asynchronous dump handling to serialize access.
+
+Fixes: 43da04a593d8 ("netfilter: nf_tables: atomic dump and reset for stateful objects")
+Signed-off-by: Phil Sutter <phil@nwl.cc>
+Reviewed-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_tables_api.c | 72 ++++++++++++++++++++++++++++-------
+ 1 file changed, 59 insertions(+), 13 deletions(-)
+
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index 2f32db2a09fec..41d7faeb101cf 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -7977,6 +7977,19 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
+       return skb->len;
+ }
+ 
++static int nf_tables_dumpreset_obj(struct sk_buff *skb,
++                                 struct netlink_callback *cb)
++{
++      struct nftables_pernet *nft_net = nft_pernet(sock_net(skb->sk));
++      int ret;
++
++      mutex_lock(&nft_net->commit_mutex);
++      ret = nf_tables_dump_obj(skb, cb);
++      mutex_unlock(&nft_net->commit_mutex);
++
++      return ret;
++}
++
+ static int nf_tables_dump_obj_start(struct netlink_callback *cb)
+ {
+       struct nft_obj_dump_ctx *ctx = (void *)cb->ctx;
+@@ -7993,12 +8006,18 @@ static int nf_tables_dump_obj_start(struct netlink_callback *cb)
+       if (nla[NFTA_OBJ_TYPE])
+               ctx->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
+ 
+-      if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
+-              ctx->reset = true;
+-
+       return 0;
+ }
+ 
++static int nf_tables_dumpreset_obj_start(struct netlink_callback *cb)
++{
++      struct nft_obj_dump_ctx *ctx = (void *)cb->ctx;
++
++      ctx->reset = true;
++
++      return nf_tables_dump_obj_start(cb);
++}
++
+ static int nf_tables_dump_obj_done(struct netlink_callback *cb)
+ {
+       struct nft_obj_dump_ctx *ctx = (void *)cb->ctx;
+@@ -8057,18 +8076,43 @@ nf_tables_getobj_single(u32 portid, const struct nfnl_info *info,
+ 
+ static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
+                           const struct nlattr * const nla[])
++{
++      u32 portid = NETLINK_CB(skb).portid;
++      struct sk_buff *skb2;
++
++      if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
++              struct netlink_dump_control c = {
++                      .start = nf_tables_dump_obj_start,
++                      .dump = nf_tables_dump_obj,
++                      .done = nf_tables_dump_obj_done,
++                      .module = THIS_MODULE,
++                      .data = (void *)nla,
++              };
++
++              return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
++      }
++
++      skb2 = nf_tables_getobj_single(portid, info, nla, false);
++      if (IS_ERR(skb2))
++              return PTR_ERR(skb2);
++
++      return nfnetlink_unicast(skb2, info->net, portid);
++}
++
++static int nf_tables_getobj_reset(struct sk_buff *skb,
++                                const struct nfnl_info *info,
++                                const struct nlattr * const nla[])
+ {
+       struct nftables_pernet *nft_net = nft_pernet(info->net);
+       u32 portid = NETLINK_CB(skb).portid;
+       struct net *net = info->net;
+       struct sk_buff *skb2;
+-      bool reset = false;
+       char *buf;
+ 
+       if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
+               struct netlink_dump_control c = {
+-                      .start = nf_tables_dump_obj_start,
+-                      .dump = nf_tables_dump_obj,
++                      .start = nf_tables_dumpreset_obj_start,
++                      .dump = nf_tables_dumpreset_obj,
+                       .done = nf_tables_dump_obj_done,
+                       .module = THIS_MODULE,
+                       .data = (void *)nla,
+@@ -8077,16 +8121,18 @@ static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
+               return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
+       }
+ 
+-      if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
+-              reset = true;
++      if (!try_module_get(THIS_MODULE))
++              return -EINVAL;
++      rcu_read_unlock();
++      mutex_lock(&nft_net->commit_mutex);
++      skb2 = nf_tables_getobj_single(portid, info, nla, true);
++      mutex_unlock(&nft_net->commit_mutex);
++      rcu_read_lock();
++      module_put(THIS_MODULE);
+ 
+-      skb2 = nf_tables_getobj_single(portid, info, nla, reset);
+       if (IS_ERR(skb2))
+               return PTR_ERR(skb2);
+ 
+-      if (!reset)
+-              return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid);
+-
+       buf = kasprintf(GFP_ATOMIC, "%.*s:%u",
+                       nla_len(nla[NFTA_OBJ_TABLE]),
+                       (char *)nla_data(nla[NFTA_OBJ_TABLE]),
+@@ -9378,7 +9424,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
+               .policy         = nft_obj_policy,
+       },
+       [NFT_MSG_GETOBJ_RESET] = {
+-              .call           = nf_tables_getobj,
++              .call           = nf_tables_getobj_reset,
+               .type           = NFNL_CB_RCU,
+               .attr_count     = NFTA_OBJ_MAX,
+               .policy         = nft_obj_policy,
+-- 
+2.43.0
+
diff --git a/queue-6.10/netfilter-nf_tables-audit-log-dump-reset-after-the-f.patch b/queue-6.10/netfilter-nf_tables-audit-log-dump-reset-after-the-f.patch

new file mode 100644 (file)

index 0000000..eb4323e
--- /dev/null
+++ b/queue-6.10/netfilter-nf_tables-audit-log-dump-reset-after-the-f.patch
@@ -0,0 +1,91 @@
+From 388e65f82de18a03f71553695a638644214f45e3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 9 Aug 2024 15:07:30 +0200
+Subject: netfilter: nf_tables: Audit log dump reset after the fact
+
+From: Phil Sutter <phil@nwl.cc>
+
+[ Upstream commit e0b6648b0446e59522819c75ba1dcb09e68d3e94 ]
+
+In theory, dumpreset may fail and invalidate the preceding log message.
+Fix this and use the occasion to prepare for object reset locking, which
+benefits from a few unrelated changes:
+
+* Add an early call to nfnetlink_unicast if not resetting which
+  effectively skips the audit logging but also unindents it.
+* Extract the table's name from the netlink attribute (which is verified
+  via earlier table lookup) to not rely upon validity of the looked up
+  table pointer.
+* Do not use local variable family, it will vanish.
+
+Fixes: 8e6cf365e1d5 ("audit: log nftables configuration change events")
+Signed-off-by: Phil Sutter <phil@nwl.cc>
+Reviewed-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_tables_api.c | 28 +++++++++++++---------------
+ 1 file changed, 13 insertions(+), 15 deletions(-)
+
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index 91cc3a81ba8f1..7ae055521cf36 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -8012,6 +8012,7 @@ static int nf_tables_dump_obj_done(struct netlink_callback *cb)
+ static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
+                           const struct nlattr * const nla[])
+ {
++      const struct nftables_pernet *nft_net = nft_pernet(info->net);
+       struct netlink_ext_ack *extack = info->extack;
+       u8 genmask = nft_genmask_cur(info->net);
+       u8 family = info->nfmsg->nfgen_family;
+@@ -8021,6 +8022,7 @@ static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
+       struct sk_buff *skb2;
+       bool reset = false;
+       u32 objtype;
++      char *buf;
+       int err;
+ 
+       if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
+@@ -8059,27 +8061,23 @@ static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
+       if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
+               reset = true;
+ 
+-      if (reset) {
+-              const struct nftables_pernet *nft_net;
+-              char *buf;
+-
+-              nft_net = nft_pernet(net);
+-              buf = kasprintf(GFP_ATOMIC, "%s:%u", table->name, nft_net->base_seq);
+-
+-              audit_log_nfcfg(buf,
+-                              family,
+-                              1,
+-                              AUDIT_NFT_OP_OBJ_RESET,
+-                              GFP_ATOMIC);
+-              kfree(buf);
+-      }
+-
+       err = nf_tables_fill_obj_info(skb2, net, NETLINK_CB(skb).portid,
+                                     info->nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0,
+                                     family, table, obj, reset);
+       if (err < 0)
+               goto err_fill_obj_info;
+ 
++      if (!reset)
++              return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid);
++
++      buf = kasprintf(GFP_ATOMIC, "%.*s:%u",
++                      nla_len(nla[NFTA_OBJ_TABLE]),
++                      (char *)nla_data(nla[NFTA_OBJ_TABLE]),
++                      nft_net->base_seq);
++      audit_log_nfcfg(buf, info->nfmsg->nfgen_family, 1,
++                      AUDIT_NFT_OP_OBJ_RESET, GFP_ATOMIC);
++      kfree(buf);
++
+       return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid);
+ 
+ err_fill_obj_info:
+-- 
+2.43.0
+
diff --git a/queue-6.10/netfilter-nf_tables-introduce-nf_tables_getobj_singl.patch b/queue-6.10/netfilter-nf_tables-introduce-nf_tables_getobj_singl.patch

new file mode 100644 (file)

index 0000000..ec0bfac
--- /dev/null
+++ b/queue-6.10/netfilter-nf_tables-introduce-nf_tables_getobj_singl.patch
@@ -0,0 +1,148 @@
+From 42cd864299d43e394d4e7af24d8879c0f09f8bc2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 9 Aug 2024 15:07:31 +0200
+Subject: netfilter: nf_tables: Introduce nf_tables_getobj_single
+
+From: Phil Sutter <phil@nwl.cc>
+
+[ Upstream commit 69fc3e9e90f1afc11f4015e6b75d18ab9acee348 ]
+
+Outsource the reply skb preparation for non-dump getobj requests into a
+distinct function. Prep work for object reset locking.
+
+Signed-off-by: Phil Sutter <phil@nwl.cc>
+Reviewed-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Stable-dep-of: bd662c4218f9 ("netfilter: nf_tables: Add locking for NFT_MSG_GETOBJ_RESET requests")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_tables_api.c | 75 ++++++++++++++++++++---------------
+ 1 file changed, 44 insertions(+), 31 deletions(-)
+
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index 7ae055521cf36..2f32db2a09fec 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -8009,10 +8009,10 @@ static int nf_tables_dump_obj_done(struct netlink_callback *cb)
+ }
+ 
+ /* called with rcu_read_lock held */
+-static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
+-                          const struct nlattr * const nla[])
++static struct sk_buff *
++nf_tables_getobj_single(u32 portid, const struct nfnl_info *info,
++                      const struct nlattr * const nla[], bool reset)
+ {
+-      const struct nftables_pernet *nft_net = nft_pernet(info->net);
+       struct netlink_ext_ack *extack = info->extack;
+       u8 genmask = nft_genmask_cur(info->net);
+       u8 family = info->nfmsg->nfgen_family;
+@@ -8020,52 +8020,69 @@ static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
+       struct net *net = info->net;
+       struct nft_object *obj;
+       struct sk_buff *skb2;
+-      bool reset = false;
+       u32 objtype;
+-      char *buf;
+       int err;
+ 
+-      if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
+-              struct netlink_dump_control c = {
+-                      .start = nf_tables_dump_obj_start,
+-                      .dump = nf_tables_dump_obj,
+-                      .done = nf_tables_dump_obj_done,
+-                      .module = THIS_MODULE,
+-                      .data = (void *)nla,
+-              };
+-
+-              return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
+-      }
+-
+       if (!nla[NFTA_OBJ_NAME] ||
+           !nla[NFTA_OBJ_TYPE])
+-              return -EINVAL;
++              return ERR_PTR(-EINVAL);
+ 
+       table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask, 0);
+       if (IS_ERR(table)) {
+               NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_TABLE]);
+-              return PTR_ERR(table);
++              return ERR_CAST(table);
+       }
+ 
+       objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
+       obj = nft_obj_lookup(net, table, nla[NFTA_OBJ_NAME], objtype, genmask);
+       if (IS_ERR(obj)) {
+               NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]);
+-              return PTR_ERR(obj);
++              return ERR_CAST(obj);
+       }
+ 
+       skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
+       if (!skb2)
+-              return -ENOMEM;
++              return ERR_PTR(-ENOMEM);
++
++      err = nf_tables_fill_obj_info(skb2, net, portid,
++                                    info->nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0,
++                                    family, table, obj, reset);
++      if (err < 0) {
++              kfree_skb(skb2);
++              return ERR_PTR(err);
++      }
++
++      return skb2;
++}
++
++static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
++                          const struct nlattr * const nla[])
++{
++      struct nftables_pernet *nft_net = nft_pernet(info->net);
++      u32 portid = NETLINK_CB(skb).portid;
++      struct net *net = info->net;
++      struct sk_buff *skb2;
++      bool reset = false;
++      char *buf;
++
++      if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
++              struct netlink_dump_control c = {
++                      .start = nf_tables_dump_obj_start,
++                      .dump = nf_tables_dump_obj,
++                      .done = nf_tables_dump_obj_done,
++                      .module = THIS_MODULE,
++                      .data = (void *)nla,
++              };
++
++              return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
++      }
+ 
+       if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
+               reset = true;
+ 
+-      err = nf_tables_fill_obj_info(skb2, net, NETLINK_CB(skb).portid,
+-                                    info->nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0,
+-                                    family, table, obj, reset);
+-      if (err < 0)
+-              goto err_fill_obj_info;
++      skb2 = nf_tables_getobj_single(portid, info, nla, reset);
++      if (IS_ERR(skb2))
++              return PTR_ERR(skb2);
+ 
+       if (!reset)
+               return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid);
+@@ -8078,11 +8095,7 @@ static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
+                       AUDIT_NFT_OP_OBJ_RESET, GFP_ATOMIC);
+       kfree(buf);
+ 
+-      return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid);
+-
+-err_fill_obj_info:
+-      kfree_skb(skb2);
+-      return err;
++      return nfnetlink_unicast(skb2, net, portid);
+ }
+ 
+ static void nft_obj_destroy(const struct nft_ctx *ctx, struct nft_object *obj)
+-- 
+2.43.0
+
diff --git a/queue-6.10/netfilter-nfnetlink-initialise-extack-before-use-in-.patch b/queue-6.10/netfilter-nfnetlink-initialise-extack-before-use-in-.patch

new file mode 100644 (file)

index 0000000..2f5f8e5
--- /dev/null
+++ b/queue-6.10/netfilter-nfnetlink-initialise-extack-before-use-in-.patch
@@ -0,0 +1,47 @@
+From ae31b4b914ae2c2eb365563b94ee5e30e4179154 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 6 Aug 2024 16:43:24 +0100
+Subject: netfilter: nfnetlink: Initialise extack before use in ACKs
+
+From: Donald Hunter <donald.hunter@gmail.com>
+
+[ Upstream commit d1a7b382a9d3f0f3e5a80e0be2991c075fa4f618 ]
+
+Add missing extack initialisation when ACKing BATCH_BEGIN and BATCH_END.
+
+Fixes: bf2ac490d28c ("netfilter: nfnetlink: Handle ACK flags for batch messages")
+Signed-off-by: Donald Hunter <donald.hunter@gmail.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nfnetlink.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
+index 4abf660c7baff..932b3ddb34f13 100644
+--- a/net/netfilter/nfnetlink.c
++++ b/net/netfilter/nfnetlink.c
+@@ -427,8 +427,10 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
+ 
+       nfnl_unlock(subsys_id);
+ 
+-      if (nlh->nlmsg_flags & NLM_F_ACK)
++      if (nlh->nlmsg_flags & NLM_F_ACK) {
++              memset(&extack, 0, sizeof(extack));
+               nfnl_err_add(&err_list, nlh, 0, &extack);
++      }
+ 
+       while (skb->len >= nlmsg_total_size(0)) {
+               int msglen, type;
+@@ -577,6 +579,7 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
+                       ss->abort(net, oskb, NFNL_ABORT_NONE);
+                       netlink_ack(oskb, nlmsg_hdr(oskb), err, NULL);
+               } else if (nlh->nlmsg_flags & NLM_F_ACK) {
++                      memset(&extack, 0, sizeof(extack));
+                       nfnl_err_add(&err_list, nlh, 0, &extack);
+               }
+       } else {
+-- 
+2.43.0
+
diff --git a/queue-6.10/netfs-fault-in-smaller-chunks-for-non-large-folio-ma.patch b/queue-6.10/netfs-fault-in-smaller-chunks-for-non-large-folio-ma.patch

new file mode 100644 (file)

index 0000000..932ee4d
--- /dev/null
+++ b/queue-6.10/netfs-fault-in-smaller-chunks-for-non-large-folio-ma.patch
@@ -0,0 +1,39 @@
+From 869cd1bcb096701f0ecc97176c6556b50e2dd17f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 27 May 2024 21:17:32 +0100
+Subject: netfs: Fault in smaller chunks for non-large folio mappings
+
+From: Matthew Wilcox (Oracle) <willy@infradead.org>
+
+[ Upstream commit 98055bc3595500bcf2126b93b1595354bdb86a66 ]
+
+As in commit 4e527d5841e2 ("iomap: fault in smaller chunks for non-large
+folio mappings"), we can see a performance loss for filesystems
+which have not yet been converted to large folios.
+
+Fixes: c38f4e96e605 ("netfs: Provide func to copy data to pagecache for buffered write")
+Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+Link: https://lore.kernel.org/r/20240527201735.1898381-1-willy@infradead.org
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/netfs/buffered_write.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c
+index ecbc99ec7d367..18055c1e01835 100644
+--- a/fs/netfs/buffered_write.c
++++ b/fs/netfs/buffered_write.c
+@@ -184,7 +184,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
+       unsigned int bdp_flags = (iocb->ki_flags & IOCB_NOWAIT) ? BDP_ASYNC : 0;
+       ssize_t written = 0, ret, ret2;
+       loff_t i_size, pos = iocb->ki_pos, from, to;
+-      size_t max_chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER;
++      size_t max_chunk = mapping_max_folio_size(mapping);
+       bool maybe_trouble = false;
+ 
+       if (unlikely(test_bit(NETFS_ICTX_WRITETHROUGH, &ctx->flags) ||
+-- 
+2.43.0
+
diff --git a/queue-6.10/s390-uv-panic-for-set-and-remove-shared-access-uvc-e.patch b/queue-6.10/s390-uv-panic-for-set-and-remove-shared-access-uvc-e.patch

new file mode 100644 (file)

index 0000000..946c54c
--- /dev/null
+++ b/queue-6.10/s390-uv-panic-for-set-and-remove-shared-access-uvc-e.patch
@@ -0,0 +1,60 @@
+From a04a03afc428479252a1b63be92672d8979339f8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 1 Aug 2024 13:25:48 +0200
+Subject: s390/uv: Panic for set and remove shared access UVC errors
+
+From: Claudio Imbrenda <imbrenda@linux.ibm.com>
+
+[ Upstream commit cff59d8631e1409ffdd22d9d717e15810181b32c ]
+
+The return value of uv_set_shared() and uv_remove_shared() (which are
+wrappers around the share() function) is not always checked. The system
+integrity of a protected guest depends on the Share and Unshare UVCs
+being successful. This means that any caller that fails to check the
+return value will compromise the security of the protected guest.
+
+No code path that would lead to such violation of the security
+guarantees is currently exercised, since all the areas that are shared
+never get unshared during the lifetime of the system. This might
+change and become an issue in the future.
+
+The Share and Unshare UVCs can only fail in case of hypervisor
+misbehaviour (either a bug or malicious behaviour). In such cases there
+is no reasonable way forward, and the system needs to panic.
+
+This patch replaces the return at the end of the share() function with
+a panic, to guarantee system integrity.
+
+Fixes: 5abb9351dfd9 ("s390/uv: introduce guest side ultravisor code")
+Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
+Reviewed-by: Christian Borntraeger <borntraeger@linux.ibm.com>
+Reviewed-by: Steffen Eiden <seiden@linux.ibm.com>
+Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
+Link: https://lore.kernel.org/r/20240801112548.85303-1-imbrenda@linux.ibm.com
+Message-ID: <20240801112548.85303-1-imbrenda@linux.ibm.com>
+[frankja@linux.ibm.com: Fixed up patch subject]
+Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/s390/include/asm/uv.h | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
+index 0e7bd3873907f..b2e2f9a4163c5 100644
+--- a/arch/s390/include/asm/uv.h
++++ b/arch/s390/include/asm/uv.h
+@@ -442,7 +442,10 @@ static inline int share(unsigned long addr, u16 cmd)
+ 
+       if (!uv_call(0, (u64)&uvcb))
+               return 0;
+-      return -EINVAL;
++      pr_err("%s UVC failed (rc: 0x%x, rrc: 0x%x), possible hypervisor bug.\n",
++             uvcb.header.cmd == UVC_CMD_SET_SHARED_ACCESS ? "Share" : "Unshare",
++             uvcb.header.rc, uvcb.header.rrc);
++      panic("System security cannot be guaranteed unless the system panics now.\n");
+ }
+ 
+ /*
+-- 
+2.43.0
+
diff --git a/queue-6.10/selftest-af_unix-fix-kselftest-compilation-warnings.patch b/queue-6.10/selftest-af_unix-fix-kselftest-compilation-warnings.patch

new file mode 100644 (file)

index 0000000..41d879a
--- /dev/null
+++ b/queue-6.10/selftest-af_unix-fix-kselftest-compilation-warnings.patch
@@ -0,0 +1,59 @@
+From 94c888fc5e011ff2bf2b291f1f85ae46a62435c7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 14 Aug 2024 13:37:43 +0530
+Subject: selftest: af_unix: Fix kselftest compilation warnings
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Abhinav Jain <jain.abhinav177@gmail.com>
+
+[ Upstream commit 6c569b77f0300f8a9960277c7094fa0f128eb811 ]
+
+Change expected_buf from (const void *) to (const char *)
+in function __recvpair().
+This change fixes the below warnings during test compilation:
+
+```
+In file included from msg_oob.c:14:
+msg_oob.c: In function ‘__recvpair’:
+
+../../kselftest_harness.h:106:40: warning: format ‘%s’ expects argument
+of type ‘char *’,but argument 6 has type ‘const void *’ [-Wformat=]
+
+../../kselftest_harness.h:101:17: note: in expansion of macro ‘__TH_LOG’
+msg_oob.c:235:17: note: in expansion of macro ‘TH_LOG’
+
+../../kselftest_harness.h:106:40: warning: format ‘%s’ expects argument
+of type ‘char *’,but argument 6 has type ‘const void *’ [-Wformat=]
+
+../../kselftest_harness.h:101:17: note: in expansion of macro ‘__TH_LOG’
+msg_oob.c:259:25: note: in expansion of macro ‘TH_LOG’
+```
+
+Fixes: d098d77232c3 ("selftest: af_unix: Add msg_oob.c.")
+Signed-off-by: Abhinav Jain <jain.abhinav177@gmail.com>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://patch.msgid.link/20240814080743.1156166-1-jain.abhinav177@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/net/af_unix/msg_oob.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c
+index 16d0c172eaebe..535eb2c3d7d1c 100644
+--- a/tools/testing/selftests/net/af_unix/msg_oob.c
++++ b/tools/testing/selftests/net/af_unix/msg_oob.c
+@@ -209,7 +209,7 @@ static void __sendpair(struct __test_metadata *_metadata,
+ 
+ static void __recvpair(struct __test_metadata *_metadata,
+                      FIXTURE_DATA(msg_oob) *self,
+-                     const void *expected_buf, int expected_len,
++                     const char *expected_buf, int expected_len,
+                      int buf_len, int flags)
+ {
+       int i, ret[2], recv_errno[2], expected_errno = 0;
+-- 
+2.43.0
+
diff --git a/queue-6.10/selftests-net-lib-ignore-possible-errors.patch b/queue-6.10/selftests-net-lib-ignore-possible-errors.patch

new file mode 100644 (file)

index 0000000..3353f41
--- /dev/null
+++ b/queue-6.10/selftests-net-lib-ignore-possible-errors.patch
@@ -0,0 +1,56 @@
+From 50ae8769063286f6ac7e75f035fea1ca04ed2f27 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 7 Jun 2024 18:31:02 +0200
+Subject: selftests: net: lib: ignore possible errors
+
+From: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+
+[ Upstream commit 7e0620bc6a5ec6b340a0be40054f294ca26c010f ]
+
+No need to disable errexit temporarily; simply ignore the only possible
+and not handled error.
+
+Reviewed-by: Geliang Tang <geliang@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://lore.kernel.org/r/20240607-upstream-net-next-20240607-selftests-mptcp-net-lib-v1-1-e36986faac94@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 7965a7f32a53 ("selftests: net: lib: kill PIDs before del netns")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/net/lib.sh | 10 +---------
+ 1 file changed, 1 insertion(+), 9 deletions(-)
+
+diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh
+index 9155c914c064f..b2572aff6286f 100644
+--- a/tools/testing/selftests/net/lib.sh
++++ b/tools/testing/selftests/net/lib.sh
+@@ -128,25 +128,17 @@ slowwait_for_counter()
+ cleanup_ns()
+ {
+       local ns=""
+-      local errexit=0
+       local ret=0
+ 
+-      # disable errexit temporary
+-      if [[ $- =~ "e" ]]; then
+-              errexit=1
+-              set +e
+-      fi
+-
+       for ns in "$@"; do
+               [ -z "${ns}" ] && continue
+-              ip netns delete "${ns}" &> /dev/null
++              ip netns delete "${ns}" &> /dev/null || true
+               if ! busywait $BUSYWAIT_TIMEOUT ip netns list \| grep -vq "^$ns$" &> /dev/null; then
+                       echo "Warn: Failed to remove namespace $ns"
+                       ret=1
+               fi
+       done
+ 
+-      [ $errexit -eq 1 ] && set -e
+       return $ret
+ }
+ 
+-- 
+2.43.0
+
diff --git a/queue-6.10/selftests-net-lib-kill-pids-before-del-netns.patch b/queue-6.10/selftests-net-lib-kill-pids-before-del-netns.patch

new file mode 100644 (file)

index 0000000..41d7840
--- /dev/null
+++ b/queue-6.10/selftests-net-lib-kill-pids-before-del-netns.patch
@@ -0,0 +1,45 @@
+From 2b2048e20d2882808c391b4a15fa3b6d47770159 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 13 Aug 2024 15:39:34 +0200
+Subject: selftests: net: lib: kill PIDs before del netns
+
+From: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+
+[ Upstream commit 7965a7f32a53d9ad807ce2c53bdda69ba104974f ]
+
+When deleting a netns, it is possible to still have some tasks running
+in it, e.g. background tasks like tcpdump that have not been
+stopped because the test has been interrupted.
+
+Before deleting the netns, it is then safer to kill all attached PIDs,
+if any. That should reduce some noise after the end of some tests, and
+help with the debugging of some issues. That's why this modification is
+seen as a "fix".
+
+Fixes: 25ae948b4478 ("selftests/net: add lib.sh")
+Acked-by: Mat Martineau <martineau@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Acked-by: Florian Westphal <fw@strlen.de>
+Reviewed-by: Hangbin Liu <liuhangbin@gmail.com>
+Link: https://patch.msgid.link/20240813-upstream-net-20240813-selftests-net-lib-kill-v1-1-27b689b248b8@kernel.org
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/net/lib.sh | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh
+index b2572aff6286f..93de05fedd91a 100644
+--- a/tools/testing/selftests/net/lib.sh
++++ b/tools/testing/selftests/net/lib.sh
+@@ -132,6 +132,7 @@ cleanup_ns()
+ 
+       for ns in "$@"; do
+               [ -z "${ns}" ] && continue
++              ip netns pids "${ns}" 2> /dev/null | xargs -r kill || true
+               ip netns delete "${ns}" &> /dev/null || true
+               if ! busywait $BUSYWAIT_TIMEOUT ip netns list \| grep -vq "^$ns$" &> /dev/null; then
+                       echo "Warn: Failed to remove namespace $ns"
+-- 
+2.43.0
+
diff --git a/queue-6.10/series b/queue-6.10/series

index 26c1951212a1e9860e1c57e55444b7398b9412bb..172455e2024c80c9e5511b4bb13b14796e369ed9 100644 (file)
--- a/queue-6.10/series
+++ b/queue-6.10/series
@@ -74,3 +74,51 @@ drm-amdgpu-jpeg2-properly-set-atomics-vmid-field.patch
  drm-amdgpu-jpeg4-properly-set-atomics-vmid-field.patch
  drm-amd-amdgpu-command-submission-parser-for-jpeg.patch
  pidfd-prevent-creation-of-pidfds-for-kthreads.patch
+s390-uv-panic-for-set-and-remove-shared-access-uvc-e.patch
+netfs-fault-in-smaller-chunks-for-non-large-folio-ma.patch
+filelock-fix-name-of-file_lease-slab-cache.patch
+libfs-fix-infinite-directory-reads-for-offset-dir.patch
+bpf-fix-updating-attached-freplace-prog-in-prog_arra.patch
+bpf-fix-a-kernel-verifier-crash-in-stacksafe.patch
+9p-fix-dio-read-through-netfs.patch
+btrfs-fix-invalid-mapping-of-extent-xarray-state.patch
+igc-fix-packet-still-tx-after-gate-close-by-reducing.patch
+igc-fix-qbv_config_change_errors-logics.patch
+igc-fix-reset-adapter-logics-when-tx-mode-change.patch
+igc-fix-qbv-tx-latency-by-setting-gtxoffset.patch
+gtp-pull-network-headers-in-gtp_dev_xmit.patch
+net-mlx5-sd-do-not-query-mpir-register-if-no-sd_grou.patch
+net-mlx5e-take-state-lock-during-tx-timeout-reporter.patch
+net-mlx5e-correctly-report-errors-for-ethtool-rx-flo.patch
+atm-idt77252-prevent-use-after-free-in-dequeue_rx.patch
+net-axienet-fix-register-defines-comment-description.patch
+net-dsa-vsc73xx-fix-port-mac-configuration-in-full-d.patch
+net-dsa-vsc73xx-pass-value-in-phy_write-operation.patch
+net-dsa-vsc73xx-check-busy-flag-in-mdio-operations.patch
+net-ethernet-mtk_wed-fix-use-after-free-panic-in-mtk.patch
+mlxbf_gige-disable-rx-filters-until-rx-path-initiali.patch
+mptcp-correct-mptcp_subflow_attr_ssn_offset-reserved.patch
+tcp-update-window-clamping-condition.patch
+netfilter-allow-ipv6-fragments-to-arrive-on-differen.patch
+netfilter-nfnetlink-initialise-extack-before-use-in-.patch
+netfilter-flowtable-initialise-extack-before-use.patch
+netfilter-nf_queue-drop-packets-with-cloned-unconfir.patch
+netfilter-nf_tables-audit-log-dump-reset-after-the-f.patch
+netfilter-nf_tables-introduce-nf_tables_getobj_singl.patch
+netfilter-nf_tables-add-locking-for-nft_msg_getobj_r.patch
+selftest-af_unix-fix-kselftest-compilation-warnings.patch
+vsock-fix-recursive-recvmsg-calls.patch
+selftests-net-lib-ignore-possible-errors.patch
+selftests-net-lib-kill-pids-before-del-netns.patch
+net-hns3-fix-wrong-use-of-semaphore-up.patch
+net-hns3-use-the-user-s-cfg-after-reset.patch
+net-hns3-fix-a-deadlock-problem-when-config-tc-durin.patch
+kbuild-refactor-variables-in-scripts-link-vmlinux.sh.patch
+kbuild-remove-provide-for-kallsyms-symbols.patch
+kallsyms-get-rid-of-code-for-absolute-kallsyms.patch
+kallsyms-do-not-cleanup-.llvm.-hash-suffix-before-so.patch
+kallsyms-match-symbols-exactly-with-config_lto_clang.patch
+iommu-restore-lost-return-in-iommu_report_device_fau.patch
+gpio-mlxbf3-support-shutdown-function.patch
+alsa-hda-realtek-fix-noise-from-speakers-on-lenovo-i.patch
+drm-v3d-fix-out-of-bounds-read-in-v3d_csd_job_run.patch
diff --git a/queue-6.10/tcp-update-window-clamping-condition.patch b/queue-6.10/tcp-update-window-clamping-condition.patch

new file mode 100644 (file)

index 0000000..65002f0
--- /dev/null
+++ b/queue-6.10/tcp-update-window-clamping-condition.patch
@@ -0,0 +1,94 @@
+From 4321f96793b0ba696158b175bf2bc0e2c68a6dfc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Aug 2024 16:06:40 -0700
+Subject: tcp: Update window clamping condition
+
+From: Subash Abhinov Kasiviswanathan <quic_subashab@quicinc.com>
+
+[ Upstream commit a2cbb1603943281a604f5adc48079a148db5cb0d ]
+
+This patch is based on the discussions between Neal Cardwell and
+Eric Dumazet in the link
+https://lore.kernel.org/netdev/20240726204105.1466841-1-quic_subashab@quicinc.com/
+
+It was correctly pointed out that tp->window_clamp would not be
+updated in cases where net.ipv4.tcp_moderate_rcvbuf=0 or if
+(copied <= tp->rcvq_space.space). While it is expected for most
+setups to leave the sysctl enabled, the latter condition may
+not end up being hit, depending on the TCP receive queue size and
+the pattern of arriving data.
+
+The updated check should be hit only on initial MSS update from
+TCP_MIN_MSS to measured MSS value and subsequently if there was
+an update to a larger value.
+
+Fixes: 05f76b2d634e ("tcp: Adjust clamping window for applications specifying SO_RCVBUF")
+Signed-off-by: Sean Tranchetti <quic_stranche@quicinc.com>
+Signed-off-by: Subash Abhinov Kasiviswanathan <quic_subashab@quicinc.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 28 ++++++++++++----------------
+ 1 file changed, 12 insertions(+), 16 deletions(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index ecd521108559f..2c52f6dcbd290 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -238,9 +238,14 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
+                */
+               if (unlikely(len != icsk->icsk_ack.rcv_mss)) {
+                       u64 val = (u64)skb->len << TCP_RMEM_TO_WIN_SCALE;
++                      u8 old_ratio = tcp_sk(sk)->scaling_ratio;
+ 
+                       do_div(val, skb->truesize);
+                       tcp_sk(sk)->scaling_ratio = val ? val : 1;
++
++                      if (old_ratio != tcp_sk(sk)->scaling_ratio)
++                              WRITE_ONCE(tcp_sk(sk)->window_clamp,
++                                         tcp_win_from_space(sk, sk->sk_rcvbuf));
+               }
+               icsk->icsk_ack.rcv_mss = min_t(unsigned int, len,
+                                              tcp_sk(sk)->advmss);
+@@ -754,7 +759,8 @@ void tcp_rcv_space_adjust(struct sock *sk)
+        * <prev RTT . ><current RTT .. ><next RTT .... >
+        */
+ 
+-      if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf)) {
++      if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
++          !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
+               u64 rcvwin, grow;
+               int rcvbuf;
+ 
+@@ -770,22 +776,12 @@ void tcp_rcv_space_adjust(struct sock *sk)
+ 
+               rcvbuf = min_t(u64, tcp_space_from_win(sk, rcvwin),
+                              READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
+-              if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
+-                      if (rcvbuf > sk->sk_rcvbuf) {
+-                              WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
+-
+-                              /* Make the window clamp follow along.  */
+-                              WRITE_ONCE(tp->window_clamp,
+-                                         tcp_win_from_space(sk, rcvbuf));
+-                      }
+-              } else {
+-                      /* Make the window clamp follow along while being bounded
+-                       * by SO_RCVBUF.
+-                       */
+-                      int clamp = tcp_win_from_space(sk, min(rcvbuf, sk->sk_rcvbuf));
++              if (rcvbuf > sk->sk_rcvbuf) {
++                      WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
+ 
+-                      if (clamp > tp->window_clamp)
+-                              WRITE_ONCE(tp->window_clamp, clamp);
++                      /* Make the window clamp follow along.  */
++                      WRITE_ONCE(tp->window_clamp,
++                                 tcp_win_from_space(sk, rcvbuf));
+               }
+       }
+       tp->rcvq_space.space = copied;
+-- 
+2.43.0
+
diff --git a/queue-6.10/vsock-fix-recursive-recvmsg-calls.patch b/queue-6.10/vsock-fix-recursive-recvmsg-calls.patch

new file mode 100644 (file)

index 0000000..a802da6
--- /dev/null
+++ b/queue-6.10/vsock-fix-recursive-recvmsg-calls.patch
@@ -0,0 +1,172 @@
+From 8606a96e97da12b62af75610bc6a0f3d3818c1a8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 11 Aug 2024 19:21:53 -0700
+Subject: vsock: fix recursive ->recvmsg calls
+
+From: Cong Wang <cong.wang@bytedance.com>
+
+[ Upstream commit 69139d2919dd4aa9a553c8245e7c63e82613e3fc ]
+
+After a vsock socket has been added to a BPF sockmap, its prot->recvmsg
+has been replaced with vsock_bpf_recvmsg(). Thus the following
+recursion could happen:
+
+vsock_bpf_recvmsg()
+ -> __vsock_recvmsg()
+  -> vsock_connectible_recvmsg()
+   -> prot->recvmsg()
+    -> vsock_bpf_recvmsg() again
+
+We need to fix it by calling the original ->recvmsg() without any BPF
+sockmap logic in __vsock_recvmsg().
+
+Fixes: 634f1a7110b4 ("vsock: support sockmap")
+Reported-by: syzbot+bdb4bd87b5e22058e2a4@syzkaller.appspotmail.com
+Tested-by: syzbot+bdb4bd87b5e22058e2a4@syzkaller.appspotmail.com
+Cc: Bobby Eshleman <bobby.eshleman@bytedance.com>
+Cc: Michael S. Tsirkin <mst@redhat.com>
+Cc: Stefano Garzarella <sgarzare@redhat.com>
+Signed-off-by: Cong Wang <cong.wang@bytedance.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Link: https://patch.msgid.link/20240812022153.86512-1-xiyou.wangcong@gmail.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/af_vsock.h    |  4 ++++
+ net/vmw_vsock/af_vsock.c  | 50 +++++++++++++++++++++++----------------
+ net/vmw_vsock/vsock_bpf.c |  4 ++--
+ 3 files changed, 35 insertions(+), 23 deletions(-)
+
+diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
+index 535701efc1e5c..24d970f7a4fa2 100644
+--- a/include/net/af_vsock.h
++++ b/include/net/af_vsock.h
+@@ -230,8 +230,12 @@ struct vsock_tap {
+ int vsock_add_tap(struct vsock_tap *vt);
+ int vsock_remove_tap(struct vsock_tap *vt);
+ void vsock_deliver_tap(struct sk_buff *build_skb(void *opaque), void *opaque);
++int __vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
++                              int flags);
+ int vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+                             int flags);
++int __vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
++                        size_t len, int flags);
+ int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
+                       size_t len, int flags);
+ 
+diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
+index 4b040285aa78c..0ff9b2dd86bac 100644
+--- a/net/vmw_vsock/af_vsock.c
++++ b/net/vmw_vsock/af_vsock.c
+@@ -1270,25 +1270,28 @@ static int vsock_dgram_connect(struct socket *sock,
+       return err;
+ }
+ 
++int __vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
++                        size_t len, int flags)
++{
++      struct sock *sk = sock->sk;
++      struct vsock_sock *vsk = vsock_sk(sk);
++
++      return vsk->transport->dgram_dequeue(vsk, msg, len, flags);
++}
++
+ int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
+                       size_t len, int flags)
+ {
+ #ifdef CONFIG_BPF_SYSCALL
++      struct sock *sk = sock->sk;
+       const struct proto *prot;
+-#endif
+-      struct vsock_sock *vsk;
+-      struct sock *sk;
+ 
+-      sk = sock->sk;
+-      vsk = vsock_sk(sk);
+-
+-#ifdef CONFIG_BPF_SYSCALL
+       prot = READ_ONCE(sk->sk_prot);
+       if (prot != &vsock_proto)
+               return prot->recvmsg(sk, msg, len, flags, NULL);
+ #endif
+ 
+-      return vsk->transport->dgram_dequeue(vsk, msg, len, flags);
++      return __vsock_dgram_recvmsg(sock, msg, len, flags);
+ }
+ EXPORT_SYMBOL_GPL(vsock_dgram_recvmsg);
+ 
+@@ -2174,15 +2177,12 @@ static int __vsock_seqpacket_recvmsg(struct sock *sk, struct msghdr *msg,
+ }
+ 
+ int
+-vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+-                        int flags)
++__vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
++                          int flags)
+ {
+       struct sock *sk;
+       struct vsock_sock *vsk;
+       const struct vsock_transport *transport;
+-#ifdef CONFIG_BPF_SYSCALL
+-      const struct proto *prot;
+-#endif
+       int err;
+ 
+       sk = sock->sk;
+@@ -2233,14 +2233,6 @@ vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+               goto out;
+       }
+ 
+-#ifdef CONFIG_BPF_SYSCALL
+-      prot = READ_ONCE(sk->sk_prot);
+-      if (prot != &vsock_proto) {
+-              release_sock(sk);
+-              return prot->recvmsg(sk, msg, len, flags, NULL);
+-      }
+-#endif
+-
+       if (sk->sk_type == SOCK_STREAM)
+               err = __vsock_stream_recvmsg(sk, msg, len, flags);
+       else
+@@ -2250,6 +2242,22 @@ vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+       release_sock(sk);
+       return err;
+ }
++
++int
++vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
++                        int flags)
++{
++#ifdef CONFIG_BPF_SYSCALL
++      struct sock *sk = sock->sk;
++      const struct proto *prot;
++
++      prot = READ_ONCE(sk->sk_prot);
++      if (prot != &vsock_proto)
++              return prot->recvmsg(sk, msg, len, flags, NULL);
++#endif
++
++      return __vsock_connectible_recvmsg(sock, msg, len, flags);
++}
+ EXPORT_SYMBOL_GPL(vsock_connectible_recvmsg);
+ 
+ static int vsock_set_rcvlowat(struct sock *sk, int val)
+diff --git a/net/vmw_vsock/vsock_bpf.c b/net/vmw_vsock/vsock_bpf.c
+index a3c97546ab84a..c42c5cc18f324 100644
+--- a/net/vmw_vsock/vsock_bpf.c
++++ b/net/vmw_vsock/vsock_bpf.c
+@@ -64,9 +64,9 @@ static int __vsock_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int
+       int err;
+ 
+       if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)
+-              err = vsock_connectible_recvmsg(sock, msg, len, flags);
++              err = __vsock_connectible_recvmsg(sock, msg, len, flags);
+       else if (sk->sk_type == SOCK_DGRAM)
+-              err = vsock_dgram_recvmsg(sock, msg, len, flags);
++              err = __vsock_dgram_recvmsg(sock, msg, len, flags);
+       else
+               err = -EPROTOTYPE;
+ 
+-- 
+2.43.0
+
author	Sasha Levin <sashal@kernel.org>
	Mon, 19 Aug 2024 14:19:29 +0000 (10:19 -0400)
committer	Sasha Levin <sashal@kernel.org>
	Mon, 19 Aug 2024 14:19:29 +0000 (10:19 -0400)
queue-6.10/9p-fix-dio-read-through-netfs.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/alsa-hda-realtek-fix-noise-from-speakers-on-lenovo-i.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/atm-idt77252-prevent-use-after-free-in-dequeue_rx.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/bpf-fix-a-kernel-verifier-crash-in-stacksafe.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/bpf-fix-updating-attached-freplace-prog-in-prog_arra.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/btrfs-fix-invalid-mapping-of-extent-xarray-state.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/drm-v3d-fix-out-of-bounds-read-in-v3d_csd_job_run.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/filelock-fix-name-of-file_lease-slab-cache.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/gpio-mlxbf3-support-shutdown-function.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/gtp-pull-network-headers-in-gtp_dev_xmit.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/igc-fix-packet-still-tx-after-gate-close-by-reducing.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/igc-fix-qbv-tx-latency-by-setting-gtxoffset.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/igc-fix-qbv_config_change_errors-logics.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/igc-fix-reset-adapter-logics-when-tx-mode-change.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/iommu-restore-lost-return-in-iommu_report_device_fau.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/kallsyms-do-not-cleanup-.llvm.-hash-suffix-before-so.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/kallsyms-get-rid-of-code-for-absolute-kallsyms.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/kallsyms-match-symbols-exactly-with-config_lto_clang.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/kbuild-refactor-variables-in-scripts-link-vmlinux.sh.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/kbuild-remove-provide-for-kallsyms-symbols.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/libfs-fix-infinite-directory-reads-for-offset-dir.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/mlxbf_gige-disable-rx-filters-until-rx-path-initiali.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/mptcp-correct-mptcp_subflow_attr_ssn_offset-reserved.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/net-axienet-fix-register-defines-comment-description.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/net-dsa-vsc73xx-check-busy-flag-in-mdio-operations.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/net-dsa-vsc73xx-fix-port-mac-configuration-in-full-d.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/net-dsa-vsc73xx-pass-value-in-phy_write-operation.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/net-ethernet-mtk_wed-fix-use-after-free-panic-in-mtk.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/net-hns3-fix-a-deadlock-problem-when-config-tc-durin.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/net-hns3-fix-wrong-use-of-semaphore-up.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/net-hns3-use-the-user-s-cfg-after-reset.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/net-mlx5-sd-do-not-query-mpir-register-if-no-sd_grou.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/net-mlx5e-correctly-report-errors-for-ethtool-rx-flo.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/net-mlx5e-take-state-lock-during-tx-timeout-reporter.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/netfilter-allow-ipv6-fragments-to-arrive-on-differen.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/netfilter-flowtable-initialise-extack-before-use.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/netfilter-nf_queue-drop-packets-with-cloned-unconfir.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/netfilter-nf_tables-add-locking-for-nft_msg_getobj_r.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/netfilter-nf_tables-audit-log-dump-reset-after-the-f.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/netfilter-nf_tables-introduce-nf_tables_getobj_singl.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/netfilter-nfnetlink-initialise-extack-before-use-in-.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/netfs-fault-in-smaller-chunks-for-non-large-folio-ma.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/s390-uv-panic-for-set-and-remove-shared-access-uvc-e.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/selftest-af_unix-fix-kselftest-compilation-warnings.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/selftests-net-lib-ignore-possible-errors.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/selftests-net-lib-kill-pids-before-del-netns.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/series		patch \| blob \| blame \| history
queue-6.10/tcp-update-window-clamping-condition.patch	[new file with mode: 0644]	patch \| blob
queue-6.10/vsock-fix-recursive-recvmsg-calls.patch	[new file with mode: 0644]	patch \| blob