git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 6.12
author Sasha Levin <sashal@kernel.org>
Fri, 27 Dec 2024 15:06:13 +0000 (10:06 -0500)
committer Sasha Levin <sashal@kernel.org>
Fri, 27 Dec 2024 15:06:13 +0000 (10:06 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
15 files changed:
queue-6.12/arm64-dts-broadcom-fix-l2-linesize-for-raspberry-pi-.patch [new file with mode: 0644]
queue-6.12/bpf-check-negative-offsets-in-__bpf_skb_min_len.patch [new file with mode: 0644]
queue-6.12/bpf-fix-bpf_get_smp_processor_id-on-config_smp.patch [new file with mode: 0644]
queue-6.12/ceph-allocate-sparse_ext-map-only-for-sparse-reads.patch [new file with mode: 0644]
queue-6.12/fork-avoid-inappropriate-uprobe-access-to-invalid-mm.patch [new file with mode: 0644]
queue-6.12/media-dvb-frontends-dib3000mb-fix-uninit-value-in-di.patch [new file with mode: 0644]
queue-6.12/mm-vmstat-fix-a-w-1-clang-compiler-warning.patch [new file with mode: 0644]
queue-6.12/nfsd-restore-callback-functionality-for-nfsv4.0.patch [new file with mode: 0644]
queue-6.12/nfsd-revert-nfsd-release-svc_expkey-svc_export-with-.patch [new file with mode: 0644]
queue-6.12/selftests-bpf-fix-compilation-error-in-get_uprobe_of.patch [new file with mode: 0644]
queue-6.12/series [new file with mode: 0644]
queue-6.12/smb-client-deduplicate-select-netfs_support-in-kconf.patch [new file with mode: 0644]
queue-6.12/smb-fix-bytes-written-value-in-proc-fs-cifs-stats.patch [new file with mode: 0644]
queue-6.12/tcp_bpf-add-sk_rmem_alloc-related-logic-for-tcp_bpf-.patch [new file with mode: 0644]
queue-6.12/tcp_bpf-charge-receive-socket-buffer-in-bpf_tcp_ingr.patch [new file with mode: 0644]

diff --git a/queue-6.12/arm64-dts-broadcom-fix-l2-linesize-for-raspberry-pi-.patch b/queue-6.12/arm64-dts-broadcom-fix-l2-linesize-for-raspberry-pi-.patch
new file mode 100644 (file)
index 0000000..9fe3d9b
--- /dev/null
@@ -0,0 +1,73 @@
+From 58dc82567b4d6f2f6d87702eece472d4631dcdf9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 7 Oct 2024 17:29:54 -0400
+Subject: arm64: dts: broadcom: Fix L2 linesize for Raspberry Pi 5
+
+From: Willow Cunningham <willow.e.cunningham@gmail.com>
+
+[ Upstream commit 058387d9c6b70e225da82492e1e193635c3fac3f ]
+
+Set the cache-line-size parameter of the L2 cache for each core to the
+correct value of 64 bytes.
+
+Previously, the L2 cache line size was incorrectly set to 128 bytes
+for the Broadcom BCM2712. This causes validation tests for the
+Performance Application Programming Interface (PAPI) tool to fail as
+they depend on sysfs accurately reporting cache line sizes.
+
+The correct value of 64 bytes is stated in the official documentation of
+the ARM Cortex A-72, which is linked in the comments of
+arm64/boot/dts/broadcom/bcm2712.dtsi as the source for cache-line-size.
+
+Fixes: faa3381267d0 ("arm64: dts: broadcom: Add minimal support for Raspberry Pi 5")
+Signed-off-by: Willow Cunningham <willow.e.cunningham@maine.edu>
+Link: https://lore.kernel.org/r/20241007212954.214724-1-willow.e.cunningham@maine.edu
+Signed-off-by: Florian Fainelli <florian.fainelli@broadcom.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm64/boot/dts/broadcom/bcm2712.dtsi | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/arch/arm64/boot/dts/broadcom/bcm2712.dtsi b/arch/arm64/boot/dts/broadcom/bcm2712.dtsi
+index 6e5a984c1d4e..26a29e5e5078 100644
+--- a/arch/arm64/boot/dts/broadcom/bcm2712.dtsi
++++ b/arch/arm64/boot/dts/broadcom/bcm2712.dtsi
+@@ -67,7 +67,7 @@
+                       l2_cache_l0: l2-cache-l0 {
+                               compatible = "cache";
+                               cache-size = <0x80000>;
+-                              cache-line-size = <128>;
++                              cache-line-size = <64>;
+                               cache-sets = <1024>; //512KiB(size)/64(line-size)=8192ways/8-way set
+                               cache-level = <2>;
+                               cache-unified;
+@@ -91,7 +91,7 @@
+                       l2_cache_l1: l2-cache-l1 {
+                               compatible = "cache";
+                               cache-size = <0x80000>;
+-                              cache-line-size = <128>;
++                              cache-line-size = <64>;
+                               cache-sets = <1024>; //512KiB(size)/64(line-size)=8192ways/8-way set
+                               cache-level = <2>;
+                               cache-unified;
+@@ -115,7 +115,7 @@
+                       l2_cache_l2: l2-cache-l2 {
+                               compatible = "cache";
+                               cache-size = <0x80000>;
+-                              cache-line-size = <128>;
++                              cache-line-size = <64>;
+                               cache-sets = <1024>; //512KiB(size)/64(line-size)=8192ways/8-way set
+                               cache-level = <2>;
+                               cache-unified;
+@@ -139,7 +139,7 @@
+                       l2_cache_l3: l2-cache-l3 {
+                               compatible = "cache";
+                               cache-size = <0x80000>;
+-                              cache-line-size = <128>;
++                              cache-line-size = <64>;
+                               cache-sets = <1024>; //512KiB(size)/64(line-size)=8192ways/8-way set
+                               cache-level = <2>;
+                               cache-unified;
+-- 
+2.39.5
+
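For reference, the cache geometry in the hunks above is self-consistent once the line size is corrected; the arithmetic below is an editorial cross-check derived only from the values shown in the file, not part of the patch:

	0x80000 bytes            = 512 KiB per-core L2
	512 KiB / 64 B per line  = 8192 cache lines
	8192 lines / 1024 sets   = 8 ways per set

This is what the surviving "//512KiB(size)/64(line-size)=..." comment is getting at; with the old 128-byte line size the same cache-size and cache-sets values would not have added up.
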
diff --git a/queue-6.12/bpf-check-negative-offsets-in-__bpf_skb_min_len.patch b/queue-6.12/bpf-check-negative-offsets-in-__bpf_skb_min_len.patch
new file mode 100644 (file)
index 0000000..ae17b2b
--- /dev/null
@@ -0,0 +1,66 @@
+From d8715a68f34e65b712d4f4dc258c65fb57a62c4d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 Dec 2024 19:40:54 -0800
+Subject: bpf: Check negative offsets in __bpf_skb_min_len()
+
+From: Cong Wang <cong.wang@bytedance.com>
+
+[ Upstream commit 9ecc4d858b92c1bb0673ad9c327298e600c55659 ]
+
+skb_network_offset() and skb_transport_offset() can be negative when
+they are called after we pull the transport header, for example, when
+we use eBPF sockmap at the point of ->sk_data_ready().
+
+__bpf_skb_min_len() uses an unsigned int to hold these offsets, so a
+negative offset becomes a very large number, which then causes
+bpf_skb_change_tail() to fail unexpectedly.
+
+Fix this by using a signed int to get these offsets and ensure the
+minimum is at least zero.
+
+Fixes: 5293efe62df8 ("bpf: add bpf_skb_change_tail helper")
+Signed-off-by: Cong Wang <cong.wang@bytedance.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: John Fastabend <john.fastabend@gmail.com>
+Link: https://lore.kernel.org/bpf/20241213034057.246437-2-xiyou.wangcong@gmail.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/filter.c | 21 +++++++++++++++------
+ 1 file changed, 15 insertions(+), 6 deletions(-)
+
+diff --git a/net/core/filter.c b/net/core/filter.c
+index 9a459213d283..55495063621d 100644
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -3751,13 +3751,22 @@ static const struct bpf_func_proto bpf_skb_adjust_room_proto = {
+ static u32 __bpf_skb_min_len(const struct sk_buff *skb)
+ {
+-      u32 min_len = skb_network_offset(skb);
++      int offset = skb_network_offset(skb);
++      u32 min_len = 0;
+-      if (skb_transport_header_was_set(skb))
+-              min_len = skb_transport_offset(skb);
+-      if (skb->ip_summed == CHECKSUM_PARTIAL)
+-              min_len = skb_checksum_start_offset(skb) +
+-                        skb->csum_offset + sizeof(__sum16);
++      if (offset > 0)
++              min_len = offset;
++      if (skb_transport_header_was_set(skb)) {
++              offset = skb_transport_offset(skb);
++              if (offset > 0)
++                      min_len = offset;
++      }
++      if (skb->ip_summed == CHECKSUM_PARTIAL) {
++              offset = skb_checksum_start_offset(skb) +
++                       skb->csum_offset + sizeof(__sum16);
++              if (offset > 0)
++                      min_len = offset;
++      }
+       return min_len;
+ }
+-- 
+2.39.5
+
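The failure mode described above comes from ordinary C integer conversion: storing a negative offset in a u32 yields a huge value, so every requested length looks smaller than the "minimum". A minimal user-space illustration, not kernel code (the offset value is made up for the example):

	#include <stdio.h>

	int main(void)
	{
		/* e.g. skb_network_offset() after the transport header was pulled */
		int offset = -14;

		/* the old __bpf_skb_min_len() stored this straight into a u32 */
		unsigned int min_len = offset;	/* wraps modulo 2^32 */

		printf("offset=%d -> min_len=%u\n", offset, min_len);
		/* prints: offset=-14 -> min_len=4294967282 */
		return 0;
	}

With the fix, offsets are kept in a signed int and only used when greater than zero, so min_len can never wrap.
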
diff --git a/queue-6.12/bpf-fix-bpf_get_smp_processor_id-on-config_smp.patch b/queue-6.12/bpf-fix-bpf_get_smp_processor_id-on-config_smp.patch
new file mode 100644 (file)
index 0000000..a7d5b0b
--- /dev/null
@@ -0,0 +1,51 @@
+From 3dd03a0c419509c2c7fbe9ee3b74f882d510e06b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 17 Dec 2024 20:58:13 +0100
+Subject: bpf: Fix bpf_get_smp_processor_id() on !CONFIG_SMP
+
+From: Andrea Righi <arighi@nvidia.com>
+
+[ Upstream commit 23579010cf0a12476e96a5f1acdf78a9c5843657 ]
+
+On x86-64 calling bpf_get_smp_processor_id() in a kernel with CONFIG_SMP
+disabled can trigger the following bug, as pcpu_hot is unavailable:
+
+ [    8.471774] BUG: unable to handle page fault for address: 00000000936a290c
+ [    8.471849] #PF: supervisor read access in kernel mode
+ [    8.471881] #PF: error_code(0x0000) - not-present page
+
+Fix by inlining a return 0 in the !CONFIG_SMP case.
+
+Fixes: 1ae6921009e5 ("bpf: inline bpf_get_smp_processor_id() helper")
+Signed-off-by: Andrea Righi <arighi@nvidia.com>
+Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
+Link: https://lore.kernel.org/bpf/20241217195813.622568-1-arighi@nvidia.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/verifier.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
+index 4c486a0bfcc4..84d958f2c031 100644
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -21085,11 +21085,15 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
+                        * changed in some incompatible and hard to support
+                        * way, it's fine to back out this inlining logic
+                        */
++#ifdef CONFIG_SMP
+                       insn_buf[0] = BPF_MOV32_IMM(BPF_REG_0, (u32)(unsigned long)&pcpu_hot.cpu_number);
+                       insn_buf[1] = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0);
+                       insn_buf[2] = BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0);
+                       cnt = 3;
+-
++#else
++                      insn_buf[0] = BPF_ALU32_REG(BPF_XOR, BPF_REG_0, BPF_REG_0);
++                      cnt = 1;
++#endif
+                       new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+                       if (!new_prog)
+                               return -ENOMEM;
+-- 
+2.39.5
+
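In BPF terms the new !CONFIG_SMP branch emits a single instruction, BPF_ALU32_REG(BPF_XOR, R0, R0), i.e. w0 ^= w0, which zeroes the return register. That matches the helper's uniprocessor semantics, where the only CPU id is 0. A rough user-space analogue of the two inlined variants (illustration only; "cpu_number" stands in for the kernel's per-CPU pcpu_hot.cpu_number):

	#include <stdio.h>

	#define DEMO_CONFIG_SMP 0		/* flip to 1 to model an SMP kernel */

	static unsigned int cpu_number;		/* per-CPU variable stand-in */

	static unsigned int inlined_get_smp_processor_id(void)
	{
	#if DEMO_CONFIG_SMP
		return cpu_number;		/* SMP: load the per-CPU id */
	#else
		return 0;			/* UP: w0 ^= w0, the id is always 0 */
	#endif
	}

	int main(void)
	{
		printf("%u\n", inlined_get_smp_processor_id());
		return 0;
	}
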
diff --git a/queue-6.12/ceph-allocate-sparse_ext-map-only-for-sparse-reads.patch b/queue-6.12/ceph-allocate-sparse_ext-map-only-for-sparse-reads.patch
new file mode 100644 (file)
index 0000000..73d0f43
--- /dev/null
@@ -0,0 +1,50 @@
+From ca059debfb07b455d1eb38ba0f3ba642007b53f5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 7 Dec 2024 17:33:25 +0100
+Subject: ceph: allocate sparse_ext map only for sparse reads
+
+From: Ilya Dryomov <idryomov@gmail.com>
+
+[ Upstream commit 18d44c5d062b97b97bb0162d9742440518958dc1 ]
+
+If mounted with the sparseread option, ceph_direct_read_write() ends up
+making an unnecessary allocation for O_DIRECT writes.
+
+Fixes: 03bc06c7b0bd ("ceph: add new mount option to enable sparse reads")
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Reviewed-by: Alex Markuze <amarkuze@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ceph/file.c        | 2 +-
+ net/ceph/osd_client.c | 2 ++
+ 2 files changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/fs/ceph/file.c b/fs/ceph/file.c
+index 67468d88f139..851d70200c6b 100644
+--- a/fs/ceph/file.c
++++ b/fs/ceph/file.c
+@@ -1552,7 +1552,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
+               }
+               op = &req->r_ops[0];
+-              if (sparse) {
++              if (!write && sparse) {
+                       extent_cnt = __ceph_sparse_read_ext_count(inode, size);
+                       ret = ceph_alloc_sparse_ext_map(op, extent_cnt);
+                       if (ret) {
+diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
+index 9d078b37fe0b..abac770bc0b4 100644
+--- a/net/ceph/osd_client.c
++++ b/net/ceph/osd_client.c
+@@ -1173,6 +1173,8 @@ EXPORT_SYMBOL(ceph_osdc_new_request);
+ int __ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt)
+ {
++      WARN_ON(op->op != CEPH_OSD_OP_SPARSE_READ);
++
+       op->extent.sparse_ext_cnt = cnt;
+       op->extent.sparse_ext = kmalloc_array(cnt,
+                                             sizeof(*op->extent.sparse_ext),
+-- 
+2.39.5
+
diff --git a/queue-6.12/fork-avoid-inappropriate-uprobe-access-to-invalid-mm.patch b/queue-6.12/fork-avoid-inappropriate-uprobe-access-to-invalid-mm.patch
new file mode 100644 (file)
index 0000000..7118be1
--- /dev/null
@@ -0,0 +1,123 @@
+From f5795da9af07dff2c2475382c756b6a300563ac4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 10 Dec 2024 17:24:12 +0000
+Subject: fork: avoid inappropriate uprobe access to invalid mm
+
+From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+
+[ Upstream commit 8ac662f5da19f5873fdd94c48a5cdb45b2e1b58f ]
+
+If dup_mmap() encounters an issue, currently uprobe is able to access the
+relevant mm via the reverse mapping (in build_map_info()), and if we are
+very unlucky with a race window, observe invalid XA_ZERO_ENTRY state which
+we establish as part of the fork error path.
+
+This occurs because uprobe_write_opcode() invokes anon_vma_prepare() which
+in turn invokes find_mergeable_anon_vma() that uses a VMA iterator,
+invoking vma_iter_load() which uses the advanced maple tree API and thus
+is able to observe XA_ZERO_ENTRY entries added to dup_mmap() in commit
+d24062914837 ("fork: use __mt_dup() to duplicate maple tree in
+dup_mmap()").
+
+This change was made on the assumption that only process tear-down code
+would actually observe (and make use of) these values.  However this very
+unlikely but still possible edge case with uprobes exists and
+unfortunately does make these observable.
+
+The uprobe operation prevents races against the dup_mmap() operation via
+the dup_mmap_sem semaphore, which is acquired via uprobe_start_dup_mmap()
+and dropped via uprobe_end_dup_mmap(), and held across
+register_for_each_vma() prior to invoking build_map_info() which does the
+reverse mapping lookup.
+
+Currently these are acquired and dropped within dup_mmap(), which exposes
+the race window prior to error handling in the invoking dup_mm() which
+tears down the mm.
+
+We can avoid all this by just moving the invocation of
+uprobe_start_dup_mmap() and uprobe_end_dup_mmap() up a level to dup_mm()
+and only release this lock once the dup_mmap() operation succeeds or clean
+up is done.
+
+This means that the uprobe code can never observe an incompletely
+constructed mm and resolves the issue in this case.
+
+Link: https://lkml.kernel.org/r/20241210172412.52995-1-lorenzo.stoakes@oracle.com
+Fixes: d24062914837 ("fork: use __mt_dup() to duplicate maple tree in dup_mmap()")
+Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+Reported-by: syzbot+2d788f4f7cb660dac4b7@syzkaller.appspotmail.com
+Closes: https://lore.kernel.org/all/6756d273.050a0220.2477f.003d.GAE@google.com/
+Cc: Adrian Hunter <adrian.hunter@intel.com>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
+Cc: Ian Rogers <irogers@google.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Jann Horn <jannh@google.com>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Kan Liang <kan.liang@linux.intel.com>
+Cc: Liam R. Howlett <Liam.Howlett@Oracle.com>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Cc: Peng Zhang <zhangpeng.00@bytedance.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: David Hildenbrand <david@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/fork.c | 13 ++++++-------
+ 1 file changed, 6 insertions(+), 7 deletions(-)
+
+diff --git a/kernel/fork.c b/kernel/fork.c
+index ce8be55e5e04..e192bdbc9ade 100644
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -640,11 +640,8 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
+       LIST_HEAD(uf);
+       VMA_ITERATOR(vmi, mm, 0);
+-      uprobe_start_dup_mmap();
+-      if (mmap_write_lock_killable(oldmm)) {
+-              retval = -EINTR;
+-              goto fail_uprobe_end;
+-      }
++      if (mmap_write_lock_killable(oldmm))
++              return -EINTR;
+       flush_cache_dup_mm(oldmm);
+       uprobe_dup_mmap(oldmm, mm);
+       /*
+@@ -783,8 +780,6 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
+               dup_userfaultfd_complete(&uf);
+       else
+               dup_userfaultfd_fail(&uf);
+-fail_uprobe_end:
+-      uprobe_end_dup_mmap();
+       return retval;
+ fail_nomem_anon_vma_fork:
+@@ -1692,9 +1687,11 @@ static struct mm_struct *dup_mm(struct task_struct *tsk,
+       if (!mm_init(mm, tsk, mm->user_ns))
+               goto fail_nomem;
++      uprobe_start_dup_mmap();
+       err = dup_mmap(mm, oldmm);
+       if (err)
+               goto free_pt;
++      uprobe_end_dup_mmap();
+       mm->hiwater_rss = get_mm_rss(mm);
+       mm->hiwater_vm = mm->total_vm;
+@@ -1709,6 +1706,8 @@ static struct mm_struct *dup_mm(struct task_struct *tsk,
+       mm->binfmt = NULL;
+       mm_init_owner(mm, NULL);
+       mmput(mm);
++      if (err)
++              uprobe_end_dup_mmap();
+ fail_nomem:
+       return NULL;
+-- 
+2.39.5
+
diff --git a/queue-6.12/media-dvb-frontends-dib3000mb-fix-uninit-value-in-di.patch b/queue-6.12/media-dvb-frontends-dib3000mb-fix-uninit-value-in-di.patch
new file mode 100644 (file)
index 0000000..e77a2c9
--- /dev/null
@@ -0,0 +1,62 @@
+From 7f2f89c8ce9addcadc9b995b1b72cc2647bbd28f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 17 May 2024 08:58:00 -0700
+Subject: media: dvb-frontends: dib3000mb: fix uninit-value in
+ dib3000_write_reg
+
+From: Nikita Zhandarovich <n.zhandarovich@fintech.ru>
+
+[ Upstream commit 2dd59fe0e19e1ab955259978082b62e5751924c7 ]
+
+Syzbot reports [1] an uninitialized value issue found by KMSAN in
+dib3000_read_reg().
+
+Local u8 rb[2] is used in i2c_transfer() as a read buffer; in case
+that call fails, the buffer may end up with some undefined values.
+
+Since no elaborate error handling is expected in dib3000_write_reg(),
+simply zero out rb buffer to mitigate the problem.
+
+[1] Syzkaller report
+dvb-usb: bulk message failed: -22 (6/0)
+=====================================================
+BUG: KMSAN: uninit-value in dib3000mb_attach+0x2d8/0x3c0 drivers/media/dvb-frontends/dib3000mb.c:758
+ dib3000mb_attach+0x2d8/0x3c0 drivers/media/dvb-frontends/dib3000mb.c:758
+ dibusb_dib3000mb_frontend_attach+0x155/0x2f0 drivers/media/usb/dvb-usb/dibusb-mb.c:31
+ dvb_usb_adapter_frontend_init+0xed/0x9a0 drivers/media/usb/dvb-usb/dvb-usb-dvb.c:290
+ dvb_usb_adapter_init drivers/media/usb/dvb-usb/dvb-usb-init.c:90 [inline]
+ dvb_usb_init drivers/media/usb/dvb-usb/dvb-usb-init.c:186 [inline]
+ dvb_usb_device_init+0x25a8/0x3760 drivers/media/usb/dvb-usb/dvb-usb-init.c:310
+ dibusb_probe+0x46/0x250 drivers/media/usb/dvb-usb/dibusb-mb.c:110
+...
+Local variable rb created at:
+ dib3000_read_reg+0x86/0x4e0 drivers/media/dvb-frontends/dib3000mb.c:54
+ dib3000mb_attach+0x123/0x3c0 drivers/media/dvb-frontends/dib3000mb.c:758
+...
+
+Fixes: 74340b0a8bc6 ("V4L/DVB (4457): Remove dib3000-common-module")
+Reported-by: syzbot+c88fc0ebe0d5935c70da@syzkaller.appspotmail.com
+Signed-off-by: Nikita Zhandarovich <n.zhandarovich@fintech.ru>
+Link: https://lore.kernel.org/r/20240517155800.9881-1-n.zhandarovich@fintech.ru
+Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/media/dvb-frontends/dib3000mb.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/media/dvb-frontends/dib3000mb.c b/drivers/media/dvb-frontends/dib3000mb.c
+index c598b2a63325..7c452ddd9e40 100644
+--- a/drivers/media/dvb-frontends/dib3000mb.c
++++ b/drivers/media/dvb-frontends/dib3000mb.c
+@@ -51,7 +51,7 @@ MODULE_PARM_DESC(debug, "set debugging level (1=info,2=xfer,4=setfe,8=getfe (|-a
+ static int dib3000_read_reg(struct dib3000_state *state, u16 reg)
+ {
+       u8 wb[] = { ((reg >> 8) | 0x80) & 0xff, reg & 0xff };
+-      u8 rb[2];
++      u8 rb[2] = {};
+       struct i2c_msg msg[] = {
+               { .addr = state->config.demod_address, .flags = 0,        .buf = wb, .len = 2 },
+               { .addr = state->config.demod_address, .flags = I2C_M_RD, .buf = rb, .len = 2 },
+-- 
+2.39.5
+
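The fix relies on zero-initialising the read buffer so that a failed i2c_transfer() can never leave the caller parsing stack garbage. A stand-alone illustration of the pattern (fake_transfer() is a made-up stand-in, not driver code):

	#include <stdio.h>

	/* stand-in for an i2c_transfer() that fails without writing the buffer */
	static int fake_transfer(unsigned char *buf, int len)
	{
		(void)buf;
		(void)len;
		return -5;			/* EIO-style failure */
	}

	int main(void)
	{
		unsigned char rb[2] = { 0 };	/* same effect as "= {}" in the patch */

		if (fake_transfer(rb, sizeof(rb)) < 0)
			printf("transfer failed, rb = {%d, %d}\n", rb[0], rb[1]);
		/* prints {0, 0}: still well-defined even though nothing was read */
		return 0;
	}
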
diff --git a/queue-6.12/mm-vmstat-fix-a-w-1-clang-compiler-warning.patch b/queue-6.12/mm-vmstat-fix-a-w-1-clang-compiler-warning.patch
new file mode 100644 (file)
index 0000000..070567b
--- /dev/null
@@ -0,0 +1,43 @@
+From cc95324b56c57743ba4990089b2e6d50f0090ef2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 Dec 2024 13:31:26 -0800
+Subject: mm/vmstat: fix a W=1 clang compiler warning
+
+From: Bart Van Assche <bvanassche@acm.org>
+
+[ Upstream commit 30c2de0a267c04046d89e678cc0067a9cfb455df ]
+
+Fix the following clang compiler warning that is reported if the kernel is
+built with W=1:
+
+./include/linux/vmstat.h:518:36: error: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Werror,-Wenum-enum-conversion]
+  518 |         return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
+      |                               ~~~~~~~~~~~ ^ ~~~
+
+Link: https://lkml.kernel.org/r/20241212213126.1269116-1-bvanassche@acm.org
+Fixes: 9d7ea9a297e6 ("mm/vmstat: add helpers to get vmstat item names for each enum type")
+Signed-off-by: Bart Van Assche <bvanassche@acm.org>
+Cc: Konstantin Khlebnikov <koct9i@gmail.com>
+Cc: Nathan Chancellor <nathan@kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/vmstat.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
+index d2761bf8ff32..9f3a04345b86 100644
+--- a/include/linux/vmstat.h
++++ b/include/linux/vmstat.h
+@@ -515,7 +515,7 @@ static inline const char *node_stat_name(enum node_stat_item item)
+ static inline const char *lru_list_name(enum lru_list lru)
+ {
+-      return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
++      return node_stat_name(NR_LRU_BASE + (enum node_stat_item)lru) + 3; // skip "nr_"
+ }
+ #if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG)
+-- 
+2.39.5
+
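The warning quoted above comes from clang's -Wenum-enum-conversion, which fires on arithmetic that mixes two different enumeration types; the fix silences it by casting one operand so both sides have the same type. A minimal stand-alone reproducer (the enum names here are invented stand-ins, and a reasonably recent clang is assumed):

	/* build with: clang -c -Wenum-enum-conversion demo.c */
	enum demo_node_stat { DEMO_NR_LRU_BASE = 10 };
	enum demo_lru_list  { DEMO_LRU_ACTIVE  = 1 };

	int mixed(void)
	{
		return DEMO_NR_LRU_BASE + DEMO_LRU_ACTIVE;			/* warns */
	}

	int cast_fixed(void)
	{
		return DEMO_NR_LRU_BASE + (enum demo_node_stat)DEMO_LRU_ACTIVE;	/* quiet */
	}
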
diff --git a/queue-6.12/nfsd-restore-callback-functionality-for-nfsv4.0.patch b/queue-6.12/nfsd-restore-callback-functionality-for-nfsv4.0.patch
new file mode 100644 (file)
index 0000000..38f2568
--- /dev/null
@@ -0,0 +1,51 @@
+From 655199878ea5dbc966311df355b0bbc942a8dd99 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 20 Dec 2024 15:28:18 +1100
+Subject: nfsd: restore callback functionality for NFSv4.0
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit 7917f01a286ce01e9c085e24468421f596ee1a0c ]
+
+A recent patch inadvertently broke callbacks for NFSv4.0.
+
+In the 4.0 case we do not expect a session to be found but still need to
+call setup_callback_client() which will not try to dereference it.
+
+This patch moves the check for failure to find a session into the 4.1+
+branch of setup_callback_client().
+
+Fixes: 1e02c641c3a4 ("NFSD: Prevent NULL dereference in nfsd4_process_cb_update()")
+Signed-off-by: NeilBrown <neilb@suse.de>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/nfs4callback.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
+index b8cbb1556004..de0763652549 100644
+--- a/fs/nfsd/nfs4callback.c
++++ b/fs/nfsd/nfs4callback.c
+@@ -1058,7 +1058,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
+               args.authflavor = clp->cl_cred.cr_flavor;
+               clp->cl_cb_ident = conn->cb_ident;
+       } else {
+-              if (!conn->cb_xprt)
++              if (!conn->cb_xprt || !ses)
+                       return -EINVAL;
+               clp->cl_cb_session = ses;
+               args.bc_xprt = conn->cb_xprt;
+@@ -1461,8 +1461,6 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
+               ses = c->cn_session;
+       }
+       spin_unlock(&clp->cl_lock);
+-      if (!c)
+-              return;
+       err = setup_callback_client(clp, &conn, ses);
+       if (err) {
+-- 
+2.39.5
+
diff --git a/queue-6.12/nfsd-revert-nfsd-release-svc_expkey-svc_export-with-.patch b/queue-6.12/nfsd-revert-nfsd-release-svc_expkey-svc_export-with-.patch
new file mode 100644 (file)
index 0000000..07fff3f
--- /dev/null
@@ -0,0 +1,132 @@
+From 676c5f0c7d7d3db49e538c736b8d45dfe28dce8c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 16 Dec 2024 22:21:52 +0800
+Subject: nfsd: Revert "nfsd: release svc_expkey/svc_export with rcu_work"
+
+From: Yang Erkun <yangerkun@huawei.com>
+
+[ Upstream commit 69d803c40edeaf94089fbc8751c9b746cdc35044 ]
+
+This reverts commit f8c989a0c89a75d30f899a7cabdc14d72522bb8d.
+
+Before this commit, svc_export_put or expkey_put will call path_put with
+sync mode. After this commit, path_put will be called with async mode.
+And this can lead to unexpected results, as shown below.
+
+mkfs.xfs -f /dev/sda
+echo "/ *(rw,no_root_squash,fsid=0)" > /etc/exports
+echo "/mnt *(rw,no_root_squash,fsid=1)" >> /etc/exports
+exportfs -ra
+service nfs-server start
+mount -t nfs -o vers=4.0 127.0.0.1:/mnt /mnt1
+mount /dev/sda /mnt/sda
+touch /mnt1/sda/file
+exportfs -r
+umount /mnt/sda # failed unexpectedly
+
+The touch will finally call nfsd_cross_mnt, take a refcount on the mount,
+and then add a cache_head. Before this commit, exportfs -r will call
+cache_flush to clean up all cache_heads, and the path_put in
+svc_export_put/expkey_put will be finished in sync mode. So, the
+latter umount will always succeed. However, after this commit, path_put
+will be called in async mode, so the latter umount may fail, and if
+we add some delay, the umount will succeed too. Personally I think this
+is a bug and should be fixed. We first revert the earlier bugfix patch, and
+then fix the original bug in a different way.
+
+Fixes: f8c989a0c89a ("nfsd: release svc_expkey/svc_export with rcu_work")
+Signed-off-by: Yang Erkun <yangerkun@huawei.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/export.c | 31 ++++++-------------------------
+ fs/nfsd/export.h |  4 ++--
+ 2 files changed, 8 insertions(+), 27 deletions(-)
+
+diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
+index 6d0455973d64..49aede376d86 100644
+--- a/fs/nfsd/export.c
++++ b/fs/nfsd/export.c
+@@ -40,24 +40,15 @@
+ #define       EXPKEY_HASHMAX          (1 << EXPKEY_HASHBITS)
+ #define       EXPKEY_HASHMASK         (EXPKEY_HASHMAX -1)
+-static void expkey_put_work(struct work_struct *work)
++static void expkey_put(struct kref *ref)
+ {
+-      struct svc_expkey *key =
+-              container_of(to_rcu_work(work), struct svc_expkey, ek_rcu_work);
++      struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref);
+       if (test_bit(CACHE_VALID, &key->h.flags) &&
+           !test_bit(CACHE_NEGATIVE, &key->h.flags))
+               path_put(&key->ek_path);
+       auth_domain_put(key->ek_client);
+-      kfree(key);
+-}
+-
+-static void expkey_put(struct kref *ref)
+-{
+-      struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref);
+-
+-      INIT_RCU_WORK(&key->ek_rcu_work, expkey_put_work);
+-      queue_rcu_work(system_wq, &key->ek_rcu_work);
++      kfree_rcu(key, ek_rcu);
+ }
+ static int expkey_upcall(struct cache_detail *cd, struct cache_head *h)
+@@ -364,26 +355,16 @@ static void export_stats_destroy(struct export_stats *stats)
+                                           EXP_STATS_COUNTERS_NUM);
+ }
+-static void svc_export_put_work(struct work_struct *work)
++static void svc_export_put(struct kref *ref)
+ {
+-      struct svc_export *exp =
+-              container_of(to_rcu_work(work), struct svc_export, ex_rcu_work);
+-
++      struct svc_export *exp = container_of(ref, struct svc_export, h.ref);
+       path_put(&exp->ex_path);
+       auth_domain_put(exp->ex_client);
+       nfsd4_fslocs_free(&exp->ex_fslocs);
+       export_stats_destroy(exp->ex_stats);
+       kfree(exp->ex_stats);
+       kfree(exp->ex_uuid);
+-      kfree(exp);
+-}
+-
+-static void svc_export_put(struct kref *ref)
+-{
+-      struct svc_export *exp = container_of(ref, struct svc_export, h.ref);
+-
+-      INIT_RCU_WORK(&exp->ex_rcu_work, svc_export_put_work);
+-      queue_rcu_work(system_wq, &exp->ex_rcu_work);
++      kfree_rcu(exp, ex_rcu);
+ }
+ static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h)
+diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h
+index 081afb68681e..3794ae253a70 100644
+--- a/fs/nfsd/export.h
++++ b/fs/nfsd/export.h
+@@ -75,7 +75,7 @@ struct svc_export {
+       u32                     ex_layout_types;
+       struct nfsd4_deviceid_map *ex_devid_map;
+       struct cache_detail     *cd;
+-      struct rcu_work         ex_rcu_work;
++      struct rcu_head         ex_rcu;
+       unsigned long           ex_xprtsec_modes;
+       struct export_stats     *ex_stats;
+ };
+@@ -92,7 +92,7 @@ struct svc_expkey {
+       u32                     ek_fsid[6];
+       struct path             ek_path;
+-      struct rcu_work         ek_rcu_work;
++      struct rcu_head         ek_rcu;
+ };
+ #define EX_ISSYNC(exp)                (!((exp)->ex_flags & NFSEXP_ASYNC))
+-- 
+2.39.5
+
diff --git a/queue-6.12/selftests-bpf-fix-compilation-error-in-get_uprobe_of.patch b/queue-6.12/selftests-bpf-fix-compilation-error-in-get_uprobe_of.patch
new file mode 100644 (file)
index 0000000..93fe820
--- /dev/null
@@ -0,0 +1,42 @@
+From ba31f0608167db6bbba738bcd14d0f9c234be183 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 18 Dec 2024 18:57:24 +0100
+Subject: selftests/bpf: Fix compilation error in get_uprobe_offset()
+
+From: Jerome Marchand <jmarchan@redhat.com>
+
+[ Upstream commit 716f2bca1ce93bb95364f1fc0555c1650507b588 ]
+
+In get_uprobe_offset(), the call to procmap_query() uses the constant
+PROCMAP_QUERY_VMA_EXECUTABLE, even if PROCMAP_QUERY is not defined.
+
+Define PROCMAP_QUERY_VMA_EXECUTABLE when PROCMAP_QUERY isn't.
+
+Fixes: 4e9e07603ecd ("selftests/bpf: make use of PROCMAP_QUERY ioctl if available")
+Signed-off-by: Jerome Marchand <jmarchan@redhat.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Yonghong Song <yonghong.song@linux.dev>
+Link: https://lore.kernel.org/bpf/20241218175724.578884-1-jmarchan@redhat.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/bpf/trace_helpers.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
+index 2d742fdac6b9..81943c6254e6 100644
+--- a/tools/testing/selftests/bpf/trace_helpers.c
++++ b/tools/testing/selftests/bpf/trace_helpers.c
+@@ -293,6 +293,10 @@ static int procmap_query(int fd, const void *addr, __u32 query_flags, size_t *st
+       return 0;
+ }
+ #else
++# ifndef PROCMAP_QUERY_VMA_EXECUTABLE
++#  define PROCMAP_QUERY_VMA_EXECUTABLE 0x04
++# endif
++
+ static int procmap_query(int fd, const void *addr, __u32 query_flags, size_t *start, size_t *offset, int *flags)
+ {
+       return -EOPNOTSUPP;
+-- 
+2.39.5
+
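The fix follows the usual compile-time fallback pattern: only define the uapi constant locally when the system headers are too old to provide it, and guard the local definition with #ifndef so it cannot clash with a header that does define it. A stand-alone sketch of the same pattern (the 0x04 value is mirrored from the hunk above):

	#include <stdio.h>

	#ifndef PROCMAP_QUERY_VMA_EXECUTABLE
	# define PROCMAP_QUERY_VMA_EXECUTABLE 0x04	/* fallback for old headers */
	#endif

	int main(void)
	{
		printf("0x%02x\n", (unsigned int)PROCMAP_QUERY_VMA_EXECUTABLE);
		return 0;
	}
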
diff --git a/queue-6.12/series b/queue-6.12/series
new file mode 100644 (file)
index 0000000..69a4479
--- /dev/null
@@ -0,0 +1,14 @@
+media-dvb-frontends-dib3000mb-fix-uninit-value-in-di.patch
+ceph-allocate-sparse_ext-map-only-for-sparse-reads.patch
+arm64-dts-broadcom-fix-l2-linesize-for-raspberry-pi-.patch
+bpf-fix-bpf_get_smp_processor_id-on-config_smp.patch
+fork-avoid-inappropriate-uprobe-access-to-invalid-mm.patch
+mm-vmstat-fix-a-w-1-clang-compiler-warning.patch
+selftests-bpf-fix-compilation-error-in-get_uprobe_of.patch
+smb-client-deduplicate-select-netfs_support-in-kconf.patch
+smb-fix-bytes-written-value-in-proc-fs-cifs-stats.patch
+tcp_bpf-charge-receive-socket-buffer-in-bpf_tcp_ingr.patch
+tcp_bpf-add-sk_rmem_alloc-related-logic-for-tcp_bpf-.patch
+bpf-check-negative-offsets-in-__bpf_skb_min_len.patch
+nfsd-revert-nfsd-release-svc_expkey-svc_export-with-.patch
+nfsd-restore-callback-functionality-for-nfsv4.0.patch
diff --git a/queue-6.12/smb-client-deduplicate-select-netfs_support-in-kconf.patch b/queue-6.12/smb-client-deduplicate-select-netfs_support-in-kconf.patch
new file mode 100644 (file)
index 0000000..7d76d07
--- /dev/null
@@ -0,0 +1,35 @@
+From c734c770a01a4016136750cce4fa34c01c8fe455 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 17 Dec 2024 10:25:10 +0100
+Subject: smb: client: Deduplicate "select NETFS_SUPPORT" in Kconfig
+
+From: Dragan Simic <dsimic@manjaro.org>
+
+[ Upstream commit ee1c8e6b2931811a906b8c78006bfe0a3386fa60 ]
+
+Repeating automatically selected options in Kconfig files is redundant, so
+let's delete the repeated "select NETFS_SUPPORT" that was added accidentally.
+
+Fixes: 69c3c023af25 ("cifs: Implement netfslib hooks")
+Signed-off-by: Dragan Simic <dsimic@manjaro.org>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/smb/client/Kconfig | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/fs/smb/client/Kconfig b/fs/smb/client/Kconfig
+index 2aff6d1395ce..9f05f94e265a 100644
+--- a/fs/smb/client/Kconfig
++++ b/fs/smb/client/Kconfig
+@@ -2,7 +2,6 @@
+ config CIFS
+       tristate "SMB3 and CIFS support (advanced network filesystem)"
+       depends on INET
+-      select NETFS_SUPPORT
+       select NLS
+       select NLS_UCS2_UTILS
+       select CRYPTO
+-- 
+2.39.5
+
diff --git a/queue-6.12/smb-fix-bytes-written-value-in-proc-fs-cifs-stats.patch b/queue-6.12/smb-fix-bytes-written-value-in-proc-fs-cifs-stats.patch
new file mode 100644 (file)
index 0000000..3a76dc7
--- /dev/null
@@ -0,0 +1,45 @@
+From d2bab391b5c0253b29e44d30e0b9932787e7c761 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 19 Dec 2024 23:28:50 +0530
+Subject: smb: fix bytes written value in /proc/fs/cifs/Stats
+
+From: Bharath SM <bharathsm.hsk@gmail.com>
+
+[ Upstream commit 92941c7f2c9529fac1b2670482d0ced3b46eac70 ]
+
+With the recent netfs API changes, the bytes written
+value was not getting updated in /proc/fs/cifs/Stats.
+Fix this by updating tcon->bytes in write operations.
+
+Fixes: 3ee1a1fc3981 ("cifs: Cut over to using netfslib")
+Signed-off-by: Bharath SM <bharathsm@microsoft.com>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/smb/client/smb2pdu.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
+index d1bd69cbfe09..4750505465ae 100644
+--- a/fs/smb/client/smb2pdu.c
++++ b/fs/smb/client/smb2pdu.c
+@@ -4855,6 +4855,8 @@ smb2_writev_callback(struct mid_q_entry *mid)
+               if (written > wdata->subreq.len)
+                       written &= 0xFFFF;
++              cifs_stats_bytes_written(tcon, written);
++
+               if (written < wdata->subreq.len)
+                       wdata->result = -ENOSPC;
+               else
+@@ -5171,6 +5173,7 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms,
+               cifs_dbg(VFS, "Send error in write = %d\n", rc);
+       } else {
+               *nbytes = le32_to_cpu(rsp->DataLength);
++              cifs_stats_bytes_written(io_parms->tcon, *nbytes);
+               trace_smb3_write_done(0, 0, xid,
+                                     req->PersistentFileId,
+                                     io_parms->tcon->tid,
+-- 
+2.39.5
+
diff --git a/queue-6.12/tcp_bpf-add-sk_rmem_alloc-related-logic-for-tcp_bpf-.patch b/queue-6.12/tcp_bpf-add-sk_rmem_alloc-related-logic-for-tcp_bpf-.patch
new file mode 100644 (file)
index 0000000..a5508a9
--- /dev/null
@@ -0,0 +1,116 @@
+From 5b96a3b7d9b83d50769cc9388c8aa4266b74d26e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 10 Dec 2024 01:20:39 +0000
+Subject: tcp_bpf: Add sk_rmem_alloc related logic for tcp_bpf ingress
+ redirection
+
+From: Zijian Zhang <zijianzhang@bytedance.com>
+
+[ Upstream commit d888b7af7c149c115dd6ac772cc11c375da3e17c ]
+
+When we do sk_psock_verdict_apply->sk_psock_skb_ingress, an sk_msg will
+be created out of the skb, and the rmem accounting of the sk_msg will be
+handled by the skb.
+
+For skmsgs in the __SK_REDIRECT case of tcp_bpf_send_verdict, when redirecting
+to the ingress of a socket, although we call sk_rmem_schedule() and add the
+sk_msg to the ingress_msg of sk_redir, we do not update sk_rmem_alloc. As a result,
+except for the global memory limit, the rmem of sk_redir is nearly
+unlimited. Thus, add sk_rmem_alloc related logic to limit the recv buffer.
+
+Since the functions sk_msg_recvmsg and __sk_psock_purge_ingress_msg are
+used in both of these paths, we use "msg->skb" to test whether the sk_msg
+is backed by an skb. If it's not, we do the memory accounting explicitly.
+
+Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface")
+Signed-off-by: Zijian Zhang <zijianzhang@bytedance.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Reviewed-by: John Fastabend <john.fastabend@gmail.com>
+Link: https://lore.kernel.org/bpf/20241210012039.1669389-3-zijianzhang@bytedance.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/skmsg.h | 11 ++++++++---
+ net/core/skmsg.c      |  6 +++++-
+ net/ipv4/tcp_bpf.c    |  4 +++-
+ 3 files changed, 16 insertions(+), 5 deletions(-)
+
+diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
+index d9b03e0746e7..2cbe0c22a32f 100644
+--- a/include/linux/skmsg.h
++++ b/include/linux/skmsg.h
+@@ -317,17 +317,22 @@ static inline void sock_drop(struct sock *sk, struct sk_buff *skb)
+       kfree_skb(skb);
+ }
+-static inline void sk_psock_queue_msg(struct sk_psock *psock,
++static inline bool sk_psock_queue_msg(struct sk_psock *psock,
+                                     struct sk_msg *msg)
+ {
++      bool ret;
++
+       spin_lock_bh(&psock->ingress_lock);
+-      if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
++      if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
+               list_add_tail(&msg->list, &psock->ingress_msg);
+-      else {
++              ret = true;
++      } else {
+               sk_msg_free(psock->sk, msg);
+               kfree(msg);
++              ret = false;
+       }
+       spin_unlock_bh(&psock->ingress_lock);
++      return ret;
+ }
+ static inline struct sk_msg *sk_psock_dequeue_msg(struct sk_psock *psock)
+diff --git a/net/core/skmsg.c b/net/core/skmsg.c
+index e90fbab703b2..8ad7e6755fd6 100644
+--- a/net/core/skmsg.c
++++ b/net/core/skmsg.c
+@@ -445,8 +445,10 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
+                       if (likely(!peek)) {
+                               sge->offset += copy;
+                               sge->length -= copy;
+-                              if (!msg_rx->skb)
++                              if (!msg_rx->skb) {
+                                       sk_mem_uncharge(sk, copy);
++                                      atomic_sub(copy, &sk->sk_rmem_alloc);
++                              }
+                               msg_rx->sg.size -= copy;
+                               if (!sge->length) {
+@@ -772,6 +774,8 @@ static void __sk_psock_purge_ingress_msg(struct sk_psock *psock)
+       list_for_each_entry_safe(msg, tmp, &psock->ingress_msg, list) {
+               list_del(&msg->list);
++              if (!msg->skb)
++                      atomic_sub(msg->sg.size, &psock->sk->sk_rmem_alloc);
+               sk_msg_free(psock->sk, msg);
+               kfree(msg);
+       }
+diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
+index b21ea634909c..392678ae80f4 100644
+--- a/net/ipv4/tcp_bpf.c
++++ b/net/ipv4/tcp_bpf.c
+@@ -56,6 +56,7 @@ static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
+               }
+               sk_mem_charge(sk, size);
++              atomic_add(size, &sk->sk_rmem_alloc);
+               sk_msg_xfer(tmp, msg, i, size);
+               copied += size;
+               if (sge->length)
+@@ -74,7 +75,8 @@ static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
+       if (!ret) {
+               msg->sg.start = i;
+-              sk_psock_queue_msg(psock, tmp);
++              if (!sk_psock_queue_msg(psock, tmp))
++                      atomic_sub(copied, &sk->sk_rmem_alloc);
+               sk_psock_data_ready(sk, psock);
+       } else {
+               sk_msg_free(sk, tmp);
+-- 
+2.39.5
+
diff --git a/queue-6.12/tcp_bpf-charge-receive-socket-buffer-in-bpf_tcp_ingr.patch b/queue-6.12/tcp_bpf-charge-receive-socket-buffer-in-bpf_tcp_ingr.patch
new file mode 100644 (file)
index 0000000..b5fbc0f
--- /dev/null
@@ -0,0 +1,71 @@
+From 7e5fed317caf8bf3b964e00d99eb8436e54ef6fc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 10 Dec 2024 01:20:38 +0000
+Subject: tcp_bpf: Charge receive socket buffer in bpf_tcp_ingress()
+
+From: Cong Wang <cong.wang@bytedance.com>
+
+[ Upstream commit 54f89b3178d5448dd4457afbb98fc1ab99090a65 ]
+
+When bpf_tcp_ingress() is called, the skmsg is being redirected to the
+ingress of the destination socket. Therefore, we should charge its
+receive socket buffer, instead of the send socket buffer.
+
+Because sk_rmem_schedule() tests pfmemalloc of skb, we need to
+introduce a wrapper and call it for skmsg.
+
+Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface")
+Signed-off-by: Cong Wang <cong.wang@bytedance.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Reviewed-by: John Fastabend <john.fastabend@gmail.com>
+Link: https://lore.kernel.org/bpf/20241210012039.1669389-2-zijianzhang@bytedance.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/sock.h | 10 ++++++++--
+ net/ipv4/tcp_bpf.c |  2 +-
+ 2 files changed, 9 insertions(+), 3 deletions(-)
+
+diff --git a/include/net/sock.h b/include/net/sock.h
+index f29c14448938..fa055cf1785e 100644
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -1521,7 +1521,7 @@ static inline bool sk_wmem_schedule(struct sock *sk, int size)
+ }
+ static inline bool
+-sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size)
++__sk_rmem_schedule(struct sock *sk, int size, bool pfmemalloc)
+ {
+       int delta;
+@@ -1529,7 +1529,13 @@ sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size)
+               return true;
+       delta = size - sk->sk_forward_alloc;
+       return delta <= 0 || __sk_mem_schedule(sk, delta, SK_MEM_RECV) ||
+-              skb_pfmemalloc(skb);
++             pfmemalloc;
++}
++
++static inline bool
++sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size)
++{
++      return __sk_rmem_schedule(sk, size, skb_pfmemalloc(skb));
+ }
+ static inline int sk_unused_reserved_mem(const struct sock *sk)
+diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
+index 99cef92e6290..b21ea634909c 100644
+--- a/net/ipv4/tcp_bpf.c
++++ b/net/ipv4/tcp_bpf.c
+@@ -49,7 +49,7 @@ static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
+               sge = sk_msg_elem(msg, i);
+               size = (apply && apply_bytes < sge->length) ?
+                       apply_bytes : sge->length;
+-              if (!sk_wmem_schedule(sk, size)) {
++              if (!__sk_rmem_schedule(sk, size, false)) {
+                       if (!copied)
+                               ret = -ENOMEM;
+                       break;
+-- 
+2.39.5
+