git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 6.6
author    Sasha Levin <sashal@kernel.org>
          Fri, 27 Dec 2024 15:06:14 +0000 (10:06 -0500)
committer Sasha Levin <sashal@kernel.org>
          Fri, 27 Dec 2024 15:06:14 +0000 (10:06 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
queue-6.6/bpf-check-negative-offsets-in-__bpf_skb_min_len.patch [new file with mode: 0644]
queue-6.6/ceph-allocate-sparse_ext-map-only-for-sparse-reads.patch [new file with mode: 0644]
queue-6.6/ceph-fix-memory-leak-in-ceph_direct_read_write.patch [new file with mode: 0644]
queue-6.6/ceph-try-to-allocate-a-smaller-extent-map-for-sparse.patch [new file with mode: 0644]
queue-6.6/media-dvb-frontends-dib3000mb-fix-uninit-value-in-di.patch [new file with mode: 0644]
queue-6.6/mm-vmstat-fix-a-w-1-clang-compiler-warning.patch [new file with mode: 0644]
queue-6.6/nfsd-restore-callback-functionality-for-nfsv4.0.patch [new file with mode: 0644]
queue-6.6/nfsd-revert-nfsd-release-svc_expkey-svc_export-with-.patch [new file with mode: 0644]
queue-6.6/series [new file with mode: 0644]
queue-6.6/tcp_bpf-add-sk_rmem_alloc-related-logic-for-tcp_bpf-.patch [new file with mode: 0644]
queue-6.6/tcp_bpf-charge-receive-socket-buffer-in-bpf_tcp_ingr.patch [new file with mode: 0644]

diff --git a/queue-6.6/bpf-check-negative-offsets-in-__bpf_skb_min_len.patch b/queue-6.6/bpf-check-negative-offsets-in-__bpf_skb_min_len.patch
new file mode 100644 (file)
index 0000000..fbf8773
--- /dev/null
@@ -0,0 +1,66 @@
+From 2cb350627e9db72e9e34bb46fd00d78900860d21 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 Dec 2024 19:40:54 -0800
+Subject: bpf: Check negative offsets in __bpf_skb_min_len()
+
+From: Cong Wang <cong.wang@bytedance.com>
+
+[ Upstream commit 9ecc4d858b92c1bb0673ad9c327298e600c55659 ]
+
+skb_network_offset() and skb_transport_offset() can be negative when
+they are called after we pull the transport header, for example, when
+we use eBPF sockmap at the point of ->sk_data_ready().
+
+__bpf_skb_min_len() uses an unsigned int to get these offsets, which
+leads to a very large number that then causes bpf_skb_change_tail() to
+fail unexpectedly.
+
+Fix this by using a signed int to get these offsets and ensure the
+minimum is at least zero.
+
+Fixes: 5293efe62df8 ("bpf: add bpf_skb_change_tail helper")
+Signed-off-by: Cong Wang <cong.wang@bytedance.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: John Fastabend <john.fastabend@gmail.com>
+Link: https://lore.kernel.org/bpf/20241213034057.246437-2-xiyou.wangcong@gmail.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/filter.c | 21 +++++++++++++++------
+ 1 file changed, 15 insertions(+), 6 deletions(-)
+
+diff --git a/net/core/filter.c b/net/core/filter.c
+index bc52ab3374f3..34320ce70096 100644
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -3731,13 +3731,22 @@ static const struct bpf_func_proto bpf_skb_adjust_room_proto = {
+ static u32 __bpf_skb_min_len(const struct sk_buff *skb)
+ {
+-      u32 min_len = skb_network_offset(skb);
++      int offset = skb_network_offset(skb);
++      u32 min_len = 0;
+-      if (skb_transport_header_was_set(skb))
+-              min_len = skb_transport_offset(skb);
+-      if (skb->ip_summed == CHECKSUM_PARTIAL)
+-              min_len = skb_checksum_start_offset(skb) +
+-                        skb->csum_offset + sizeof(__sum16);
++      if (offset > 0)
++              min_len = offset;
++      if (skb_transport_header_was_set(skb)) {
++              offset = skb_transport_offset(skb);
++              if (offset > 0)
++                      min_len = offset;
++      }
++      if (skb->ip_summed == CHECKSUM_PARTIAL) {
++              offset = skb_checksum_start_offset(skb) +
++                       skb->csum_offset + sizeof(__sum16);
++              if (offset > 0)
++                      min_len = offset;
++      }
+       return min_len;
+ }
+-- 
+2.39.5
+
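A minimal userspace C sketch of the failure mode described in the commit
message above; the offset value is made up for illustration and none of
this is patch code. Storing a negative offset in an unsigned variable
wraps it to a huge number, which the old __bpf_skb_min_len() then
returned as the minimum length, while the fixed logic keeps the offset
signed and clamps it at zero.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        int offset = -14;              /* e.g. a transport offset after the header was pulled */
        uint32_t old_min_len = offset; /* old code: wraps to 4294967282 */
        uint32_t new_min_len = 0;

        if (offset > 0)                /* fixed code: clamp negative offsets to zero */
                new_min_len = offset;

        printf("old min_len = %u\n", old_min_len);
        printf("new min_len = %u\n", new_min_len);
        return 0;
}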
diff --git a/queue-6.6/ceph-allocate-sparse_ext-map-only-for-sparse-reads.patch b/queue-6.6/ceph-allocate-sparse_ext-map-only-for-sparse-reads.patch
new file mode 100644 (file)
index 0000000..22e7f3f
--- /dev/null
@@ -0,0 +1,50 @@
+From 1050cd9e0981afed8a01154a1d039e72aa1fac4d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 7 Dec 2024 17:33:25 +0100
+Subject: ceph: allocate sparse_ext map only for sparse reads
+
+From: Ilya Dryomov <idryomov@gmail.com>
+
+[ Upstream commit 18d44c5d062b97b97bb0162d9742440518958dc1 ]
+
+If mounted with the sparseread option, ceph_direct_read_write() ends up
+making an unnecessary allocation for O_DIRECT writes.
+
+Fixes: 03bc06c7b0bd ("ceph: add new mount option to enable sparse reads")
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Reviewed-by: Alex Markuze <amarkuze@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ceph/file.c        | 2 +-
+ net/ceph/osd_client.c | 2 ++
+ 2 files changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/fs/ceph/file.c b/fs/ceph/file.c
+index 5233bbab8a76..a03b11cf7887 100644
+--- a/fs/ceph/file.c
++++ b/fs/ceph/file.c
+@@ -1455,7 +1455,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
+               }
+               op = &req->r_ops[0];
+-              if (sparse) {
++              if (!write && sparse) {
+                       extent_cnt = __ceph_sparse_read_ext_count(inode, size);
+                       ret = ceph_alloc_sparse_ext_map(op, extent_cnt);
+                       if (ret) {
+diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
+index 3babcd5e65e1..0b6a8bb0642f 100644
+--- a/net/ceph/osd_client.c
++++ b/net/ceph/osd_client.c
+@@ -1173,6 +1173,8 @@ EXPORT_SYMBOL(ceph_osdc_new_request);
+ int __ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt)
+ {
++      WARN_ON(op->op != CEPH_OSD_OP_SPARSE_READ);
++
+       op->extent.sparse_ext_cnt = cnt;
+       op->extent.sparse_ext = kmalloc_array(cnt,
+                                             sizeof(*op->extent.sparse_ext),
+-- 
+2.39.5
+
diff --git a/queue-6.6/ceph-fix-memory-leak-in-ceph_direct_read_write.patch b/queue-6.6/ceph-fix-memory-leak-in-ceph_direct_read_write.patch
new file mode 100644 (file)
index 0000000..e9f9a2d
--- /dev/null
@@ -0,0 +1,113 @@
+From 27ffdd82a4e38edbd0e394ecb9c24ff23ca3e50e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 6 Dec 2024 17:32:59 +0100
+Subject: ceph: fix memory leak in ceph_direct_read_write()
+
+From: Ilya Dryomov <idryomov@gmail.com>
+
+[ Upstream commit 66e0c4f91461d17d48071695271c824620bed4ef ]
+
+The bvecs array which is allocated in iter_get_bvecs_alloc() is leaked
+and pages remain pinned if ceph_alloc_sparse_ext_map() fails.
+
+There is no need to delay the allocation of the sparse_ext map until
+after the bvecs array is set up, so fix this by moving the sparse_ext
+allocation a bit earlier.  Also, make a similar adjustment in
+for consistency (a leak of the same kind in __ceph_sync_read() has been
+addressed differently).
+
+Cc: stable@vger.kernel.org
+Fixes: 03bc06c7b0bd ("ceph: add new mount option to enable sparse reads")
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Reviewed-by: Alex Markuze <amarkuze@redhat.com>
+Stable-dep-of: 18d44c5d062b ("ceph: allocate sparse_ext map only for sparse reads")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ceph/file.c | 43 ++++++++++++++++++++++---------------------
+ 1 file changed, 22 insertions(+), 21 deletions(-)
+
+diff --git a/fs/ceph/file.c b/fs/ceph/file.c
+index 23dcfb916298..5233bbab8a76 100644
+--- a/fs/ceph/file.c
++++ b/fs/ceph/file.c
+@@ -1026,6 +1026,16 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
+                       len = read_off + read_len - off;
+               more = len < iov_iter_count(to);
++              op = &req->r_ops[0];
++              if (sparse) {
++                      extent_cnt = __ceph_sparse_read_ext_count(inode, read_len);
++                      ret = ceph_alloc_sparse_ext_map(op, extent_cnt);
++                      if (ret) {
++                              ceph_osdc_put_request(req);
++                              break;
++                      }
++              }
++
+               num_pages = calc_pages_for(read_off, read_len);
+               page_off = offset_in_page(off);
+               pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
+@@ -1039,16 +1049,6 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
+                                                offset_in_page(read_off),
+                                                false, true);
+-              op = &req->r_ops[0];
+-              if (sparse) {
+-                      extent_cnt = __ceph_sparse_read_ext_count(inode, read_len);
+-                      ret = ceph_alloc_sparse_ext_map(op, extent_cnt);
+-                      if (ret) {
+-                              ceph_osdc_put_request(req);
+-                              break;
+-                      }
+-              }
+-
+               ceph_osdc_start_request(osdc, req);
+               ret = ceph_osdc_wait_request(osdc, req);
+@@ -1454,6 +1454,16 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
+                       break;
+               }
++              op = &req->r_ops[0];
++              if (sparse) {
++                      extent_cnt = __ceph_sparse_read_ext_count(inode, size);
++                      ret = ceph_alloc_sparse_ext_map(op, extent_cnt);
++                      if (ret) {
++                              ceph_osdc_put_request(req);
++                              break;
++                      }
++              }
++
+               len = iter_get_bvecs_alloc(iter, size, &bvecs, &num_pages);
+               if (len < 0) {
+                       ceph_osdc_put_request(req);
+@@ -1463,6 +1473,8 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
+               if (len != size)
+                       osd_req_op_extent_update(req, 0, len);
++              osd_req_op_extent_osd_data_bvecs(req, 0, bvecs, num_pages, len);
++
+               /*
+                * To simplify error handling, allow AIO when IO within i_size
+                * or IO can be satisfied by single OSD request.
+@@ -1494,17 +1506,6 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
+                       req->r_mtime = mtime;
+               }
+-              osd_req_op_extent_osd_data_bvecs(req, 0, bvecs, num_pages, len);
+-              op = &req->r_ops[0];
+-              if (sparse) {
+-                      extent_cnt = __ceph_sparse_read_ext_count(inode, size);
+-                      ret = ceph_alloc_sparse_ext_map(op, extent_cnt);
+-                      if (ret) {
+-                              ceph_osdc_put_request(req);
+-                              break;
+-                      }
+-              }
+-
+               if (aio_req) {
+                       aio_req->total_len += len;
+                       aio_req->num_reqs++;
+-- 
+2.39.5
+
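A self-contained toy sketch of the ordering principle behind this fix;
all names here are hypothetical and only the structure mirrors the
patch. Doing the allocation that can fail before setting up the
resource that is harder to unwind (the pinned pages behind the bvecs
array) leaves only trivial error paths.

#include <stdio.h>
#include <stdlib.h>

struct toy_req {
        void *sparse_map;   /* stands in for the sparse_ext map */
        void *bvecs;        /* stands in for the pinned-page bvecs array */
};

static int alloc_sparse_map(struct toy_req *r)
{
        r->sparse_map = malloc(64);
        return r->sparse_map ? 0 : -1;
}

static int pin_pages(struct toy_req *r)
{
        r->bvecs = malloc(4096);
        return r->bvecs ? 0 : -1;
}

static void put_req(struct toy_req *r)      /* single teardown path */
{
        free(r->bvecs);
        free(r->sparse_map);
}

int main(void)
{
        struct toy_req req = { 0 };

        if (alloc_sparse_map(&req)) {   /* fails before anything is pinned */
                put_req(&req);
                return 1;
        }
        if (pin_pages(&req)) {          /* later failures still unwind fully */
                put_req(&req);
                return 1;
        }
        put_req(&req);
        return 0;
}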
diff --git a/queue-6.6/ceph-try-to-allocate-a-smaller-extent-map-for-sparse.patch b/queue-6.6/ceph-try-to-allocate-a-smaller-extent-map-for-sparse.patch
new file mode 100644 (file)
index 0000000..69939b8
--- /dev/null
@@ -0,0 +1,144 @@
+From 4f467a6e3ff5a255e73e9ca9f09818efccef155b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 7 Nov 2023 10:44:41 +0800
+Subject: ceph: try to allocate a smaller extent map for sparse read
+
+From: Xiubo Li <xiubli@redhat.com>
+
+[ Upstream commit aaefabc4a5f7ae48682c4d2d5d10faaf95c08eb9 ]
+
+In the fscrypt case, and for smaller read lengths, we can predict the
+max count of the extent map. For small read length use cases this
+could save some memory.
+
+[ idryomov: squash into a single patch to avoid build break, drop
+  redundant variable in ceph_alloc_sparse_ext_map() ]
+
+Signed-off-by: Xiubo Li <xiubli@redhat.com>
+Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Stable-dep-of: 18d44c5d062b ("ceph: allocate sparse_ext map only for sparse reads")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ceph/addr.c                  |  4 +++-
+ fs/ceph/file.c                  |  8 ++++++--
+ fs/ceph/super.h                 | 14 ++++++++++++++
+ include/linux/ceph/osd_client.h |  7 +++++--
+ 4 files changed, 28 insertions(+), 5 deletions(-)
+
+diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
+index 1a2776025e98..2c92de964c5a 100644
+--- a/fs/ceph/addr.c
++++ b/fs/ceph/addr.c
+@@ -355,6 +355,7 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
+       u64 len = subreq->len;
+       bool sparse = IS_ENCRYPTED(inode) || ceph_test_mount_opt(fsc, SPARSEREAD);
+       u64 off = subreq->start;
++      int extent_cnt;
+       if (ceph_inode_is_shutdown(inode)) {
+               err = -EIO;
+@@ -377,7 +378,8 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
+       }
+       if (sparse) {
+-              err = ceph_alloc_sparse_ext_map(&req->r_ops[0]);
++              extent_cnt = __ceph_sparse_read_ext_count(inode, len);
++              err = ceph_alloc_sparse_ext_map(&req->r_ops[0], extent_cnt);
+               if (err)
+                       goto out;
+       }
+diff --git a/fs/ceph/file.c b/fs/ceph/file.c
+index 813974244a9d..23dcfb916298 100644
+--- a/fs/ceph/file.c
++++ b/fs/ceph/file.c
+@@ -1001,6 +1001,7 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
+               struct ceph_osd_req_op *op;
+               u64 read_off = off;
+               u64 read_len = len;
++              int extent_cnt;
+               /* determine new offset/length if encrypted */
+               ceph_fscrypt_adjust_off_and_len(inode, &read_off, &read_len);
+@@ -1040,7 +1041,8 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
+               op = &req->r_ops[0];
+               if (sparse) {
+-                      ret = ceph_alloc_sparse_ext_map(op);
++                      extent_cnt = __ceph_sparse_read_ext_count(inode, read_len);
++                      ret = ceph_alloc_sparse_ext_map(op, extent_cnt);
+                       if (ret) {
+                               ceph_osdc_put_request(req);
+                               break;
+@@ -1431,6 +1433,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
+               ssize_t len;
+               struct ceph_osd_req_op *op;
+               int readop = sparse ? CEPH_OSD_OP_SPARSE_READ : CEPH_OSD_OP_READ;
++              int extent_cnt;
+               if (write)
+                       size = min_t(u64, size, fsc->mount_options->wsize);
+@@ -1494,7 +1497,8 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
+               osd_req_op_extent_osd_data_bvecs(req, 0, bvecs, num_pages, len);
+               op = &req->r_ops[0];
+               if (sparse) {
+-                      ret = ceph_alloc_sparse_ext_map(op);
++                      extent_cnt = __ceph_sparse_read_ext_count(inode, size);
++                      ret = ceph_alloc_sparse_ext_map(op, extent_cnt);
+                       if (ret) {
+                               ceph_osdc_put_request(req);
+                               break;
+diff --git a/fs/ceph/super.h b/fs/ceph/super.h
+index 8efd4ba60774..5903e3fb6d75 100644
+--- a/fs/ceph/super.h
++++ b/fs/ceph/super.h
+@@ -3,6 +3,7 @@
+ #define _FS_CEPH_SUPER_H
+ #include <linux/ceph/ceph_debug.h>
++#include <linux/ceph/osd_client.h>
+ #include <asm/unaligned.h>
+ #include <linux/backing-dev.h>
+@@ -1401,6 +1402,19 @@ static inline void __ceph_update_quota(struct ceph_inode_info *ci,
+               ceph_adjust_quota_realms_count(&ci->netfs.inode, has_quota);
+ }
++static inline int __ceph_sparse_read_ext_count(struct inode *inode, u64 len)
++{
++      int cnt = 0;
++
++      if (IS_ENCRYPTED(inode)) {
++              cnt = len >> CEPH_FSCRYPT_BLOCK_SHIFT;
++              if (cnt > CEPH_SPARSE_EXT_ARRAY_INITIAL)
++                      cnt = 0;
++      }
++
++      return cnt;
++}
++
+ extern void ceph_handle_quota(struct ceph_mds_client *mdsc,
+                             struct ceph_mds_session *session,
+                             struct ceph_msg *msg);
+diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
+index f703fb8030de..50e409e84466 100644
+--- a/include/linux/ceph/osd_client.h
++++ b/include/linux/ceph/osd_client.h
+@@ -573,9 +573,12 @@ int __ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt);
+  */
+ #define CEPH_SPARSE_EXT_ARRAY_INITIAL  16
+-static inline int ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op)
++static inline int ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt)
+ {
+-      return __ceph_alloc_sparse_ext_map(op, CEPH_SPARSE_EXT_ARRAY_INITIAL);
++      if (!cnt)
++              cnt = CEPH_SPARSE_EXT_ARRAY_INITIAL;
++
++      return __ceph_alloc_sparse_ext_map(op, cnt);
+ }
+ extern void ceph_osdc_get_request(struct ceph_osd_request *req);
+-- 
+2.39.5
+
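A standalone restatement of the sizing logic this patch introduces, with
a worked number. The 4 KiB fscrypt block (a shift of 12) is an
assumption here rather than something quoted from the patch; the initial
array size of 16 matches CEPH_SPARSE_EXT_ARRAY_INITIAL shown above.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define TOY_FSCRYPT_BLOCK_SHIFT      12   /* assumed 4 KiB fscrypt block */
#define TOY_SPARSE_EXT_ARRAY_INITIAL 16   /* matches the header in the patch */

/* Mirrors __ceph_sparse_read_ext_count(): returning 0 tells the caller
 * to fall back to the default-sized extent array. */
static int sparse_read_ext_count(bool encrypted, uint64_t len)
{
        int cnt = 0;

        if (encrypted) {
                cnt = (int)(len >> TOY_FSCRYPT_BLOCK_SHIFT);
                if (cnt > TOY_SPARSE_EXT_ARRAY_INITIAL)
                        cnt = 0;
        }
        return cnt;
}

int main(void)
{
        printf("%d\n", sparse_read_ext_count(true, 16 * 1024));   /* 4: smaller map */
        printf("%d\n", sparse_read_ext_count(true, 1024 * 1024)); /* 0: use default 16 */
        printf("%d\n", sparse_read_ext_count(false, 16 * 1024));  /* 0: not encrypted */
        return 0;
}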
diff --git a/queue-6.6/media-dvb-frontends-dib3000mb-fix-uninit-value-in-di.patch b/queue-6.6/media-dvb-frontends-dib3000mb-fix-uninit-value-in-di.patch
new file mode 100644 (file)
index 0000000..ce67de9
--- /dev/null
@@ -0,0 +1,62 @@
+From bf30e6deaa923acc334dedf8b8417d8e76b7ab19 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 17 May 2024 08:58:00 -0700
+Subject: media: dvb-frontends: dib3000mb: fix uninit-value in
+ dib3000_write_reg
+
+From: Nikita Zhandarovich <n.zhandarovich@fintech.ru>
+
+[ Upstream commit 2dd59fe0e19e1ab955259978082b62e5751924c7 ]
+
+Syzbot reports [1] an uninitialized value issue found by KMSAN in
+dib3000_read_reg().
+
+Local u8 rb[2] is used in i2c_transfer() as a read buffer; in case
+that call fails, the buffer may end up with some undefined values.
+
+Since no elaborate error handling is expected in dib3000_write_reg(),
+simply zero out the rb buffer to mitigate the problem.
+
+[1] Syzkaller report
+dvb-usb: bulk message failed: -22 (6/0)
+=====================================================
+BUG: KMSAN: uninit-value in dib3000mb_attach+0x2d8/0x3c0 drivers/media/dvb-frontends/dib3000mb.c:758
+ dib3000mb_attach+0x2d8/0x3c0 drivers/media/dvb-frontends/dib3000mb.c:758
+ dibusb_dib3000mb_frontend_attach+0x155/0x2f0 drivers/media/usb/dvb-usb/dibusb-mb.c:31
+ dvb_usb_adapter_frontend_init+0xed/0x9a0 drivers/media/usb/dvb-usb/dvb-usb-dvb.c:290
+ dvb_usb_adapter_init drivers/media/usb/dvb-usb/dvb-usb-init.c:90 [inline]
+ dvb_usb_init drivers/media/usb/dvb-usb/dvb-usb-init.c:186 [inline]
+ dvb_usb_device_init+0x25a8/0x3760 drivers/media/usb/dvb-usb/dvb-usb-init.c:310
+ dibusb_probe+0x46/0x250 drivers/media/usb/dvb-usb/dibusb-mb.c:110
+...
+Local variable rb created at:
+ dib3000_read_reg+0x86/0x4e0 drivers/media/dvb-frontends/dib3000mb.c:54
+ dib3000mb_attach+0x123/0x3c0 drivers/media/dvb-frontends/dib3000mb.c:758
+...
+
+Fixes: 74340b0a8bc6 ("V4L/DVB (4457): Remove dib3000-common-module")
+Reported-by: syzbot+c88fc0ebe0d5935c70da@syzkaller.appspotmail.com
+Signed-off-by: Nikita Zhandarovich <n.zhandarovich@fintech.ru>
+Link: https://lore.kernel.org/r/20240517155800.9881-1-n.zhandarovich@fintech.ru
+Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/media/dvb-frontends/dib3000mb.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/media/dvb-frontends/dib3000mb.c b/drivers/media/dvb-frontends/dib3000mb.c
+index c598b2a63325..7c452ddd9e40 100644
+--- a/drivers/media/dvb-frontends/dib3000mb.c
++++ b/drivers/media/dvb-frontends/dib3000mb.c
+@@ -51,7 +51,7 @@ MODULE_PARM_DESC(debug, "set debugging level (1=info,2=xfer,4=setfe,8=getfe (|-a
+ static int dib3000_read_reg(struct dib3000_state *state, u16 reg)
+ {
+       u8 wb[] = { ((reg >> 8) | 0x80) & 0xff, reg & 0xff };
+-      u8 rb[2];
++      u8 rb[2] = {};
+       struct i2c_msg msg[] = {
+               { .addr = state->config.demod_address, .flags = 0,        .buf = wb, .len = 2 },
+               { .addr = state->config.demod_address, .flags = I2C_M_RD, .buf = rb, .len = 2 },
+-- 
+2.39.5
+
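A small standalone C illustration of what the one-line change buys
(driver names and the I2C call are not reproduced here): a zero
initializer clears the whole array, so if the transfer fails and never
writes into the buffer, the caller sees deterministic zeros instead of
the stack garbage KMSAN flagged.

#include <stdio.h>

typedef unsigned char u8;

int main(void)
{
        u8 uninit[2];           /* indeterminate if the transfer never fills it */
        u8 zeroed[2] = { 0 };   /* same effect as the kernel's u8 rb[2] = {}; */

        /* Pretend the i2c transfer failed, so neither buffer was written. */
        printf("zeroed: %02x %02x\n", zeroed[0], zeroed[1]);  /* always 00 00 */

        /* Reading 'uninit' here would be undefined behaviour, which is the
         * class of bug the syzbot/KMSAN report is about. */
        (void)uninit;
        return 0;
}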
diff --git a/queue-6.6/mm-vmstat-fix-a-w-1-clang-compiler-warning.patch b/queue-6.6/mm-vmstat-fix-a-w-1-clang-compiler-warning.patch
new file mode 100644 (file)
index 0000000..65112a2
--- /dev/null
@@ -0,0 +1,43 @@
+From 61e7a5c6fc88bf17949890241db1baf41001c14b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 Dec 2024 13:31:26 -0800
+Subject: mm/vmstat: fix a W=1 clang compiler warning
+
+From: Bart Van Assche <bvanassche@acm.org>
+
+[ Upstream commit 30c2de0a267c04046d89e678cc0067a9cfb455df ]
+
+Fix the following clang compiler warning that is reported if the kernel is
+built with W=1:
+
+./include/linux/vmstat.h:518:36: error: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Werror,-Wenum-enum-conversion]
+  518 |         return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
+      |                               ~~~~~~~~~~~ ^ ~~~
+
+Link: https://lkml.kernel.org/r/20241212213126.1269116-1-bvanassche@acm.org
+Fixes: 9d7ea9a297e6 ("mm/vmstat: add helpers to get vmstat item names for each enum type")
+Signed-off-by: Bart Van Assche <bvanassche@acm.org>
+Cc: Konstantin Khlebnikov <koct9i@gmail.com>
+Cc: Nathan Chancellor <nathan@kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/vmstat.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
+index fed855bae6d8..3219b368db79 100644
+--- a/include/linux/vmstat.h
++++ b/include/linux/vmstat.h
+@@ -519,7 +519,7 @@ static inline const char *node_stat_name(enum node_stat_item item)
+ static inline const char *lru_list_name(enum lru_list lru)
+ {
+-      return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
++      return node_stat_name(NR_LRU_BASE + (enum node_stat_item)lru) + 3; // skip "nr_"
+ }
+ static inline const char *writeback_stat_name(enum writeback_stat_item item)
+-- 
+2.39.5
+
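A minimal reproduction, outside the kernel, of the diagnostic class
being silenced; the enum names are invented. With clang and
-Wenum-enum-conversion (which the kernel's W=1 build enables),
arithmetic mixing two unrelated enumeration types warns, and the
explicit cast used by the fix keeps the value identical while making
the intent clear.

/* Compile with: clang -Wenum-enum-conversion -c enum_mix.c */
enum toy_node_stat { TOY_NR_LRU_BASE = 10 };
enum toy_lru_list  { TOY_LRU_ANON, TOY_LRU_FILE };

int warns(enum toy_lru_list lru)
{
        return TOY_NR_LRU_BASE + lru;                       /* mixes two enum types */
}

int quiet(enum toy_lru_list lru)
{
        return TOY_NR_LRU_BASE + (enum toy_node_stat)lru;   /* same value, no warning */
}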
diff --git a/queue-6.6/nfsd-restore-callback-functionality-for-nfsv4.0.patch b/queue-6.6/nfsd-restore-callback-functionality-for-nfsv4.0.patch
new file mode 100644 (file)
index 0000000..9dc8c3f
--- /dev/null
@@ -0,0 +1,51 @@
+From a00416c3c42c674bf5acca21b344b02a2bd42d53 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 20 Dec 2024 15:28:18 +1100
+Subject: nfsd: restore callback functionality for NFSv4.0
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit 7917f01a286ce01e9c085e24468421f596ee1a0c ]
+
+A recent patch inadvertently broke callbacks for NFSv4.0.
+
+In the 4.0 case we do not expect a session to be found, but we still
+need to call setup_callback_client(), which will not try to dereference
+it.
+
+This patch moves the check for failure to find a session into the 4.1+
+branch of setup_callback_client().
+
+Fixes: 1e02c641c3a4 ("NFSD: Prevent NULL dereference in nfsd4_process_cb_update()")
+Signed-off-by: NeilBrown <neilb@suse.de>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/nfs4callback.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
+index 49a49529c6b8..54ffadf02e03 100644
+--- a/fs/nfsd/nfs4callback.c
++++ b/fs/nfsd/nfs4callback.c
+@@ -986,7 +986,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
+               args.authflavor = clp->cl_cred.cr_flavor;
+               clp->cl_cb_ident = conn->cb_ident;
+       } else {
+-              if (!conn->cb_xprt)
++              if (!conn->cb_xprt || !ses)
+                       return -EINVAL;
+               clp->cl_cb_session = ses;
+               args.bc_xprt = conn->cb_xprt;
+@@ -1379,8 +1379,6 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
+               ses = c->cn_session;
+       }
+       spin_unlock(&clp->cl_lock);
+-      if (!c)
+-              return;
+       err = setup_callback_client(clp, &conn, ses);
+       if (err) {
+-- 
+2.39.5
+
diff --git a/queue-6.6/nfsd-revert-nfsd-release-svc_expkey-svc_export-with-.patch b/queue-6.6/nfsd-revert-nfsd-release-svc_expkey-svc_export-with-.patch
new file mode 100644 (file)
index 0000000..0596db0
--- /dev/null
@@ -0,0 +1,132 @@
+From f27ad9dce8f36f887ef33f140ddd917f58edbe3d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 16 Dec 2024 22:21:52 +0800
+Subject: nfsd: Revert "nfsd: release svc_expkey/svc_export with rcu_work"
+
+From: Yang Erkun <yangerkun@huawei.com>
+
+[ Upstream commit 69d803c40edeaf94089fbc8751c9b746cdc35044 ]
+
+This reverts commit f8c989a0c89a75d30f899a7cabdc14d72522bb8d.
+
+Before this commit, svc_export_put or expkey_put would call path_put
+synchronously. After this commit, path_put is called asynchronously,
+and this can lead to unexpected results, as shown below.
+
+mkfs.xfs -f /dev/sda
+echo "/ *(rw,no_root_squash,fsid=0)" > /etc/exports
+echo "/mnt *(rw,no_root_squash,fsid=1)" >> /etc/exports
+exportfs -ra
+service nfs-server start
+mount -t nfs -o vers=4.0 127.0.0.1:/mnt /mnt1
+mount /dev/sda /mnt/sda
+touch /mnt1/sda/file
+exportfs -r
+umount /mnt/sda # fails unexpectedly
+
+The touch will eventually call nfsd_cross_mnt, take a reference on the
+mount, and then add a cache_head. Before this commit, exportfs -r would
+call cache_flush to clean up all cache_heads, and the path_put in
+svc_export_put/expkey_put would complete synchronously, so the
+subsequent umount would always succeed. After this commit, path_put is
+called asynchronously, so the umount may fail, although it succeeds if
+we add some delay. Personally, I think this is a bug and should be
+fixed. We first revert the earlier bugfix patch, and then fix the
+original bug in a different way.
+
+Fixes: f8c989a0c89a ("nfsd: release svc_expkey/svc_export with rcu_work")
+Signed-off-by: Yang Erkun <yangerkun@huawei.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/export.c | 31 ++++++-------------------------
+ fs/nfsd/export.h |  4 ++--
+ 2 files changed, 8 insertions(+), 27 deletions(-)
+
+diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
+index d4d3ec58047e..4b5d998cbc2f 100644
+--- a/fs/nfsd/export.c
++++ b/fs/nfsd/export.c
+@@ -40,24 +40,15 @@
+ #define       EXPKEY_HASHMAX          (1 << EXPKEY_HASHBITS)
+ #define       EXPKEY_HASHMASK         (EXPKEY_HASHMAX -1)
+-static void expkey_put_work(struct work_struct *work)
++static void expkey_put(struct kref *ref)
+ {
+-      struct svc_expkey *key =
+-              container_of(to_rcu_work(work), struct svc_expkey, ek_rcu_work);
++      struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref);
+       if (test_bit(CACHE_VALID, &key->h.flags) &&
+           !test_bit(CACHE_NEGATIVE, &key->h.flags))
+               path_put(&key->ek_path);
+       auth_domain_put(key->ek_client);
+-      kfree(key);
+-}
+-
+-static void expkey_put(struct kref *ref)
+-{
+-      struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref);
+-
+-      INIT_RCU_WORK(&key->ek_rcu_work, expkey_put_work);
+-      queue_rcu_work(system_wq, &key->ek_rcu_work);
++      kfree_rcu(key, ek_rcu);
+ }
+ static int expkey_upcall(struct cache_detail *cd, struct cache_head *h)
+@@ -360,26 +351,16 @@ static void export_stats_destroy(struct export_stats *stats)
+                                            EXP_STATS_COUNTERS_NUM);
+ }
+-static void svc_export_put_work(struct work_struct *work)
++static void svc_export_put(struct kref *ref)
+ {
+-      struct svc_export *exp =
+-              container_of(to_rcu_work(work), struct svc_export, ex_rcu_work);
+-
++      struct svc_export *exp = container_of(ref, struct svc_export, h.ref);
+       path_put(&exp->ex_path);
+       auth_domain_put(exp->ex_client);
+       nfsd4_fslocs_free(&exp->ex_fslocs);
+       export_stats_destroy(exp->ex_stats);
+       kfree(exp->ex_stats);
+       kfree(exp->ex_uuid);
+-      kfree(exp);
+-}
+-
+-static void svc_export_put(struct kref *ref)
+-{
+-      struct svc_export *exp = container_of(ref, struct svc_export, h.ref);
+-
+-      INIT_RCU_WORK(&exp->ex_rcu_work, svc_export_put_work);
+-      queue_rcu_work(system_wq, &exp->ex_rcu_work);
++      kfree_rcu(exp, ex_rcu);
+ }
+ static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h)
+diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h
+index 9d895570ceba..ca9dc230ae3d 100644
+--- a/fs/nfsd/export.h
++++ b/fs/nfsd/export.h
+@@ -75,7 +75,7 @@ struct svc_export {
+       u32                     ex_layout_types;
+       struct nfsd4_deviceid_map *ex_devid_map;
+       struct cache_detail     *cd;
+-      struct rcu_work         ex_rcu_work;
++      struct rcu_head         ex_rcu;
+       unsigned long           ex_xprtsec_modes;
+       struct export_stats     *ex_stats;
+ };
+@@ -92,7 +92,7 @@ struct svc_expkey {
+       u32                     ek_fsid[6];
+       struct path             ek_path;
+-      struct rcu_work         ek_rcu_work;
++      struct rcu_head         ek_rcu;
+ };
+ #define EX_ISSYNC(exp)                (!((exp)->ex_flags & NFSEXP_ASYNC))
+-- 
+2.39.5
+
diff --git a/queue-6.6/series b/queue-6.6/series
new file mode 100644 (file)
index 0000000..4e13e3a
--- /dev/null
@@ -0,0 +1,10 @@
+media-dvb-frontends-dib3000mb-fix-uninit-value-in-di.patch
+ceph-try-to-allocate-a-smaller-extent-map-for-sparse.patch
+ceph-fix-memory-leak-in-ceph_direct_read_write.patch
+ceph-allocate-sparse_ext-map-only-for-sparse-reads.patch
+mm-vmstat-fix-a-w-1-clang-compiler-warning.patch
+tcp_bpf-charge-receive-socket-buffer-in-bpf_tcp_ingr.patch
+tcp_bpf-add-sk_rmem_alloc-related-logic-for-tcp_bpf-.patch
+bpf-check-negative-offsets-in-__bpf_skb_min_len.patch
+nfsd-revert-nfsd-release-svc_expkey-svc_export-with-.patch
+nfsd-restore-callback-functionality-for-nfsv4.0.patch
diff --git a/queue-6.6/tcp_bpf-add-sk_rmem_alloc-related-logic-for-tcp_bpf-.patch b/queue-6.6/tcp_bpf-add-sk_rmem_alloc-related-logic-for-tcp_bpf-.patch
new file mode 100644 (file)
index 0000000..30fa1f0
--- /dev/null
@@ -0,0 +1,116 @@
+From 3d96fc6e00f9a888555b9d95d0e9b2d01b0d57ab Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 10 Dec 2024 01:20:39 +0000
+Subject: tcp_bpf: Add sk_rmem_alloc related logic for tcp_bpf ingress
+ redirection
+
+From: Zijian Zhang <zijianzhang@bytedance.com>
+
+[ Upstream commit d888b7af7c149c115dd6ac772cc11c375da3e17c ]
+
+When we do sk_psock_verdict_apply->sk_psock_skb_ingress, an sk_msg will
+be created out of the skb, and the rmem accounting of the sk_msg will be
+handled by the skb.
+
+For skmsgs in the __SK_REDIRECT case of tcp_bpf_send_verdict, when
+redirecting to the ingress of a socket, although we call sk_rmem_schedule
+and add the sk_msg to the ingress_msg of sk_redir, we do not update
+sk_rmem_alloc. As a result, apart from the global memory limit, the rmem
+of sk_redir is nearly unlimited. Thus, add sk_rmem_alloc related logic to
+limit the receive buffer.
+
+The functions sk_msg_recvmsg and __sk_psock_purge_ingress_msg are used
+in both of these paths. We use "msg->skb" to test whether the sk_msg is
+backed by an skb; if it is not, we do the memory accounting explicitly.
+
+Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface")
+Signed-off-by: Zijian Zhang <zijianzhang@bytedance.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Reviewed-by: John Fastabend <john.fastabend@gmail.com>
+Link: https://lore.kernel.org/bpf/20241210012039.1669389-3-zijianzhang@bytedance.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/skmsg.h | 11 ++++++++---
+ net/core/skmsg.c      |  6 +++++-
+ net/ipv4/tcp_bpf.c    |  4 +++-
+ 3 files changed, 16 insertions(+), 5 deletions(-)
+
+diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
+index 062fe440f5d0..6ccfd9236387 100644
+--- a/include/linux/skmsg.h
++++ b/include/linux/skmsg.h
+@@ -308,17 +308,22 @@ static inline void sock_drop(struct sock *sk, struct sk_buff *skb)
+       kfree_skb(skb);
+ }
+-static inline void sk_psock_queue_msg(struct sk_psock *psock,
++static inline bool sk_psock_queue_msg(struct sk_psock *psock,
+                                     struct sk_msg *msg)
+ {
++      bool ret;
++
+       spin_lock_bh(&psock->ingress_lock);
+-      if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
++      if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
+               list_add_tail(&msg->list, &psock->ingress_msg);
+-      else {
++              ret = true;
++      } else {
+               sk_msg_free(psock->sk, msg);
+               kfree(msg);
++              ret = false;
+       }
+       spin_unlock_bh(&psock->ingress_lock);
++      return ret;
+ }
+ static inline struct sk_msg *sk_psock_dequeue_msg(struct sk_psock *psock)
+diff --git a/net/core/skmsg.c b/net/core/skmsg.c
+index 846fd672f0e5..902098e221b3 100644
+--- a/net/core/skmsg.c
++++ b/net/core/skmsg.c
+@@ -445,8 +445,10 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
+                       if (likely(!peek)) {
+                               sge->offset += copy;
+                               sge->length -= copy;
+-                              if (!msg_rx->skb)
++                              if (!msg_rx->skb) {
+                                       sk_mem_uncharge(sk, copy);
++                                      atomic_sub(copy, &sk->sk_rmem_alloc);
++                              }
+                               msg_rx->sg.size -= copy;
+                               if (!sge->length) {
+@@ -772,6 +774,8 @@ static void __sk_psock_purge_ingress_msg(struct sk_psock *psock)
+       list_for_each_entry_safe(msg, tmp, &psock->ingress_msg, list) {
+               list_del(&msg->list);
++              if (!msg->skb)
++                      atomic_sub(msg->sg.size, &psock->sk->sk_rmem_alloc);
+               sk_msg_free(psock->sk, msg);
+               kfree(msg);
+       }
+diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
+index a7b5b160d107..f882054fae5e 100644
+--- a/net/ipv4/tcp_bpf.c
++++ b/net/ipv4/tcp_bpf.c
+@@ -56,6 +56,7 @@ static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
+               }
+               sk_mem_charge(sk, size);
++              atomic_add(size, &sk->sk_rmem_alloc);
+               sk_msg_xfer(tmp, msg, i, size);
+               copied += size;
+               if (sge->length)
+@@ -74,7 +75,8 @@ static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
+       if (!ret) {
+               msg->sg.start = i;
+-              sk_psock_queue_msg(psock, tmp);
++              if (!sk_psock_queue_msg(psock, tmp))
++                      atomic_sub(copied, &sk->sk_rmem_alloc);
+               sk_psock_data_ready(sk, psock);
+       } else {
+               sk_msg_free(sk, tmp);
+-- 
+2.39.5
+
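A toy model of the accounting invariant the patch establishes for
skb-less skmsgs; the names are illustrative and the atomic counter
stands in for sk->sk_rmem_alloc. Whatever gets added on ingress enqueue
must be subtracted again on copy-out, on purge, or when queueing fails.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int rmem_alloc;   /* stands in for sk->sk_rmem_alloc */

static bool ingress_enqueue(int size, bool psock_accepts)
{
        atomic_fetch_add(&rmem_alloc, size);         /* charge before queueing */
        if (!psock_accepts) {
                atomic_fetch_sub(&rmem_alloc, size); /* undo if the msg is dropped */
                return false;
        }
        return true;
}

static void copy_out_or_purge(int size)
{
        atomic_fetch_sub(&rmem_alloc, size);         /* uncharge on the way out */
}

int main(void)
{
        if (ingress_enqueue(4096, true))
                copy_out_or_purge(4096);
        ingress_enqueue(2048, false);                /* psock not accepting */
        printf("rmem_alloc = %d\n", atomic_load(&rmem_alloc)); /* 0: balanced */
        return 0;
}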
diff --git a/queue-6.6/tcp_bpf-charge-receive-socket-buffer-in-bpf_tcp_ingr.patch b/queue-6.6/tcp_bpf-charge-receive-socket-buffer-in-bpf_tcp_ingr.patch
new file mode 100644 (file)
index 0000000..476d070
--- /dev/null
@@ -0,0 +1,71 @@
+From 221ebfa00bcf79f779b41c73d3a6b58a6dd070b4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 10 Dec 2024 01:20:38 +0000
+Subject: tcp_bpf: Charge receive socket buffer in bpf_tcp_ingress()
+
+From: Cong Wang <cong.wang@bytedance.com>
+
+[ Upstream commit 54f89b3178d5448dd4457afbb98fc1ab99090a65 ]
+
+When bpf_tcp_ingress() is called, the skmsg is being redirected to the
+ingress of the destination socket. Therefore, we should charge its
+receive socket buffer, not its send socket buffer.
+
+Because sk_rmem_schedule() tests the pfmemalloc state of an skb, we need
+to introduce a wrapper and call that for skmsg.
+
+Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface")
+Signed-off-by: Cong Wang <cong.wang@bytedance.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Reviewed-by: John Fastabend <john.fastabend@gmail.com>
+Link: https://lore.kernel.org/bpf/20241210012039.1669389-2-zijianzhang@bytedance.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/sock.h | 10 ++++++++--
+ net/ipv4/tcp_bpf.c |  2 +-
+ 2 files changed, 9 insertions(+), 3 deletions(-)
+
+diff --git a/include/net/sock.h b/include/net/sock.h
+index a6b795ec7c9c..dc625f94ee37 100644
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -1635,7 +1635,7 @@ static inline bool sk_wmem_schedule(struct sock *sk, int size)
+ }
+ static inline bool
+-sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size)
++__sk_rmem_schedule(struct sock *sk, int size, bool pfmemalloc)
+ {
+       int delta;
+@@ -1643,7 +1643,13 @@ sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size)
+               return true;
+       delta = size - sk->sk_forward_alloc;
+       return delta <= 0 || __sk_mem_schedule(sk, delta, SK_MEM_RECV) ||
+-              skb_pfmemalloc(skb);
++             pfmemalloc;
++}
++
++static inline bool
++sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size)
++{
++      return __sk_rmem_schedule(sk, size, skb_pfmemalloc(skb));
+ }
+ static inline int sk_unused_reserved_mem(const struct sock *sk)
+diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
+index 0a42d73c0850..a7b5b160d107 100644
+--- a/net/ipv4/tcp_bpf.c
++++ b/net/ipv4/tcp_bpf.c
+@@ -49,7 +49,7 @@ static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
+               sge = sk_msg_elem(msg, i);
+               size = (apply && apply_bytes < sge->length) ?
+                       apply_bytes : sge->length;
+-              if (!sk_wmem_schedule(sk, size)) {
++              if (!__sk_rmem_schedule(sk, size, false)) {
+                       if (!copied)
+                               ret = -ENOMEM;
+                       break;
+-- 
+2.39.5
+
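An illustrative sketch, with hypothetical names rather than the kernel
API, of the split the patch makes: the inner helper takes pfmemalloc as
a plain flag, the skb-based wrapper derives it from the skb, and the
skmsg path, which has no skb, calls the inner helper with false.

#include <stdbool.h>
#include <stdio.h>

struct fake_skb { bool pfmemalloc; };

static bool rmem_schedule(int forward_alloc, int size, bool pfmemalloc)
{
        if (size <= forward_alloc)
                return true;
        return pfmemalloc;   /* stand-in for the __sk_mem_schedule() slow path */
}

static bool rmem_schedule_skb(int forward_alloc, const struct fake_skb *skb,
                              int size)
{
        return rmem_schedule(forward_alloc, size, skb->pfmemalloc);
}

int main(void)
{
        struct fake_skb skb = { .pfmemalloc = true };

        printf("skmsg path: %d\n", rmem_schedule(0, 1024, false));    /* 0 */
        printf("skb path:   %d\n", rmem_schedule_skb(0, &skb, 1024)); /* 1 */
        return 0;
}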