From: Sasha Levin Date: Fri, 27 Dec 2024 15:06:14 +0000 (-0500) Subject: Fixes for 6.6 X-Git-Tag: v6.1.123~60 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=b6bb16e6844817218cc0b71e960a287dc4fa5ce5;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 6.6 Signed-off-by: Sasha Levin --- diff --git a/queue-6.6/bpf-check-negative-offsets-in-__bpf_skb_min_len.patch b/queue-6.6/bpf-check-negative-offsets-in-__bpf_skb_min_len.patch new file mode 100644 index 00000000000..fbf8773052c --- /dev/null +++ b/queue-6.6/bpf-check-negative-offsets-in-__bpf_skb_min_len.patch @@ -0,0 +1,66 @@ +From 2cb350627e9db72e9e34bb46fd00d78900860d21 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 12 Dec 2024 19:40:54 -0800 +Subject: bpf: Check negative offsets in __bpf_skb_min_len() + +From: Cong Wang + +[ Upstream commit 9ecc4d858b92c1bb0673ad9c327298e600c55659 ] + +skb_network_offset() and skb_transport_offset() can be negative when +they are called after we pull the transport header, for example, when +we use eBPF sockmap at the point of ->sk_data_ready(). + +__bpf_skb_min_len() uses an unsigned int to get these offsets, this +leads to a very large number which then causes bpf_skb_change_tail() +failed unexpectedly. + +Fix this by using a signed int to get these offsets and ensure the +minimum is at least zero. + +Fixes: 5293efe62df8 ("bpf: add bpf_skb_change_tail helper") +Signed-off-by: Cong Wang +Signed-off-by: Daniel Borkmann +Acked-by: John Fastabend +Link: https://lore.kernel.org/bpf/20241213034057.246437-2-xiyou.wangcong@gmail.com +Signed-off-by: Sasha Levin +--- + net/core/filter.c | 21 +++++++++++++++------ + 1 file changed, 15 insertions(+), 6 deletions(-) + +diff --git a/net/core/filter.c b/net/core/filter.c +index bc52ab3374f3..34320ce70096 100644 +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -3731,13 +3731,22 @@ static const struct bpf_func_proto bpf_skb_adjust_room_proto = { + + static u32 __bpf_skb_min_len(const struct sk_buff *skb) + { +- u32 min_len = skb_network_offset(skb); ++ int offset = skb_network_offset(skb); ++ u32 min_len = 0; + +- if (skb_transport_header_was_set(skb)) +- min_len = skb_transport_offset(skb); +- if (skb->ip_summed == CHECKSUM_PARTIAL) +- min_len = skb_checksum_start_offset(skb) + +- skb->csum_offset + sizeof(__sum16); ++ if (offset > 0) ++ min_len = offset; ++ if (skb_transport_header_was_set(skb)) { ++ offset = skb_transport_offset(skb); ++ if (offset > 0) ++ min_len = offset; ++ } ++ if (skb->ip_summed == CHECKSUM_PARTIAL) { ++ offset = skb_checksum_start_offset(skb) + ++ skb->csum_offset + sizeof(__sum16); ++ if (offset > 0) ++ min_len = offset; ++ } + return min_len; + } + +-- +2.39.5 + diff --git a/queue-6.6/ceph-allocate-sparse_ext-map-only-for-sparse-reads.patch b/queue-6.6/ceph-allocate-sparse_ext-map-only-for-sparse-reads.patch new file mode 100644 index 00000000000..22e7f3f2c1f --- /dev/null +++ b/queue-6.6/ceph-allocate-sparse_ext-map-only-for-sparse-reads.patch @@ -0,0 +1,50 @@ +From 1050cd9e0981afed8a01154a1d039e72aa1fac4d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 7 Dec 2024 17:33:25 +0100 +Subject: ceph: allocate sparse_ext map only for sparse reads + +From: Ilya Dryomov + +[ Upstream commit 18d44c5d062b97b97bb0162d9742440518958dc1 ] + +If mounted with sparseread option, ceph_direct_read_write() ends up +making an unnecessarily allocation for O_DIRECT writes. + +Fixes: 03bc06c7b0bd ("ceph: add new mount option to enable sparse reads") +Signed-off-by: Ilya Dryomov +Reviewed-by: Alex Markuze +Signed-off-by: Sasha Levin +--- + fs/ceph/file.c | 2 +- + net/ceph/osd_client.c | 2 ++ + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/fs/ceph/file.c b/fs/ceph/file.c +index 5233bbab8a76..a03b11cf7887 100644 +--- a/fs/ceph/file.c ++++ b/fs/ceph/file.c +@@ -1455,7 +1455,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, + } + + op = &req->r_ops[0]; +- if (sparse) { ++ if (!write && sparse) { + extent_cnt = __ceph_sparse_read_ext_count(inode, size); + ret = ceph_alloc_sparse_ext_map(op, extent_cnt); + if (ret) { +diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c +index 3babcd5e65e1..0b6a8bb0642f 100644 +--- a/net/ceph/osd_client.c ++++ b/net/ceph/osd_client.c +@@ -1173,6 +1173,8 @@ EXPORT_SYMBOL(ceph_osdc_new_request); + + int __ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt) + { ++ WARN_ON(op->op != CEPH_OSD_OP_SPARSE_READ); ++ + op->extent.sparse_ext_cnt = cnt; + op->extent.sparse_ext = kmalloc_array(cnt, + sizeof(*op->extent.sparse_ext), +-- +2.39.5 + diff --git a/queue-6.6/ceph-fix-memory-leak-in-ceph_direct_read_write.patch b/queue-6.6/ceph-fix-memory-leak-in-ceph_direct_read_write.patch new file mode 100644 index 00000000000..e9f9a2ddad0 --- /dev/null +++ b/queue-6.6/ceph-fix-memory-leak-in-ceph_direct_read_write.patch @@ -0,0 +1,113 @@ +From 27ffdd82a4e38edbd0e394ecb9c24ff23ca3e50e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 6 Dec 2024 17:32:59 +0100 +Subject: ceph: fix memory leak in ceph_direct_read_write() + +From: Ilya Dryomov + +[ Upstream commit 66e0c4f91461d17d48071695271c824620bed4ef ] + +The bvecs array which is allocated in iter_get_bvecs_alloc() is leaked +and pages remain pinned if ceph_alloc_sparse_ext_map() fails. + +There is no need to delay the allocation of sparse_ext map until after +the bvecs array is set up, so fix this by moving sparse_ext allocation +a bit earlier. Also, make a similar adjustment in __ceph_sync_read() +for consistency (a leak of the same kind in __ceph_sync_read() has been +addressed differently). + +Cc: stable@vger.kernel.org +Fixes: 03bc06c7b0bd ("ceph: add new mount option to enable sparse reads") +Signed-off-by: Ilya Dryomov +Reviewed-by: Alex Markuze +Stable-dep-of: 18d44c5d062b ("ceph: allocate sparse_ext map only for sparse reads") +Signed-off-by: Sasha Levin +--- + fs/ceph/file.c | 43 ++++++++++++++++++++++--------------------- + 1 file changed, 22 insertions(+), 21 deletions(-) + +diff --git a/fs/ceph/file.c b/fs/ceph/file.c +index 23dcfb916298..5233bbab8a76 100644 +--- a/fs/ceph/file.c ++++ b/fs/ceph/file.c +@@ -1026,6 +1026,16 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, + len = read_off + read_len - off; + more = len < iov_iter_count(to); + ++ op = &req->r_ops[0]; ++ if (sparse) { ++ extent_cnt = __ceph_sparse_read_ext_count(inode, read_len); ++ ret = ceph_alloc_sparse_ext_map(op, extent_cnt); ++ if (ret) { ++ ceph_osdc_put_request(req); ++ break; ++ } ++ } ++ + num_pages = calc_pages_for(read_off, read_len); + page_off = offset_in_page(off); + pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); +@@ -1039,16 +1049,6 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, + offset_in_page(read_off), + false, true); + +- op = &req->r_ops[0]; +- if (sparse) { +- extent_cnt = __ceph_sparse_read_ext_count(inode, read_len); +- ret = ceph_alloc_sparse_ext_map(op, extent_cnt); +- if (ret) { +- ceph_osdc_put_request(req); +- break; +- } +- } +- + ceph_osdc_start_request(osdc, req); + ret = ceph_osdc_wait_request(osdc, req); + +@@ -1454,6 +1454,16 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, + break; + } + ++ op = &req->r_ops[0]; ++ if (sparse) { ++ extent_cnt = __ceph_sparse_read_ext_count(inode, size); ++ ret = ceph_alloc_sparse_ext_map(op, extent_cnt); ++ if (ret) { ++ ceph_osdc_put_request(req); ++ break; ++ } ++ } ++ + len = iter_get_bvecs_alloc(iter, size, &bvecs, &num_pages); + if (len < 0) { + ceph_osdc_put_request(req); +@@ -1463,6 +1473,8 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, + if (len != size) + osd_req_op_extent_update(req, 0, len); + ++ osd_req_op_extent_osd_data_bvecs(req, 0, bvecs, num_pages, len); ++ + /* + * To simplify error handling, allow AIO when IO within i_size + * or IO can be satisfied by single OSD request. +@@ -1494,17 +1506,6 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, + req->r_mtime = mtime; + } + +- osd_req_op_extent_osd_data_bvecs(req, 0, bvecs, num_pages, len); +- op = &req->r_ops[0]; +- if (sparse) { +- extent_cnt = __ceph_sparse_read_ext_count(inode, size); +- ret = ceph_alloc_sparse_ext_map(op, extent_cnt); +- if (ret) { +- ceph_osdc_put_request(req); +- break; +- } +- } +- + if (aio_req) { + aio_req->total_len += len; + aio_req->num_reqs++; +-- +2.39.5 + diff --git a/queue-6.6/ceph-try-to-allocate-a-smaller-extent-map-for-sparse.patch b/queue-6.6/ceph-try-to-allocate-a-smaller-extent-map-for-sparse.patch new file mode 100644 index 00000000000..69939b8cbe9 --- /dev/null +++ b/queue-6.6/ceph-try-to-allocate-a-smaller-extent-map-for-sparse.patch @@ -0,0 +1,144 @@ +From 4f467a6e3ff5a255e73e9ca9f09818efccef155b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 7 Nov 2023 10:44:41 +0800 +Subject: ceph: try to allocate a smaller extent map for sparse read + +From: Xiubo Li + +[ Upstream commit aaefabc4a5f7ae48682c4d2d5d10faaf95c08eb9 ] + +In fscrypt case and for a smaller read length we can predict the +max count of the extent map. And for small read length use cases +this could save some memories. + +[ idryomov: squash into a single patch to avoid build break, drop + redundant variable in ceph_alloc_sparse_ext_map() ] + +Signed-off-by: Xiubo Li +Reviewed-by: Ilya Dryomov +Signed-off-by: Ilya Dryomov +Stable-dep-of: 18d44c5d062b ("ceph: allocate sparse_ext map only for sparse reads") +Signed-off-by: Sasha Levin +--- + fs/ceph/addr.c | 4 +++- + fs/ceph/file.c | 8 ++++++-- + fs/ceph/super.h | 14 ++++++++++++++ + include/linux/ceph/osd_client.h | 7 +++++-- + 4 files changed, 28 insertions(+), 5 deletions(-) + +diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c +index 1a2776025e98..2c92de964c5a 100644 +--- a/fs/ceph/addr.c ++++ b/fs/ceph/addr.c +@@ -355,6 +355,7 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) + u64 len = subreq->len; + bool sparse = IS_ENCRYPTED(inode) || ceph_test_mount_opt(fsc, SPARSEREAD); + u64 off = subreq->start; ++ int extent_cnt; + + if (ceph_inode_is_shutdown(inode)) { + err = -EIO; +@@ -377,7 +378,8 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) + } + + if (sparse) { +- err = ceph_alloc_sparse_ext_map(&req->r_ops[0]); ++ extent_cnt = __ceph_sparse_read_ext_count(inode, len); ++ err = ceph_alloc_sparse_ext_map(&req->r_ops[0], extent_cnt); + if (err) + goto out; + } +diff --git a/fs/ceph/file.c b/fs/ceph/file.c +index 813974244a9d..23dcfb916298 100644 +--- a/fs/ceph/file.c ++++ b/fs/ceph/file.c +@@ -1001,6 +1001,7 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, + struct ceph_osd_req_op *op; + u64 read_off = off; + u64 read_len = len; ++ int extent_cnt; + + /* determine new offset/length if encrypted */ + ceph_fscrypt_adjust_off_and_len(inode, &read_off, &read_len); +@@ -1040,7 +1041,8 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, + + op = &req->r_ops[0]; + if (sparse) { +- ret = ceph_alloc_sparse_ext_map(op); ++ extent_cnt = __ceph_sparse_read_ext_count(inode, read_len); ++ ret = ceph_alloc_sparse_ext_map(op, extent_cnt); + if (ret) { + ceph_osdc_put_request(req); + break; +@@ -1431,6 +1433,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, + ssize_t len; + struct ceph_osd_req_op *op; + int readop = sparse ? CEPH_OSD_OP_SPARSE_READ : CEPH_OSD_OP_READ; ++ int extent_cnt; + + if (write) + size = min_t(u64, size, fsc->mount_options->wsize); +@@ -1494,7 +1497,8 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, + osd_req_op_extent_osd_data_bvecs(req, 0, bvecs, num_pages, len); + op = &req->r_ops[0]; + if (sparse) { +- ret = ceph_alloc_sparse_ext_map(op); ++ extent_cnt = __ceph_sparse_read_ext_count(inode, size); ++ ret = ceph_alloc_sparse_ext_map(op, extent_cnt); + if (ret) { + ceph_osdc_put_request(req); + break; +diff --git a/fs/ceph/super.h b/fs/ceph/super.h +index 8efd4ba60774..5903e3fb6d75 100644 +--- a/fs/ceph/super.h ++++ b/fs/ceph/super.h +@@ -3,6 +3,7 @@ + #define _FS_CEPH_SUPER_H + + #include ++#include + + #include + #include +@@ -1401,6 +1402,19 @@ static inline void __ceph_update_quota(struct ceph_inode_info *ci, + ceph_adjust_quota_realms_count(&ci->netfs.inode, has_quota); + } + ++static inline int __ceph_sparse_read_ext_count(struct inode *inode, u64 len) ++{ ++ int cnt = 0; ++ ++ if (IS_ENCRYPTED(inode)) { ++ cnt = len >> CEPH_FSCRYPT_BLOCK_SHIFT; ++ if (cnt > CEPH_SPARSE_EXT_ARRAY_INITIAL) ++ cnt = 0; ++ } ++ ++ return cnt; ++} ++ + extern void ceph_handle_quota(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session, + struct ceph_msg *msg); +diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h +index f703fb8030de..50e409e84466 100644 +--- a/include/linux/ceph/osd_client.h ++++ b/include/linux/ceph/osd_client.h +@@ -573,9 +573,12 @@ int __ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt); + */ + #define CEPH_SPARSE_EXT_ARRAY_INITIAL 16 + +-static inline int ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op) ++static inline int ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt) + { +- return __ceph_alloc_sparse_ext_map(op, CEPH_SPARSE_EXT_ARRAY_INITIAL); ++ if (!cnt) ++ cnt = CEPH_SPARSE_EXT_ARRAY_INITIAL; ++ ++ return __ceph_alloc_sparse_ext_map(op, cnt); + } + + extern void ceph_osdc_get_request(struct ceph_osd_request *req); +-- +2.39.5 + diff --git a/queue-6.6/media-dvb-frontends-dib3000mb-fix-uninit-value-in-di.patch b/queue-6.6/media-dvb-frontends-dib3000mb-fix-uninit-value-in-di.patch new file mode 100644 index 00000000000..ce67de97a32 --- /dev/null +++ b/queue-6.6/media-dvb-frontends-dib3000mb-fix-uninit-value-in-di.patch @@ -0,0 +1,62 @@ +From bf30e6deaa923acc334dedf8b8417d8e76b7ab19 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 17 May 2024 08:58:00 -0700 +Subject: media: dvb-frontends: dib3000mb: fix uninit-value in + dib3000_write_reg + +From: Nikita Zhandarovich + +[ Upstream commit 2dd59fe0e19e1ab955259978082b62e5751924c7 ] + +Syzbot reports [1] an uninitialized value issue found by KMSAN in +dib3000_read_reg(). + +Local u8 rb[2] is used in i2c_transfer() as a read buffer; in case +that call fails, the buffer may end up with some undefined values. + +Since no elaborate error handling is expected in dib3000_write_reg(), +simply zero out rb buffer to mitigate the problem. + +[1] Syzkaller report +dvb-usb: bulk message failed: -22 (6/0) +===================================================== +BUG: KMSAN: uninit-value in dib3000mb_attach+0x2d8/0x3c0 drivers/media/dvb-frontends/dib3000mb.c:758 + dib3000mb_attach+0x2d8/0x3c0 drivers/media/dvb-frontends/dib3000mb.c:758 + dibusb_dib3000mb_frontend_attach+0x155/0x2f0 drivers/media/usb/dvb-usb/dibusb-mb.c:31 + dvb_usb_adapter_frontend_init+0xed/0x9a0 drivers/media/usb/dvb-usb/dvb-usb-dvb.c:290 + dvb_usb_adapter_init drivers/media/usb/dvb-usb/dvb-usb-init.c:90 [inline] + dvb_usb_init drivers/media/usb/dvb-usb/dvb-usb-init.c:186 [inline] + dvb_usb_device_init+0x25a8/0x3760 drivers/media/usb/dvb-usb/dvb-usb-init.c:310 + dibusb_probe+0x46/0x250 drivers/media/usb/dvb-usb/dibusb-mb.c:110 +... +Local variable rb created at: + dib3000_read_reg+0x86/0x4e0 drivers/media/dvb-frontends/dib3000mb.c:54 + dib3000mb_attach+0x123/0x3c0 drivers/media/dvb-frontends/dib3000mb.c:758 +... + +Fixes: 74340b0a8bc6 ("V4L/DVB (4457): Remove dib3000-common-module") +Reported-by: syzbot+c88fc0ebe0d5935c70da@syzkaller.appspotmail.com +Signed-off-by: Nikita Zhandarovich +Link: https://lore.kernel.org/r/20240517155800.9881-1-n.zhandarovich@fintech.ru +Signed-off-by: Mauro Carvalho Chehab +Signed-off-by: Sasha Levin +--- + drivers/media/dvb-frontends/dib3000mb.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/media/dvb-frontends/dib3000mb.c b/drivers/media/dvb-frontends/dib3000mb.c +index c598b2a63325..7c452ddd9e40 100644 +--- a/drivers/media/dvb-frontends/dib3000mb.c ++++ b/drivers/media/dvb-frontends/dib3000mb.c +@@ -51,7 +51,7 @@ MODULE_PARM_DESC(debug, "set debugging level (1=info,2=xfer,4=setfe,8=getfe (|-a + static int dib3000_read_reg(struct dib3000_state *state, u16 reg) + { + u8 wb[] = { ((reg >> 8) | 0x80) & 0xff, reg & 0xff }; +- u8 rb[2]; ++ u8 rb[2] = {}; + struct i2c_msg msg[] = { + { .addr = state->config.demod_address, .flags = 0, .buf = wb, .len = 2 }, + { .addr = state->config.demod_address, .flags = I2C_M_RD, .buf = rb, .len = 2 }, +-- +2.39.5 + diff --git a/queue-6.6/mm-vmstat-fix-a-w-1-clang-compiler-warning.patch b/queue-6.6/mm-vmstat-fix-a-w-1-clang-compiler-warning.patch new file mode 100644 index 00000000000..65112a24d03 --- /dev/null +++ b/queue-6.6/mm-vmstat-fix-a-w-1-clang-compiler-warning.patch @@ -0,0 +1,43 @@ +From 61e7a5c6fc88bf17949890241db1baf41001c14b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 12 Dec 2024 13:31:26 -0800 +Subject: mm/vmstat: fix a W=1 clang compiler warning + +From: Bart Van Assche + +[ Upstream commit 30c2de0a267c04046d89e678cc0067a9cfb455df ] + +Fix the following clang compiler warning that is reported if the kernel is +built with W=1: + +./include/linux/vmstat.h:518:36: error: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Werror,-Wenum-enum-conversion] + 518 | return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_" + | ~~~~~~~~~~~ ^ ~~~ + +Link: https://lkml.kernel.org/r/20241212213126.1269116-1-bvanassche@acm.org +Fixes: 9d7ea9a297e6 ("mm/vmstat: add helpers to get vmstat item names for each enum type") +Signed-off-by: Bart Van Assche +Cc: Konstantin Khlebnikov +Cc: Nathan Chancellor +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + include/linux/vmstat.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h +index fed855bae6d8..3219b368db79 100644 +--- a/include/linux/vmstat.h ++++ b/include/linux/vmstat.h +@@ -519,7 +519,7 @@ static inline const char *node_stat_name(enum node_stat_item item) + + static inline const char *lru_list_name(enum lru_list lru) + { +- return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_" ++ return node_stat_name(NR_LRU_BASE + (enum node_stat_item)lru) + 3; // skip "nr_" + } + + static inline const char *writeback_stat_name(enum writeback_stat_item item) +-- +2.39.5 + diff --git a/queue-6.6/nfsd-restore-callback-functionality-for-nfsv4.0.patch b/queue-6.6/nfsd-restore-callback-functionality-for-nfsv4.0.patch new file mode 100644 index 00000000000..9dc8c3faa0d --- /dev/null +++ b/queue-6.6/nfsd-restore-callback-functionality-for-nfsv4.0.patch @@ -0,0 +1,51 @@ +From a00416c3c42c674bf5acca21b344b02a2bd42d53 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 20 Dec 2024 15:28:18 +1100 +Subject: nfsd: restore callback functionality for NFSv4.0 + +From: NeilBrown + +[ Upstream commit 7917f01a286ce01e9c085e24468421f596ee1a0c ] + +A recent patch inadvertently broke callbacks for NFSv4.0. + +In the 4.0 case we do not expect a session to be found but still need to +call setup_callback_client() which will not try to dereference it. + +This patch moves the check for failure to find a session into the 4.1+ +branch of setup_callback_client() + +Fixes: 1e02c641c3a4 ("NFSD: Prevent NULL dereference in nfsd4_process_cb_update()") +Signed-off-by: NeilBrown +Reviewed-by: Jeff Layton +Signed-off-by: Chuck Lever +Signed-off-by: Sasha Levin +--- + fs/nfsd/nfs4callback.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c +index 49a49529c6b8..54ffadf02e03 100644 +--- a/fs/nfsd/nfs4callback.c ++++ b/fs/nfsd/nfs4callback.c +@@ -986,7 +986,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c + args.authflavor = clp->cl_cred.cr_flavor; + clp->cl_cb_ident = conn->cb_ident; + } else { +- if (!conn->cb_xprt) ++ if (!conn->cb_xprt || !ses) + return -EINVAL; + clp->cl_cb_session = ses; + args.bc_xprt = conn->cb_xprt; +@@ -1379,8 +1379,6 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb) + ses = c->cn_session; + } + spin_unlock(&clp->cl_lock); +- if (!c) +- return; + + err = setup_callback_client(clp, &conn, ses); + if (err) { +-- +2.39.5 + diff --git a/queue-6.6/nfsd-revert-nfsd-release-svc_expkey-svc_export-with-.patch b/queue-6.6/nfsd-revert-nfsd-release-svc_expkey-svc_export-with-.patch new file mode 100644 index 00000000000..0596db066f0 --- /dev/null +++ b/queue-6.6/nfsd-revert-nfsd-release-svc_expkey-svc_export-with-.patch @@ -0,0 +1,132 @@ +From f27ad9dce8f36f887ef33f140ddd917f58edbe3d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 16 Dec 2024 22:21:52 +0800 +Subject: nfsd: Revert "nfsd: release svc_expkey/svc_export with rcu_work" + +From: Yang Erkun + +[ Upstream commit 69d803c40edeaf94089fbc8751c9b746cdc35044 ] + +This reverts commit f8c989a0c89a75d30f899a7cabdc14d72522bb8d. + +Before this commit, svc_export_put or expkey_put will call path_put with +sync mode. After this commit, path_put will be called with async mode. +And this can lead the unexpected results show as follow. + +mkfs.xfs -f /dev/sda +echo "/ *(rw,no_root_squash,fsid=0)" > /etc/exports +echo "/mnt *(rw,no_root_squash,fsid=1)" >> /etc/exports +exportfs -ra +service nfs-server start +mount -t nfs -o vers=4.0 127.0.0.1:/mnt /mnt1 +mount /dev/sda /mnt/sda +touch /mnt1/sda/file +exportfs -r +umount /mnt/sda # failed unexcepted + +The touch will finally call nfsd_cross_mnt, add refcount to mount, and +then add cache_head. Before this commit, exportfs -r will call +cache_flush to cleanup all cache_head, and path_put in +svc_export_put/expkey_put will be finished with sync mode. So, the +latter umount will always success. However, after this commit, path_put +will be called with async mode, the latter umount may failed, and if +we add some delay, umount will success too. Personally I think this bug +and should be fixed. We first revert before bugfix patch, and then fix +the original bug with a different way. + +Fixes: f8c989a0c89a ("nfsd: release svc_expkey/svc_export with rcu_work") +Signed-off-by: Yang Erkun +Signed-off-by: Chuck Lever +Signed-off-by: Sasha Levin +--- + fs/nfsd/export.c | 31 ++++++------------------------- + fs/nfsd/export.h | 4 ++-- + 2 files changed, 8 insertions(+), 27 deletions(-) + +diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c +index d4d3ec58047e..4b5d998cbc2f 100644 +--- a/fs/nfsd/export.c ++++ b/fs/nfsd/export.c +@@ -40,24 +40,15 @@ + #define EXPKEY_HASHMAX (1 << EXPKEY_HASHBITS) + #define EXPKEY_HASHMASK (EXPKEY_HASHMAX -1) + +-static void expkey_put_work(struct work_struct *work) ++static void expkey_put(struct kref *ref) + { +- struct svc_expkey *key = +- container_of(to_rcu_work(work), struct svc_expkey, ek_rcu_work); ++ struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref); + + if (test_bit(CACHE_VALID, &key->h.flags) && + !test_bit(CACHE_NEGATIVE, &key->h.flags)) + path_put(&key->ek_path); + auth_domain_put(key->ek_client); +- kfree(key); +-} +- +-static void expkey_put(struct kref *ref) +-{ +- struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref); +- +- INIT_RCU_WORK(&key->ek_rcu_work, expkey_put_work); +- queue_rcu_work(system_wq, &key->ek_rcu_work); ++ kfree_rcu(key, ek_rcu); + } + + static int expkey_upcall(struct cache_detail *cd, struct cache_head *h) +@@ -360,26 +351,16 @@ static void export_stats_destroy(struct export_stats *stats) + EXP_STATS_COUNTERS_NUM); + } + +-static void svc_export_put_work(struct work_struct *work) ++static void svc_export_put(struct kref *ref) + { +- struct svc_export *exp = +- container_of(to_rcu_work(work), struct svc_export, ex_rcu_work); +- ++ struct svc_export *exp = container_of(ref, struct svc_export, h.ref); + path_put(&exp->ex_path); + auth_domain_put(exp->ex_client); + nfsd4_fslocs_free(&exp->ex_fslocs); + export_stats_destroy(exp->ex_stats); + kfree(exp->ex_stats); + kfree(exp->ex_uuid); +- kfree(exp); +-} +- +-static void svc_export_put(struct kref *ref) +-{ +- struct svc_export *exp = container_of(ref, struct svc_export, h.ref); +- +- INIT_RCU_WORK(&exp->ex_rcu_work, svc_export_put_work); +- queue_rcu_work(system_wq, &exp->ex_rcu_work); ++ kfree_rcu(exp, ex_rcu); + } + + static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h) +diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h +index 9d895570ceba..ca9dc230ae3d 100644 +--- a/fs/nfsd/export.h ++++ b/fs/nfsd/export.h +@@ -75,7 +75,7 @@ struct svc_export { + u32 ex_layout_types; + struct nfsd4_deviceid_map *ex_devid_map; + struct cache_detail *cd; +- struct rcu_work ex_rcu_work; ++ struct rcu_head ex_rcu; + unsigned long ex_xprtsec_modes; + struct export_stats *ex_stats; + }; +@@ -92,7 +92,7 @@ struct svc_expkey { + u32 ek_fsid[6]; + + struct path ek_path; +- struct rcu_work ek_rcu_work; ++ struct rcu_head ek_rcu; + }; + + #define EX_ISSYNC(exp) (!((exp)->ex_flags & NFSEXP_ASYNC)) +-- +2.39.5 + diff --git a/queue-6.6/series b/queue-6.6/series new file mode 100644 index 00000000000..4e13e3a43f9 --- /dev/null +++ b/queue-6.6/series @@ -0,0 +1,10 @@ +media-dvb-frontends-dib3000mb-fix-uninit-value-in-di.patch +ceph-try-to-allocate-a-smaller-extent-map-for-sparse.patch +ceph-fix-memory-leak-in-ceph_direct_read_write.patch +ceph-allocate-sparse_ext-map-only-for-sparse-reads.patch +mm-vmstat-fix-a-w-1-clang-compiler-warning.patch +tcp_bpf-charge-receive-socket-buffer-in-bpf_tcp_ingr.patch +tcp_bpf-add-sk_rmem_alloc-related-logic-for-tcp_bpf-.patch +bpf-check-negative-offsets-in-__bpf_skb_min_len.patch +nfsd-revert-nfsd-release-svc_expkey-svc_export-with-.patch +nfsd-restore-callback-functionality-for-nfsv4.0.patch diff --git a/queue-6.6/tcp_bpf-add-sk_rmem_alloc-related-logic-for-tcp_bpf-.patch b/queue-6.6/tcp_bpf-add-sk_rmem_alloc-related-logic-for-tcp_bpf-.patch new file mode 100644 index 00000000000..30fa1f03185 --- /dev/null +++ b/queue-6.6/tcp_bpf-add-sk_rmem_alloc-related-logic-for-tcp_bpf-.patch @@ -0,0 +1,116 @@ +From 3d96fc6e00f9a888555b9d95d0e9b2d01b0d57ab Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 10 Dec 2024 01:20:39 +0000 +Subject: tcp_bpf: Add sk_rmem_alloc related logic for tcp_bpf ingress + redirection + +From: Zijian Zhang + +[ Upstream commit d888b7af7c149c115dd6ac772cc11c375da3e17c ] + +When we do sk_psock_verdict_apply->sk_psock_skb_ingress, an sk_msg will +be created out of the skb, and the rmem accounting of the sk_msg will be +handled by the skb. + +For skmsgs in __SK_REDIRECT case of tcp_bpf_send_verdict, when redirecting +to the ingress of a socket, although we sk_rmem_schedule and add sk_msg to +the ingress_msg of sk_redir, we do not update sk_rmem_alloc. As a result, +except for the global memory limit, the rmem of sk_redir is nearly +unlimited. Thus, add sk_rmem_alloc related logic to limit the recv buffer. + +Since the function sk_msg_recvmsg and __sk_psock_purge_ingress_msg are +used in these two paths. We use "msg->skb" to test whether the sk_msg is +skb backed up. If it's not, we shall do the memory accounting explicitly. + +Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface") +Signed-off-by: Zijian Zhang +Signed-off-by: Daniel Borkmann +Reviewed-by: John Fastabend +Link: https://lore.kernel.org/bpf/20241210012039.1669389-3-zijianzhang@bytedance.com +Signed-off-by: Sasha Levin +--- + include/linux/skmsg.h | 11 ++++++++--- + net/core/skmsg.c | 6 +++++- + net/ipv4/tcp_bpf.c | 4 +++- + 3 files changed, 16 insertions(+), 5 deletions(-) + +diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h +index 062fe440f5d0..6ccfd9236387 100644 +--- a/include/linux/skmsg.h ++++ b/include/linux/skmsg.h +@@ -308,17 +308,22 @@ static inline void sock_drop(struct sock *sk, struct sk_buff *skb) + kfree_skb(skb); + } + +-static inline void sk_psock_queue_msg(struct sk_psock *psock, ++static inline bool sk_psock_queue_msg(struct sk_psock *psock, + struct sk_msg *msg) + { ++ bool ret; ++ + spin_lock_bh(&psock->ingress_lock); +- if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) ++ if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) { + list_add_tail(&msg->list, &psock->ingress_msg); +- else { ++ ret = true; ++ } else { + sk_msg_free(psock->sk, msg); + kfree(msg); ++ ret = false; + } + spin_unlock_bh(&psock->ingress_lock); ++ return ret; + } + + static inline struct sk_msg *sk_psock_dequeue_msg(struct sk_psock *psock) +diff --git a/net/core/skmsg.c b/net/core/skmsg.c +index 846fd672f0e5..902098e221b3 100644 +--- a/net/core/skmsg.c ++++ b/net/core/skmsg.c +@@ -445,8 +445,10 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, + if (likely(!peek)) { + sge->offset += copy; + sge->length -= copy; +- if (!msg_rx->skb) ++ if (!msg_rx->skb) { + sk_mem_uncharge(sk, copy); ++ atomic_sub(copy, &sk->sk_rmem_alloc); ++ } + msg_rx->sg.size -= copy; + + if (!sge->length) { +@@ -772,6 +774,8 @@ static void __sk_psock_purge_ingress_msg(struct sk_psock *psock) + + list_for_each_entry_safe(msg, tmp, &psock->ingress_msg, list) { + list_del(&msg->list); ++ if (!msg->skb) ++ atomic_sub(msg->sg.size, &psock->sk->sk_rmem_alloc); + sk_msg_free(psock->sk, msg); + kfree(msg); + } +diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c +index a7b5b160d107..f882054fae5e 100644 +--- a/net/ipv4/tcp_bpf.c ++++ b/net/ipv4/tcp_bpf.c +@@ -56,6 +56,7 @@ static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock, + } + + sk_mem_charge(sk, size); ++ atomic_add(size, &sk->sk_rmem_alloc); + sk_msg_xfer(tmp, msg, i, size); + copied += size; + if (sge->length) +@@ -74,7 +75,8 @@ static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock, + + if (!ret) { + msg->sg.start = i; +- sk_psock_queue_msg(psock, tmp); ++ if (!sk_psock_queue_msg(psock, tmp)) ++ atomic_sub(copied, &sk->sk_rmem_alloc); + sk_psock_data_ready(sk, psock); + } else { + sk_msg_free(sk, tmp); +-- +2.39.5 + diff --git a/queue-6.6/tcp_bpf-charge-receive-socket-buffer-in-bpf_tcp_ingr.patch b/queue-6.6/tcp_bpf-charge-receive-socket-buffer-in-bpf_tcp_ingr.patch new file mode 100644 index 00000000000..476d0702202 --- /dev/null +++ b/queue-6.6/tcp_bpf-charge-receive-socket-buffer-in-bpf_tcp_ingr.patch @@ -0,0 +1,71 @@ +From 221ebfa00bcf79f779b41c73d3a6b58a6dd070b4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 10 Dec 2024 01:20:38 +0000 +Subject: tcp_bpf: Charge receive socket buffer in bpf_tcp_ingress() + +From: Cong Wang + +[ Upstream commit 54f89b3178d5448dd4457afbb98fc1ab99090a65 ] + +When bpf_tcp_ingress() is called, the skmsg is being redirected to the +ingress of the destination socket. Therefore, we should charge its +receive socket buffer, instead of sending socket buffer. + +Because sk_rmem_schedule() tests pfmemalloc of skb, we need to +introduce a wrapper and call it for skmsg. + +Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface") +Signed-off-by: Cong Wang +Signed-off-by: Daniel Borkmann +Reviewed-by: John Fastabend +Link: https://lore.kernel.org/bpf/20241210012039.1669389-2-zijianzhang@bytedance.com +Signed-off-by: Sasha Levin +--- + include/net/sock.h | 10 ++++++++-- + net/ipv4/tcp_bpf.c | 2 +- + 2 files changed, 9 insertions(+), 3 deletions(-) + +diff --git a/include/net/sock.h b/include/net/sock.h +index a6b795ec7c9c..dc625f94ee37 100644 +--- a/include/net/sock.h ++++ b/include/net/sock.h +@@ -1635,7 +1635,7 @@ static inline bool sk_wmem_schedule(struct sock *sk, int size) + } + + static inline bool +-sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size) ++__sk_rmem_schedule(struct sock *sk, int size, bool pfmemalloc) + { + int delta; + +@@ -1643,7 +1643,13 @@ sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size) + return true; + delta = size - sk->sk_forward_alloc; + return delta <= 0 || __sk_mem_schedule(sk, delta, SK_MEM_RECV) || +- skb_pfmemalloc(skb); ++ pfmemalloc; ++} ++ ++static inline bool ++sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size) ++{ ++ return __sk_rmem_schedule(sk, size, skb_pfmemalloc(skb)); + } + + static inline int sk_unused_reserved_mem(const struct sock *sk) +diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c +index 0a42d73c0850..a7b5b160d107 100644 +--- a/net/ipv4/tcp_bpf.c ++++ b/net/ipv4/tcp_bpf.c +@@ -49,7 +49,7 @@ static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock, + sge = sk_msg_elem(msg, i); + size = (apply && apply_bytes < sge->length) ? + apply_bytes : sge->length; +- if (!sk_wmem_schedule(sk, size)) { ++ if (!__sk_rmem_schedule(sk, size, false)) { + if (!copied) + ret = -ENOMEM; + break; +-- +2.39.5 +