From: Sasha Levin Date: Tue, 3 Dec 2024 12:33:28 +0000 (-0500) Subject: Fixes for 6.6 X-Git-Tag: v4.19.325~15^2~5 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c70551541a5ffb3ae263a21fb7896a0da7d4f2c4;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 6.6 Signed-off-by: Sasha Levin --- diff --git a/queue-6.6/9p-xen-fix-init-sequence.patch b/queue-6.6/9p-xen-fix-init-sequence.patch new file mode 100644 index 00000000000..04dd0a41737 --- /dev/null +++ b/queue-6.6/9p-xen-fix-init-sequence.patch @@ -0,0 +1,56 @@ +From 07802e895686d46404deb0cdccff621d2f74ad99 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 19 Nov 2024 21:16:33 +0000 +Subject: 9p/xen: fix init sequence + +From: Alex Zenla + +[ Upstream commit 7ef3ae82a6ebbf4750967d1ce43bcdb7e44ff74b ] + +Large amount of mount hangs observed during hotplugging of 9pfs devices. The +9pfs Xen driver attempts to initialize itself more than once, causing the +frontend and backend to disagree: the backend listens on a channel that the +frontend does not send on, resulting in stalled processing. + +Only allow initialization of 9p frontend once. 
+ +Fixes: c15fe55d14b3b ("9p/xen: fix connection sequence") +Signed-off-by: Alex Zenla +Signed-off-by: Alexander Merritt +Signed-off-by: Ariadne Conill +Reviewed-by: Juergen Gross +Message-ID: <20241119211633.38321-1-alexander@edera.dev> +Signed-off-by: Dominique Martinet +Signed-off-by: Sasha Levin +--- + net/9p/trans_xen.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c +index 1fffe2bed5b02..308dae05aa9a1 100644 +--- a/net/9p/trans_xen.c ++++ b/net/9p/trans_xen.c +@@ -466,6 +466,7 @@ static int xen_9pfs_front_init(struct xenbus_device *dev) + goto error; + } + ++ xenbus_switch_state(dev, XenbusStateInitialised); + return 0; + + error_xenbus: +@@ -513,8 +514,10 @@ static void xen_9pfs_front_changed(struct xenbus_device *dev, + break; + + case XenbusStateInitWait: +- if (!xen_9pfs_front_init(dev)) +- xenbus_switch_state(dev, XenbusStateInitialised); ++ if (dev->state != XenbusStateInitialising) ++ break; ++ ++ xen_9pfs_front_init(dev); + break; + + case XenbusStateConnected: +-- +2.43.0 + diff --git a/queue-6.6/9p-xen-fix-release-of-irq.patch b/queue-6.6/9p-xen-fix-release-of-irq.patch new file mode 100644 index 00000000000..171b3b83b81 --- /dev/null +++ b/queue-6.6/9p-xen-fix-release-of-irq.patch @@ -0,0 +1,42 @@ +From 8bb19e2c54dc519c8bfc1227137e38a8dbc07033 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 21 Nov 2024 22:51:00 +0000 +Subject: 9p/xen: fix release of IRQ + +From: Alex Zenla + +[ Upstream commit e43c608f40c065b30964f0a806348062991b802d ] + +Kernel logs indicate an IRQ was double-freed. + +Pass correct device ID during IRQ release. 
+ +Fixes: 71ebd71921e45 ("xen/9pfs: connect to the backend") +Signed-off-by: Alex Zenla +Signed-off-by: Alexander Merritt +Signed-off-by: Ariadne Conill +Reviewed-by: Juergen Gross +Message-ID: <20241121225100.5736-1-alexander@edera.dev> +[Dominique: remove confusing variable reset to 0] +Signed-off-by: Dominique Martinet +Signed-off-by: Sasha Levin +--- + net/9p/trans_xen.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c +index 308dae05aa9a1..6387ee924a2d6 100644 +--- a/net/9p/trans_xen.c ++++ b/net/9p/trans_xen.c +@@ -287,7 +287,7 @@ static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv) + if (!priv->rings[i].intf) + break; + if (priv->rings[i].irq > 0) +- unbind_from_irqhandler(priv->rings[i].irq, priv->dev); ++ unbind_from_irqhandler(priv->rings[i].irq, ring); + if (priv->rings[i].data.in) { + for (j = 0; + j < (1 << priv->rings[i].intf->ring_order); +-- +2.43.0 + diff --git a/queue-6.6/block-bfq-fix-bfqq-uaf-in-bfq_limit_depth.patch b/queue-6.6/block-bfq-fix-bfqq-uaf-in-bfq_limit_depth.patch new file mode 100644 index 00000000000..cf1992729ac --- /dev/null +++ b/queue-6.6/block-bfq-fix-bfqq-uaf-in-bfq_limit_depth.patch @@ -0,0 +1,199 @@ +From 8a745a979865f291c6bceccad4fe0f44364283eb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 29 Nov 2024 17:15:09 +0800 +Subject: block, bfq: fix bfqq uaf in bfq_limit_depth() + +From: Yu Kuai + +[ Upstream commit e8b8344de3980709080d86c157d24e7de07d70ad ] + +Setting a newly allocated bfqq to bic and removing a freed bfqq from bic are both +protected by bfqd->lock; however, bfq_limit_depth() is dereferencing bfqq +from bic without the lock, which can lead to UAF if the io_context is +shared by multiple tasks. 
+ +For example, test bfq with io_uring can trigger following UAF in v6.6: + +================================================================== +BUG: KASAN: slab-use-after-free in bfqq_group+0x15/0x50 + +Call Trace: + + dump_stack_lvl+0x47/0x80 + print_address_description.constprop.0+0x66/0x300 + print_report+0x3e/0x70 + kasan_report+0xb4/0xf0 + bfqq_group+0x15/0x50 + bfqq_request_over_limit+0x130/0x9a0 + bfq_limit_depth+0x1b5/0x480 + __blk_mq_alloc_requests+0x2b5/0xa00 + blk_mq_get_new_requests+0x11d/0x1d0 + blk_mq_submit_bio+0x286/0xb00 + submit_bio_noacct_nocheck+0x331/0x400 + __block_write_full_folio+0x3d0/0x640 + writepage_cb+0x3b/0xc0 + write_cache_pages+0x254/0x6c0 + write_cache_pages+0x254/0x6c0 + do_writepages+0x192/0x310 + filemap_fdatawrite_wbc+0x95/0xc0 + __filemap_fdatawrite_range+0x99/0xd0 + filemap_write_and_wait_range.part.0+0x4d/0xa0 + blkdev_read_iter+0xef/0x1e0 + io_read+0x1b6/0x8a0 + io_issue_sqe+0x87/0x300 + io_wq_submit_work+0xeb/0x390 + io_worker_handle_work+0x24d/0x550 + io_wq_worker+0x27f/0x6c0 + ret_from_fork_asm+0x1b/0x30 + + +Allocated by task 808602: + kasan_save_stack+0x1e/0x40 + kasan_set_track+0x21/0x30 + __kasan_slab_alloc+0x83/0x90 + kmem_cache_alloc_node+0x1b1/0x6d0 + bfq_get_queue+0x138/0xfa0 + bfq_get_bfqq_handle_split+0xe3/0x2c0 + bfq_init_rq+0x196/0xbb0 + bfq_insert_request.isra.0+0xb5/0x480 + bfq_insert_requests+0x156/0x180 + blk_mq_insert_request+0x15d/0x440 + blk_mq_submit_bio+0x8a4/0xb00 + submit_bio_noacct_nocheck+0x331/0x400 + __blkdev_direct_IO_async+0x2dd/0x330 + blkdev_write_iter+0x39a/0x450 + io_write+0x22a/0x840 + io_issue_sqe+0x87/0x300 + io_wq_submit_work+0xeb/0x390 + io_worker_handle_work+0x24d/0x550 + io_wq_worker+0x27f/0x6c0 + ret_from_fork+0x2d/0x50 + ret_from_fork_asm+0x1b/0x30 + +Freed by task 808589: + kasan_save_stack+0x1e/0x40 + kasan_set_track+0x21/0x30 + kasan_save_free_info+0x27/0x40 + __kasan_slab_free+0x126/0x1b0 + kmem_cache_free+0x10c/0x750 + bfq_put_queue+0x2dd/0x770 + 
__bfq_insert_request.isra.0+0x155/0x7a0 + bfq_insert_request.isra.0+0x122/0x480 + bfq_insert_requests+0x156/0x180 + blk_mq_dispatch_plug_list+0x528/0x7e0 + blk_mq_flush_plug_list.part.0+0xe5/0x590 + __blk_flush_plug+0x3b/0x90 + blk_finish_plug+0x40/0x60 + do_writepages+0x19d/0x310 + filemap_fdatawrite_wbc+0x95/0xc0 + __filemap_fdatawrite_range+0x99/0xd0 + filemap_write_and_wait_range.part.0+0x4d/0xa0 + blkdev_read_iter+0xef/0x1e0 + io_read+0x1b6/0x8a0 + io_issue_sqe+0x87/0x300 + io_wq_submit_work+0xeb/0x390 + io_worker_handle_work+0x24d/0x550 + io_wq_worker+0x27f/0x6c0 + ret_from_fork+0x2d/0x50 + ret_from_fork_asm+0x1b/0x30 + +Fix the problem by protecting bic_to_bfqq() with bfqd->lock. + +CC: Jan Kara +Fixes: 76f1df88bbc2 ("bfq: Limit number of requests consumed by each cgroup") +Signed-off-by: Yu Kuai +Link: https://lore.kernel.org/r/20241129091509.2227136-1-yukuai1@huaweicloud.com +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + block/bfq-iosched.c | 37 ++++++++++++++++++++++++------------- + 1 file changed, 24 insertions(+), 13 deletions(-) + +diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c +index 7e0dcded5713a..dd8ca3f7ba60a 100644 +--- a/block/bfq-iosched.c ++++ b/block/bfq-iosched.c +@@ -582,23 +582,31 @@ static struct request *bfq_choose_req(struct bfq_data *bfqd, + #define BFQ_LIMIT_INLINE_DEPTH 16 + + #ifdef CONFIG_BFQ_GROUP_IOSCHED +-static bool bfqq_request_over_limit(struct bfq_queue *bfqq, int limit) ++static bool bfqq_request_over_limit(struct bfq_data *bfqd, ++ struct bfq_io_cq *bic, blk_opf_t opf, ++ unsigned int act_idx, int limit) + { +- struct bfq_data *bfqd = bfqq->bfqd; +- struct bfq_entity *entity = &bfqq->entity; + struct bfq_entity *inline_entities[BFQ_LIMIT_INLINE_DEPTH]; + struct bfq_entity **entities = inline_entities; +- int depth, level, alloc_depth = BFQ_LIMIT_INLINE_DEPTH; +- int class_idx = bfqq->ioprio_class - 1; ++ int alloc_depth = BFQ_LIMIT_INLINE_DEPTH; + struct bfq_sched_data *sched_data; ++ struct 
bfq_entity *entity; ++ struct bfq_queue *bfqq; + unsigned long wsum; + bool ret = false; +- +- if (!entity->on_st_or_in_serv) +- return false; ++ int depth; ++ int level; + + retry: + spin_lock_irq(&bfqd->lock); ++ bfqq = bic_to_bfqq(bic, op_is_sync(opf), act_idx); ++ if (!bfqq) ++ goto out; ++ ++ entity = &bfqq->entity; ++ if (!entity->on_st_or_in_serv) ++ goto out; ++ + /* +1 for bfqq entity, root cgroup not included */ + depth = bfqg_to_blkg(bfqq_group(bfqq))->blkcg->css.cgroup->level + 1; + if (depth > alloc_depth) { +@@ -643,7 +651,7 @@ static bool bfqq_request_over_limit(struct bfq_queue *bfqq, int limit) + * class. + */ + wsum = 0; +- for (i = 0; i <= class_idx; i++) { ++ for (i = 0; i <= bfqq->ioprio_class - 1; i++) { + wsum = wsum * IOPRIO_BE_NR + + sched_data->service_tree[i].wsum; + } +@@ -666,7 +674,9 @@ static bool bfqq_request_over_limit(struct bfq_queue *bfqq, int limit) + return ret; + } + #else +-static bool bfqq_request_over_limit(struct bfq_queue *bfqq, int limit) ++static bool bfqq_request_over_limit(struct bfq_data *bfqd, ++ struct bfq_io_cq *bic, blk_opf_t opf, ++ unsigned int act_idx, int limit) + { + return false; + } +@@ -704,8 +714,9 @@ static void bfq_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data) + } + + for (act_idx = 0; bic && act_idx < bfqd->num_actuators; act_idx++) { +- struct bfq_queue *bfqq = +- bic_to_bfqq(bic, op_is_sync(opf), act_idx); ++ /* Fast path to check if bfqq is already allocated. */ ++ if (!bic_to_bfqq(bic, op_is_sync(opf), act_idx)) ++ continue; + + /* + * Does queue (or any parent entity) exceed number of +@@ -713,7 +724,7 @@ static void bfq_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data) + * limit depth so that it cannot consume more + * available requests and thus starve other entities. 
+ */ +- if (bfqq && bfqq_request_over_limit(bfqq, limit)) { ++ if (bfqq_request_over_limit(bfqd, bic, opf, act_idx, limit)) { + depth = 1; + break; + } +-- +2.43.0 + diff --git a/queue-6.6/block-return-unsigned-int-from-bdev_io_min.patch b/queue-6.6/block-return-unsigned-int-from-bdev_io_min.patch new file mode 100644 index 00000000000..1bc7cdbb3db --- /dev/null +++ b/queue-6.6/block-return-unsigned-int-from-bdev_io_min.patch @@ -0,0 +1,39 @@ +From 2f6927c11c49c4cb147a42de685e5861c105d952 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 19 Nov 2024 08:26:02 +0100 +Subject: block: return unsigned int from bdev_io_min + +From: Christoph Hellwig + +[ Upstream commit 46fd48ab3ea3eb3bb215684bd66ea3d260b091a9 ] + +The underlying limit is defined as an unsigned int, so return that from +bdev_io_min as well. + +Fixes: ac481c20ef8f ("block: Topology ioctls") +Signed-off-by: Christoph Hellwig +Reviewed-by: Martin K. Petersen +Reviewed-by: John Garry +Link: https://lore.kernel.org/r/20241119072602.1059488-1-hch@lst.de +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + include/linux/blkdev.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h +index a7b65d4ab616e..ef35e9a9878c6 100644 +--- a/include/linux/blkdev.h ++++ b/include/linux/blkdev.h +@@ -1184,7 +1184,7 @@ static inline unsigned int queue_io_min(const struct request_queue *q) + return q->limits.io_min; + } + +-static inline int bdev_io_min(struct block_device *bdev) ++static inline unsigned int bdev_io_min(struct block_device *bdev) + { + return queue_io_min(bdev_get_queue(bdev)); + } +-- +2.43.0 + diff --git a/queue-6.6/cifs-during-remount-make-sure-passwords-are-in-sync.patch b/queue-6.6/cifs-during-remount-make-sure-passwords-are-in-sync.patch new file mode 100644 index 00000000000..44b21478546 --- /dev/null +++ b/queue-6.6/cifs-during-remount-make-sure-passwords-are-in-sync.patch @@ -0,0 +1,166 @@ +From 
3d9bfaebebe5a067aea3f0bab611da7bcba7c495 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 30 Oct 2024 06:45:50 +0000 +Subject: cifs: during remount, make sure passwords are in sync + +From: Shyam Prasad N + +[ Upstream commit 0f0e357902957fba28ed31bde0d6921c6bd1485d ] + +This fixes scenarios where remount can overwrite the only currently +working password, breaking reconnect. + +We recently introduced a password2 field in both ses and ctx structs. +This was done so as to allow the client to rotate passwords for a mount +without any downtime. However, when the client transparently handles +password rotation, it can swap the values of the two password fields +in the ses struct, but not in smb3_fs_context struct that hangs off +cifs_sb. This can lead to a situation where a remount unintentionally +overwrites a working password in the ses struct. + +In order to fix this, we first get the passwords in ctx struct +in-sync with ses struct, before replacing them with the passwords +that could be passed as a part of remount. + +Also, in order to avoid a race condition between smb2_reconnect and +smb3_reconfigure, we make sure to lock session_mutex before changing +password and password2 fields of the ses structure. 
+ +Fixes: 35f834265e0d ("smb3: fix broken reconnect when password changing on the server by allowing password rotation") +Signed-off-by: Shyam Prasad N +Signed-off-by: Meetakshi Setiya +Signed-off-by: Steve French +Signed-off-by: Sasha Levin +--- + fs/smb/client/fs_context.c | 83 +++++++++++++++++++++++++++++++++----- + fs/smb/client/fs_context.h | 1 + + 2 files changed, 75 insertions(+), 9 deletions(-) + +diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c +index 8d7484400fe8e..6ba38bfa645b4 100644 +--- a/fs/smb/client/fs_context.c ++++ b/fs/smb/client/fs_context.c +@@ -888,12 +888,37 @@ do { \ + cifs_sb->ctx->field = NULL; \ + } while (0) + ++int smb3_sync_session_ctx_passwords(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) ++{ ++ if (ses->password && ++ cifs_sb->ctx->password && ++ strcmp(ses->password, cifs_sb->ctx->password)) { ++ kfree_sensitive(cifs_sb->ctx->password); ++ cifs_sb->ctx->password = kstrdup(ses->password, GFP_KERNEL); ++ if (!cifs_sb->ctx->password) ++ return -ENOMEM; ++ } ++ if (ses->password2 && ++ cifs_sb->ctx->password2 && ++ strcmp(ses->password2, cifs_sb->ctx->password2)) { ++ kfree_sensitive(cifs_sb->ctx->password2); ++ cifs_sb->ctx->password2 = kstrdup(ses->password2, GFP_KERNEL); ++ if (!cifs_sb->ctx->password2) { ++ kfree_sensitive(cifs_sb->ctx->password); ++ cifs_sb->ctx->password = NULL; ++ return -ENOMEM; ++ } ++ } ++ return 0; ++} ++ + static int smb3_reconfigure(struct fs_context *fc) + { + struct smb3_fs_context *ctx = smb3_fc2context(fc); + struct dentry *root = fc->root; + struct cifs_sb_info *cifs_sb = CIFS_SB(root->d_sb); + struct cifs_ses *ses = cifs_sb_master_tcon(cifs_sb)->ses; ++ char *new_password = NULL, *new_password2 = NULL; + bool need_recon = false; + int rc; + +@@ -913,21 +938,61 @@ static int smb3_reconfigure(struct fs_context *fc) + STEAL_STRING(cifs_sb, ctx, UNC); + STEAL_STRING(cifs_sb, ctx, source); + STEAL_STRING(cifs_sb, ctx, username); ++ + if (need_recon == false) + 
STEAL_STRING_SENSITIVE(cifs_sb, ctx, password); + else { +- kfree_sensitive(ses->password); +- ses->password = kstrdup(ctx->password, GFP_KERNEL); +- if (!ses->password) +- return -ENOMEM; +- kfree_sensitive(ses->password2); +- ses->password2 = kstrdup(ctx->password2, GFP_KERNEL); +- if (!ses->password2) { +- kfree_sensitive(ses->password); +- ses->password = NULL; ++ if (ctx->password) { ++ new_password = kstrdup(ctx->password, GFP_KERNEL); ++ if (!new_password) ++ return -ENOMEM; ++ } else ++ STEAL_STRING_SENSITIVE(cifs_sb, ctx, password); ++ } ++ ++ /* ++ * if a new password2 has been specified, then reset it's value ++ * inside the ses struct ++ */ ++ if (ctx->password2) { ++ new_password2 = kstrdup(ctx->password2, GFP_KERNEL); ++ if (!new_password2) { ++ kfree_sensitive(new_password); + return -ENOMEM; + } ++ } else ++ STEAL_STRING_SENSITIVE(cifs_sb, ctx, password2); ++ ++ /* ++ * we may update the passwords in the ses struct below. Make sure we do ++ * not race with smb2_reconnect ++ */ ++ mutex_lock(&ses->session_mutex); ++ ++ /* ++ * smb2_reconnect may swap password and password2 in case session setup ++ * failed. First get ctx passwords in sync with ses passwords. 
It should ++ * be okay to do this even if this function were to return an error at a ++ * later stage ++ */ ++ rc = smb3_sync_session_ctx_passwords(cifs_sb, ses); ++ if (rc) ++ return rc; ++ ++ /* ++ * now that allocations for passwords are done, commit them ++ */ ++ if (new_password) { ++ kfree_sensitive(ses->password); ++ ses->password = new_password; + } ++ if (new_password2) { ++ kfree_sensitive(ses->password2); ++ ses->password2 = new_password2; ++ } ++ ++ mutex_unlock(&ses->session_mutex); ++ + STEAL_STRING(cifs_sb, ctx, domainname); + STEAL_STRING(cifs_sb, ctx, nodename); + STEAL_STRING(cifs_sb, ctx, iocharset); +diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h +index cf577ec0dd0ac..bbd2063ab838d 100644 +--- a/fs/smb/client/fs_context.h ++++ b/fs/smb/client/fs_context.h +@@ -298,6 +298,7 @@ static inline struct smb3_fs_context *smb3_fc2context(const struct fs_context *f + } + + extern int smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx); ++extern int smb3_sync_session_ctx_passwords(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses); + extern void smb3_update_mnt_flags(struct cifs_sb_info *cifs_sb); + + /* +-- +2.43.0 + diff --git a/queue-6.6/cifs-fix-parsing-native-symlinks-relative-to-the-exp.patch b/queue-6.6/cifs-fix-parsing-native-symlinks-relative-to-the-exp.patch new file mode 100644 index 00000000000..da190f506b6 --- /dev/null +++ b/queue-6.6/cifs-fix-parsing-native-symlinks-relative-to-the-exp.patch @@ -0,0 +1,383 @@ +From 4f7361582c00c2031cebcc39046e7522c290fd17 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 23 Sep 2024 22:40:38 +0200 +Subject: cifs: Fix parsing native symlinks relative to the export +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Pali Rohár + +[ Upstream commit 723f4ef90452aa629f3d923e92e0449d69362b1d ] + +SMB symlink which has SYMLINK_FLAG_RELATIVE set is relative (as opposite of +the absolute) and it can be 
relative either to the current directory (where +the symlink is stored) or relative to the top level export path. To what it +is relative depends on the first character of the symlink target path. + +If the first character is a path separator then the symlink is relative to the +export, otherwise to the current directory. Linux (and generally POSIX +systems) supports only symlink paths relative to the current directory +where the symlink is stored. + +Currently if the Linux SMB client reads a relative SMB symlink with the first +character as a path separator (slash), it leaves it as is, which means that Linux +interprets it as an absolute symlink pointing from the root (/). But this +location is different than the top level directory of the SMB export (unless +the SMB export was mounted to the root) and therefore SMB symlinks relative to +the export are interpreted wrongly by the Linux SMB client. + +Fix this problem. As Linux does not have an equivalent of the path relative to +the top of the mount point, convert such a symlink target path to one relative to +the current directory. Do this by prepending "../" pattern N times before +the SMB target path, where N is the number of path separators found in SMB +symlink path. + +So for example, if SMB share is mounted to Linux path /mnt/share/, symlink +is stored in file /mnt/share/test/folder1/symlink (so SMB symlink path is +test\folder1\symlink) and SMB symlink target points to \test\folder2\file, +then convert symlink target path to Linux path ../../test/folder2/file. + +Deduplicate code for parsing SMB symlinks in native form from functions +smb2_parse_symlink_response() and parse_reparse_native_symlink() into new +function smb2_parse_native_symlink() and pass into this new function a new +full_path parameter from callers, which specifies the SMB full path where the +symlink is stored. + +This change fixes resolving of the native Windows symlinks relative to the +top level directory of the SMB share. 
+ +Signed-off-by: Pali Rohár +Signed-off-by: Steve French +Stable-dep-of: f4ca4f5a36ea ("cifs: Fix parsing reparse point with native symlink in SMB1 non-UNICODE session") +Signed-off-by: Sasha Levin +--- + fs/smb/client/cifsglob.h | 1 + + fs/smb/client/cifsproto.h | 1 + + fs/smb/client/inode.c | 1 + + fs/smb/client/reparse.c | 90 +++++++++++++++++++++++++++++++++------ + fs/smb/client/reparse.h | 4 +- + fs/smb/client/smb1ops.c | 3 +- + fs/smb/client/smb2file.c | 21 +++++---- + fs/smb/client/smb2inode.c | 6 ++- + fs/smb/client/smb2proto.h | 9 +++- + 9 files changed, 108 insertions(+), 28 deletions(-) + +diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h +index f799f46d9d0b0..6b57b167a49d8 100644 +--- a/fs/smb/client/cifsglob.h ++++ b/fs/smb/client/cifsglob.h +@@ -592,6 +592,7 @@ struct smb_version_operations { + /* Check for STATUS_NETWORK_NAME_DELETED */ + bool (*is_network_name_deleted)(char *buf, struct TCP_Server_Info *srv); + int (*parse_reparse_point)(struct cifs_sb_info *cifs_sb, ++ const char *full_path, + struct kvec *rsp_iov, + struct cifs_open_info_data *data); + int (*create_reparse_symlink)(const unsigned int xid, +diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h +index fbc358c09da3b..fa7901ad3b80b 100644 +--- a/fs/smb/client/cifsproto.h ++++ b/fs/smb/client/cifsproto.h +@@ -679,6 +679,7 @@ char *extract_hostname(const char *unc); + char *extract_sharename(const char *unc); + int parse_reparse_point(struct reparse_data_buffer *buf, + u32 plen, struct cifs_sb_info *cifs_sb, ++ const char *full_path, + bool unicode, struct cifs_open_info_data *data); + int cifs_sfu_make_node(unsigned int xid, struct inode *inode, + struct dentry *dentry, struct cifs_tcon *tcon, +diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c +index e381ee668849a..0f73f0dc6deb3 100644 +--- a/fs/smb/client/inode.c ++++ b/fs/smb/client/inode.c +@@ -1054,6 +1054,7 @@ static int reparse_info_to_fattr(struct cifs_open_info_data *data, + rc = 0; + } 
else if (iov && server->ops->parse_reparse_point) { + rc = server->ops->parse_reparse_point(cifs_sb, ++ full_path, + iov, data); + } + break; +diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c +index 90da1e2b6217b..f74d0a86f44a4 100644 +--- a/fs/smb/client/reparse.c ++++ b/fs/smb/client/reparse.c +@@ -535,9 +535,76 @@ static int parse_reparse_posix(struct reparse_posix_data *buf, + return 0; + } + ++int smb2_parse_native_symlink(char **target, const char *buf, unsigned int len, ++ bool unicode, bool relative, ++ const char *full_path, ++ struct cifs_sb_info *cifs_sb) ++{ ++ char sep = CIFS_DIR_SEP(cifs_sb); ++ char *linux_target = NULL; ++ char *smb_target = NULL; ++ int levels; ++ int rc; ++ int i; ++ ++ smb_target = cifs_strndup_from_utf16(buf, len, unicode, cifs_sb->local_nls); ++ if (!smb_target) { ++ rc = -ENOMEM; ++ goto out; ++ } ++ ++ if (smb_target[0] == sep && relative) { ++ /* ++ * This is a relative SMB symlink from the top of the share, ++ * which is the top level directory of the Linux mount point. ++ * Linux does not support such relative symlinks, so convert ++ * it to the relative symlink from the current directory. ++ * full_path is the SMB path to the symlink (from which is ++ * extracted current directory) and smb_target is the SMB path ++ * where symlink points, therefore full_path must always be on ++ * the SMB share. 
++ */ ++ int smb_target_len = strlen(smb_target)+1; ++ levels = 0; ++ for (i = 1; full_path[i]; i++) { /* i=1 to skip leading sep */ ++ if (full_path[i] == sep) ++ levels++; ++ } ++ linux_target = kmalloc(levels*3 + smb_target_len, GFP_KERNEL); ++ if (!linux_target) { ++ rc = -ENOMEM; ++ goto out; ++ } ++ for (i = 0; i < levels; i++) { ++ linux_target[i*3 + 0] = '.'; ++ linux_target[i*3 + 1] = '.'; ++ linux_target[i*3 + 2] = sep; ++ } ++ memcpy(linux_target + levels*3, smb_target+1, smb_target_len); /* +1 to skip leading sep */ ++ } else { ++ linux_target = smb_target; ++ smb_target = NULL; ++ } ++ ++ if (sep == '\\') ++ convert_delimiter(linux_target, '/'); ++ ++ rc = 0; ++ *target = linux_target; ++ ++ cifs_dbg(FYI, "%s: symlink target: %s\n", __func__, *target); ++ ++out: ++ if (rc != 0) ++ kfree(linux_target); ++ kfree(smb_target); ++ return rc; ++} ++ + static int parse_reparse_symlink(struct reparse_symlink_data_buffer *sym, + u32 plen, bool unicode, + struct cifs_sb_info *cifs_sb, ++ const char *full_path, + struct cifs_open_info_data *data) + { + unsigned int len; +@@ -552,20 +619,18 @@ static int parse_reparse_symlink(struct reparse_symlink_data_buffer *sym, + return -EIO; + } + +- data->symlink_target = cifs_strndup_from_utf16(sym->PathBuffer + offs, +- len, unicode, +- cifs_sb->local_nls); +- if (!data->symlink_target) +- return -ENOMEM; +- +- convert_delimiter(data->symlink_target, '/'); +- cifs_dbg(FYI, "%s: target path: %s\n", __func__, data->symlink_target); +- +- return 0; ++ return smb2_parse_native_symlink(&data->symlink_target, ++ sym->PathBuffer + offs, ++ len, ++ unicode, ++ le32_to_cpu(sym->Flags) & SYMLINK_FLAG_RELATIVE, ++ full_path, ++ cifs_sb); + } + + int parse_reparse_point(struct reparse_data_buffer *buf, + u32 plen, struct cifs_sb_info *cifs_sb, ++ const char *full_path, + bool unicode, struct cifs_open_info_data *data) + { + struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); +@@ -580,7 +645,7 @@ int parse_reparse_point(struct 
reparse_data_buffer *buf, + case IO_REPARSE_TAG_SYMLINK: + return parse_reparse_symlink( + (struct reparse_symlink_data_buffer *)buf, +- plen, unicode, cifs_sb, data); ++ plen, unicode, cifs_sb, full_path, data); + case IO_REPARSE_TAG_LX_SYMLINK: + case IO_REPARSE_TAG_AF_UNIX: + case IO_REPARSE_TAG_LX_FIFO: +@@ -596,6 +661,7 @@ int parse_reparse_point(struct reparse_data_buffer *buf, + } + + int smb2_parse_reparse_point(struct cifs_sb_info *cifs_sb, ++ const char *full_path, + struct kvec *rsp_iov, + struct cifs_open_info_data *data) + { +@@ -605,7 +671,7 @@ int smb2_parse_reparse_point(struct cifs_sb_info *cifs_sb, + + buf = (struct reparse_data_buffer *)((u8 *)io + + le32_to_cpu(io->OutputOffset)); +- return parse_reparse_point(buf, plen, cifs_sb, true, data); ++ return parse_reparse_point(buf, plen, cifs_sb, full_path, true, data); + } + + static void wsl_to_fattr(struct cifs_open_info_data *data, +diff --git a/fs/smb/client/reparse.h b/fs/smb/client/reparse.h +index 2a9f4f9f79de0..ff05b0e75c928 100644 +--- a/fs/smb/client/reparse.h ++++ b/fs/smb/client/reparse.h +@@ -117,7 +117,9 @@ int smb2_create_reparse_symlink(const unsigned int xid, struct inode *inode, + int smb2_mknod_reparse(unsigned int xid, struct inode *inode, + struct dentry *dentry, struct cifs_tcon *tcon, + const char *full_path, umode_t mode, dev_t dev); +-int smb2_parse_reparse_point(struct cifs_sb_info *cifs_sb, struct kvec *rsp_iov, ++int smb2_parse_reparse_point(struct cifs_sb_info *cifs_sb, ++ const char *full_path, ++ struct kvec *rsp_iov, + struct cifs_open_info_data *data); + + #endif /* _CIFS_REPARSE_H */ +diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c +index e3a195824b403..5c8fb75b61457 100644 +--- a/fs/smb/client/smb1ops.c ++++ b/fs/smb/client/smb1ops.c +@@ -994,6 +994,7 @@ static int cifs_query_symlink(const unsigned int xid, + } + + static int cifs_parse_reparse_point(struct cifs_sb_info *cifs_sb, ++ const char *full_path, + struct kvec *rsp_iov, + struct 
cifs_open_info_data *data) + { +@@ -1004,7 +1005,7 @@ static int cifs_parse_reparse_point(struct cifs_sb_info *cifs_sb, + + buf = (struct reparse_data_buffer *)((__u8 *)&io->hdr.Protocol + + le32_to_cpu(io->DataOffset)); +- return parse_reparse_point(buf, plen, cifs_sb, unicode, data); ++ return parse_reparse_point(buf, plen, cifs_sb, full_path, unicode, data); + } + + static bool +diff --git a/fs/smb/client/smb2file.c b/fs/smb/client/smb2file.c +index e0ee96d69d495..db9c807115c60 100644 +--- a/fs/smb/client/smb2file.c ++++ b/fs/smb/client/smb2file.c +@@ -63,12 +63,12 @@ static struct smb2_symlink_err_rsp *symlink_data(const struct kvec *iov) + return sym; + } + +-int smb2_parse_symlink_response(struct cifs_sb_info *cifs_sb, const struct kvec *iov, char **path) ++int smb2_parse_symlink_response(struct cifs_sb_info *cifs_sb, const struct kvec *iov, ++ const char *full_path, char **path) + { + struct smb2_symlink_err_rsp *sym; + unsigned int sub_offs, sub_len; + unsigned int print_offs, print_len; +- char *s; + + if (!cifs_sb || !iov || !iov->iov_base || !iov->iov_len || !path) + return -EINVAL; +@@ -86,15 +86,13 @@ int smb2_parse_symlink_response(struct cifs_sb_info *cifs_sb, const struct kvec + iov->iov_len < SMB2_SYMLINK_STRUCT_SIZE + print_offs + print_len) + return -EINVAL; + +- s = cifs_strndup_from_utf16((char *)sym->PathBuffer + sub_offs, sub_len, true, +- cifs_sb->local_nls); +- if (!s) +- return -ENOMEM; +- convert_delimiter(s, '/'); +- cifs_dbg(FYI, "%s: symlink target: %s\n", __func__, s); +- +- *path = s; +- return 0; ++ return smb2_parse_native_symlink(path, ++ (char *)sym->PathBuffer + sub_offs, ++ sub_len, ++ true, ++ le32_to_cpu(sym->Flags) & SYMLINK_FLAG_RELATIVE, ++ full_path, ++ cifs_sb); + } + + int smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, __u32 *oplock, void *buf) +@@ -126,6 +124,7 @@ int smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, __u32 + goto out; + if (hdr->Status == 
STATUS_STOPPED_ON_SYMLINK) { + rc = smb2_parse_symlink_response(oparms->cifs_sb, &err_iov, ++ oparms->path, + &data->symlink_target); + if (!rc) { + memset(smb2_data, 0, sizeof(*smb2_data)); +diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c +index daa841dfbadcf..8ea476b1fe199 100644 +--- a/fs/smb/client/smb2inode.c ++++ b/fs/smb/client/smb2inode.c +@@ -828,6 +828,7 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, + + static int parse_create_response(struct cifs_open_info_data *data, + struct cifs_sb_info *cifs_sb, ++ const char *full_path, + const struct kvec *iov) + { + struct smb2_create_rsp *rsp = iov->iov_base; +@@ -841,6 +842,7 @@ static int parse_create_response(struct cifs_open_info_data *data, + break; + case STATUS_STOPPED_ON_SYMLINK: + rc = smb2_parse_symlink_response(cifs_sb, iov, ++ full_path, + &data->symlink_target); + if (rc) + return rc; +@@ -930,14 +932,14 @@ int smb2_query_path_info(const unsigned int xid, + + switch (rc) { + case 0: +- rc = parse_create_response(data, cifs_sb, &out_iov[0]); ++ rc = parse_create_response(data, cifs_sb, full_path, &out_iov[0]); + break; + case -EOPNOTSUPP: + /* + * BB TODO: When support for special files added to Samba + * re-verify this path. 
+ */ +- rc = parse_create_response(data, cifs_sb, &out_iov[0]); ++ rc = parse_create_response(data, cifs_sb, full_path, &out_iov[0]); + if (rc || !data->reparse_point) + goto out; + +diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h +index f6fafa997e991..613667b46c580 100644 +--- a/fs/smb/client/smb2proto.h ++++ b/fs/smb/client/smb2proto.h +@@ -113,7 +113,14 @@ extern int smb3_query_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb, + const unsigned char *path, char *pbuf, + unsigned int *pbytes_read); +-int smb2_parse_symlink_response(struct cifs_sb_info *cifs_sb, const struct kvec *iov, char **path); ++int smb2_parse_native_symlink(char **target, const char *buf, unsigned int len, ++ bool unicode, bool relative, ++ const char *full_path, ++ struct cifs_sb_info *cifs_sb); ++int smb2_parse_symlink_response(struct cifs_sb_info *cifs_sb, ++ const struct kvec *iov, ++ const char *full_path, ++ char **path); + int smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, __u32 *oplock, + void *buf); + extern int smb2_unlock_range(struct cifsFileInfo *cfile, +-- +2.43.0 + diff --git a/queue-6.6/cifs-fix-parsing-reparse-point-with-native-symlink-i.patch b/queue-6.6/cifs-fix-parsing-reparse-point-with-native-symlink-i.patch new file mode 100644 index 00000000000..0a7bdea4e7c --- /dev/null +++ b/queue-6.6/cifs-fix-parsing-reparse-point-with-native-symlink-i.patch @@ -0,0 +1,52 @@ +From 0cfa371a5bba93f0a1fd0a38d29c3e69800a1577 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 6 Oct 2024 19:30:01 +0200 +Subject: cifs: Fix parsing reparse point with native symlink in SMB1 + non-UNICODE session +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Pali Rohár + +[ Upstream commit f4ca4f5a36eac9b4da378a0f28cbbe38534a0901 ] + +SMB1 NT_TRANSACT_IOCTL/FSCTL_GET_REPARSE_POINT even in non-UNICODE mode +returns reparse buffer in UNICODE/UTF-16 format. 
+ +This is because FSCTL_GET_REPARSE_POINT is NT-based IOCTL which does not +distinguish between 8-bit non-UNICODE and 16-bit UNICODE modes and its path +buffers are always encoded in UTF-16. + +This change fixes reading of native symlinks in SMB1 when UNICODE session +is not active. + +Fixes: ed3e0a149b58 ("smb: client: implement ->query_reparse_point() for SMB1") +Signed-off-by: Pali Rohár +Signed-off-by: Steve French +Signed-off-by: Sasha Levin +--- + fs/smb/client/smb1ops.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c +index 5c8fb75b61457..b0c0572f9d1fb 100644 +--- a/fs/smb/client/smb1ops.c ++++ b/fs/smb/client/smb1ops.c +@@ -1000,12 +1000,11 @@ static int cifs_parse_reparse_point(struct cifs_sb_info *cifs_sb, + { + struct reparse_data_buffer *buf; + TRANSACT_IOCTL_RSP *io = rsp_iov->iov_base; +- bool unicode = !!(io->hdr.Flags2 & SMBFLG2_UNICODE); + u32 plen = le16_to_cpu(io->ByteCount); + + buf = (struct reparse_data_buffer *)((__u8 *)&io->hdr.Protocol + + le32_to_cpu(io->DataOffset)); +- return parse_reparse_point(buf, plen, cifs_sb, full_path, unicode, data); ++ return parse_reparse_point(buf, plen, cifs_sb, full_path, true, data); + } + + static bool +-- +2.43.0 + diff --git a/queue-6.6/cifs-unlock-on-error-in-smb3_reconfigure.patch b/queue-6.6/cifs-unlock-on-error-in-smb3_reconfigure.patch new file mode 100644 index 00000000000..9f6b589d87f --- /dev/null +++ b/queue-6.6/cifs-unlock-on-error-in-smb3_reconfigure.patch @@ -0,0 +1,39 @@ +From 8595fe32e32ab1af3e1aedc2b73457e84e0ff36a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 15 Nov 2024 12:13:58 +0300 +Subject: cifs: unlock on error in smb3_reconfigure() + +From: Dan Carpenter + +[ Upstream commit cda88d2fef7aa7de80b5697e8009fcbbb436f42d ] + +Unlock before returning if smb3_sync_session_ctx_passwords() fails. 
+ +Fixes: 7e654ab7da03 ("cifs: during remount, make sure passwords are in sync") +Signed-off-by: Dan Carpenter +Reviewed-by: Bharath SM +Signed-off-by: Steve French +Signed-off-by: Sasha Levin +--- + fs/smb/client/fs_context.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c +index 6ba38bfa645b4..4e77ba191ef87 100644 +--- a/fs/smb/client/fs_context.c ++++ b/fs/smb/client/fs_context.c +@@ -976,8 +976,10 @@ static int smb3_reconfigure(struct fs_context *fc) + * later stage + */ + rc = smb3_sync_session_ctx_passwords(cifs_sb, ses); +- if (rc) ++ if (rc) { ++ mutex_unlock(&ses->session_mutex); + return rc; ++ } + + /* + * now that allocations for passwords are done, commit them +-- +2.43.0 + diff --git a/queue-6.6/init-modpost-conditionally-check-section-mismatch-to.patch b/queue-6.6/init-modpost-conditionally-check-section-mismatch-to.patch new file mode 100644 index 00000000000..cd80a0b561b --- /dev/null +++ b/queue-6.6/init-modpost-conditionally-check-section-mismatch-to.patch @@ -0,0 +1,203 @@ +From 074f8c4e4b8f1ea98e4953baa4aa5f13bf30a2f9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 7 Jul 2024 01:05:06 +0900 +Subject: init/modpost: conditionally check section mismatch to __meminit* + +From: Masahiro Yamada + +[ Upstream commit 73db3abdca58c8a014ec4c88cf5ef925cbf63669 ] + +This reverts commit eb8f689046b8 ("Use separate sections for __dev/ +_cpu/__mem code/data"). + +Check section mismatch to __meminit* only when CONFIG_MEMORY_HOTPLUG=n. + +With this change, the linker script and modpost become simpler, and we +can get rid of the __ref annotations from the memory hotplug code. 
+ +[sfr@canb.auug.org.au: remove MEM_KEEP from arch/powerpc/kernel/vmlinux.lds.S] + Link: https://lkml.kernel.org/r/20240710093213.2aefb25f@canb.auug.org.au +Link: https://lkml.kernel.org/r/20240706160511.2331061-2-masahiroy@kernel.org +Signed-off-by: Masahiro Yamada +Signed-off-by: Stephen Rothwell +Reviewed-by: Wei Yang +Cc: Stephen Rothwell +Signed-off-by: Andrew Morton +Stable-dep-of: bb43a59944f4 ("Rename .data.unlikely to .data..unlikely") +Signed-off-by: Sasha Levin +--- + arch/powerpc/kernel/vmlinux.lds.S | 2 -- + include/asm-generic/vmlinux.lds.h | 18 ++---------------- + include/linux/init.h | 14 +++++++++----- + scripts/mod/modpost.c | 19 ++++--------------- + 4 files changed, 15 insertions(+), 38 deletions(-) + +diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S +index f420df7888a75..7ab4e2fb28b1e 100644 +--- a/arch/powerpc/kernel/vmlinux.lds.S ++++ b/arch/powerpc/kernel/vmlinux.lds.S +@@ -123,8 +123,6 @@ SECTIONS + */ + *(.sfpr); + *(.text.asan.* .text.tsan.*) +- MEM_KEEP(init.text) +- MEM_KEEP(exit.text) + } :text + + . 
= ALIGN(PAGE_SIZE); +diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h +index 63029bc7c9dd0..5793aedb24c6d 100644 +--- a/include/asm-generic/vmlinux.lds.h ++++ b/include/asm-generic/vmlinux.lds.h +@@ -139,14 +139,6 @@ + * often happens at runtime) + */ + +-#if defined(CONFIG_MEMORY_HOTPLUG) +-#define MEM_KEEP(sec) *(.mem##sec) +-#define MEM_DISCARD(sec) +-#else +-#define MEM_KEEP(sec) +-#define MEM_DISCARD(sec) *(.mem##sec) +-#endif +- + #ifndef CONFIG_HAVE_DYNAMIC_FTRACE_NO_PATCHABLE + #define KEEP_PATCHABLE KEEP(*(__patchable_function_entries)) + #define PATCHABLE_DISCARDS +@@ -355,7 +347,6 @@ + *(.data..decrypted) \ + *(.ref.data) \ + *(.data..shared_aligned) /* percpu related */ \ +- MEM_KEEP(init.data*) \ + *(.data.unlikely) \ + __start_once = .; \ + *(.data.once) \ +@@ -519,7 +510,6 @@ + /* __*init sections */ \ + __init_rodata : AT(ADDR(__init_rodata) - LOAD_OFFSET) { \ + *(.ref.rodata) \ +- MEM_KEEP(init.rodata) \ + } \ + \ + /* Built-in module parameters. 
*/ \ +@@ -570,8 +560,7 @@ + *(.text.unknown .text.unknown.*) \ + NOINSTR_TEXT \ + *(.ref.text) \ +- *(.text.asan.* .text.tsan.*) \ +- MEM_KEEP(init.text*) \ ++ *(.text.asan.* .text.tsan.*) + + + /* sched.text is aling to function alignment to secure we have same +@@ -678,7 +667,6 @@ + #define INIT_DATA \ + KEEP(*(SORT(___kentry+*))) \ + *(.init.data .init.data.*) \ +- MEM_DISCARD(init.data*) \ + KERNEL_CTORS() \ + MCOUNT_REC() \ + *(.init.rodata .init.rodata.*) \ +@@ -686,7 +674,6 @@ + TRACE_SYSCALLS() \ + KPROBE_BLACKLIST() \ + ERROR_INJECT_WHITELIST() \ +- MEM_DISCARD(init.rodata) \ + CLK_OF_TABLES() \ + RESERVEDMEM_OF_TABLES() \ + TIMER_OF_TABLES() \ +@@ -704,8 +691,7 @@ + + #define INIT_TEXT \ + *(.init.text .init.text.*) \ +- *(.text.startup) \ +- MEM_DISCARD(init.text*) ++ *(.text.startup) + + #define EXIT_DATA \ + *(.exit.data .exit.data.*) \ +diff --git a/include/linux/init.h b/include/linux/init.h +index 01b52c9c75268..63d2ee4f1f0e0 100644 +--- a/include/linux/init.h ++++ b/include/linux/init.h +@@ -84,11 +84,15 @@ + + #define __exit __section(".exit.text") __exitused __cold notrace + +-/* Used for MEMORY_HOTPLUG */ +-#define __meminit __section(".meminit.text") __cold notrace \ +- __latent_entropy +-#define __meminitdata __section(".meminit.data") +-#define __meminitconst __section(".meminit.rodata") ++#ifdef CONFIG_MEMORY_HOTPLUG ++#define __meminit ++#define __meminitdata ++#define __meminitconst ++#else ++#define __meminit __init ++#define __meminitdata __initdata ++#define __meminitconst __initconst ++#endif + + /* For assembly routines */ + #define __HEAD .section ".head.text","ax" +diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c +index bd559361ecd27..4110d559ed688 100644 +--- a/scripts/mod/modpost.c ++++ b/scripts/mod/modpost.c +@@ -792,17 +792,14 @@ static void check_section(const char *modname, struct elf_info *elf, + + + #define ALL_INIT_DATA_SECTIONS \ +- ".init.setup", ".init.rodata", ".meminit.rodata", \ +- ".init.data", 
".meminit.data" ++ ".init.setup", ".init.rodata", ".init.data" + + #define ALL_PCI_INIT_SECTIONS \ + ".pci_fixup_early", ".pci_fixup_header", ".pci_fixup_final", \ + ".pci_fixup_enable", ".pci_fixup_resume", \ + ".pci_fixup_resume_early", ".pci_fixup_suspend" + +-#define ALL_XXXINIT_SECTIONS ".meminit.*" +- +-#define ALL_INIT_SECTIONS INIT_SECTIONS, ALL_XXXINIT_SECTIONS ++#define ALL_INIT_SECTIONS ".init.*" + #define ALL_EXIT_SECTIONS ".exit.*" + + #define DATA_SECTIONS ".data", ".data.rel" +@@ -813,9 +810,7 @@ static void check_section(const char *modname, struct elf_info *elf, + ".fixup", ".entry.text", ".exception.text", \ + ".coldtext", ".softirqentry.text" + +-#define INIT_SECTIONS ".init.*" +- +-#define ALL_TEXT_SECTIONS ".init.text", ".meminit.text", ".exit.text", \ ++#define ALL_TEXT_SECTIONS ".init.text", ".exit.text", \ + TEXT_SECTIONS, OTHER_TEXT_SECTIONS + + enum mismatch { +@@ -867,12 +862,6 @@ static const struct sectioncheck sectioncheck[] = { + .bad_tosec = { ALL_EXIT_SECTIONS, NULL }, + .mismatch = TEXTDATA_TO_ANY_EXIT, + }, +-/* Do not reference init code/data from meminit code/data */ +-{ +- .fromsec = { ALL_XXXINIT_SECTIONS, NULL }, +- .bad_tosec = { INIT_SECTIONS, NULL }, +- .mismatch = XXXINIT_TO_SOME_INIT, +-}, + /* Do not use exit code/data from init code */ + { + .fromsec = { ALL_INIT_SECTIONS, NULL }, +@@ -887,7 +876,7 @@ static const struct sectioncheck sectioncheck[] = { + }, + { + .fromsec = { ALL_PCI_INIT_SECTIONS, NULL }, +- .bad_tosec = { INIT_SECTIONS, NULL }, ++ .bad_tosec = { ALL_INIT_SECTIONS, NULL }, + .mismatch = ANY_INIT_TO_ANY_EXIT, + }, + { +-- +2.43.0 + diff --git a/queue-6.6/jffs2-fix-use-of-uninitialized-variable.patch b/queue-6.6/jffs2-fix-use-of-uninitialized-variable.patch new file mode 100644 index 00000000000..3968cda93ce --- /dev/null +++ b/queue-6.6/jffs2-fix-use-of-uninitialized-variable.patch @@ -0,0 +1,57 @@ +From 3df4363a71e7a966e2b2b1ae92b0c084f22fafc6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 1 
Jul 2024 12:52:05 +0800 +Subject: jffs2: fix use of uninitialized variable + +From: Qingfang Deng + +[ Upstream commit 3ba44ee966bc3c41dd8a944f963466c8fcc60dc8 ] + +When building the kernel with -Wmaybe-uninitialized, the compiler +reports this warning: + +In function 'jffs2_mark_erased_block', + inlined from 'jffs2_erase_pending_blocks' at fs/jffs2/erase.c:116:4: +fs/jffs2/erase.c:474:9: warning: 'bad_offset' may be used uninitialized [-Wmaybe-uninitialized] + 474 | jffs2_erase_failed(c, jeb, bad_offset); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +fs/jffs2/erase.c: In function 'jffs2_erase_pending_blocks': +fs/jffs2/erase.c:402:18: note: 'bad_offset' was declared here + 402 | uint32_t bad_offset; + | ^~~~~~~~~~ + +When mtd->point() is used, jffs2_erase_pending_blocks can return -EIO +without initializing bad_offset, which is later used at the filebad +label in jffs2_mark_erased_block. +Fix it by initializing this variable. + +Fixes: 8a0f572397ca ("[JFFS2] Return values of jffs2_block_check_erase error paths") +Signed-off-by: Qingfang Deng +Reviewed-by: Zhihao Cheng +Signed-off-by: Richard Weinberger +Signed-off-by: Sasha Levin +--- + fs/jffs2/erase.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c +index acd32f05b5198..ef3a1e1b6cb06 100644 +--- a/fs/jffs2/erase.c ++++ b/fs/jffs2/erase.c +@@ -338,10 +338,9 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl + } while(--retlen); + mtd_unpoint(c->mtd, jeb->offset, c->sector_size); + if (retlen) { +- pr_warn("Newly-erased block contained word 0x%lx at offset 0x%08tx\n", +- *wordebuf, +- jeb->offset + +- c->sector_size-retlen * sizeof(*wordebuf)); ++ *bad_offset = jeb->offset + c->sector_size - retlen * sizeof(*wordebuf); ++ pr_warn("Newly-erased block contained word 0x%lx at offset 0x%08x\n", ++ *wordebuf, *bad_offset); + return -EIO; + } + return 0; +-- +2.43.0 + diff --git 
a/queue-6.6/modpost-disallow-driver-to-reference-.meminit-sectio.patch b/queue-6.6/modpost-disallow-driver-to-reference-.meminit-sectio.patch new file mode 100644 index 00000000000..bb06fd22c60 --- /dev/null +++ b/queue-6.6/modpost-disallow-driver-to-reference-.meminit-sectio.patch @@ -0,0 +1,50 @@ +From 0fd96cb00e573ef95c68a8de55c06e5c6f06db77 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 23 Oct 2023 02:06:06 +0900 +Subject: modpost: disallow *driver to reference .meminit* sections + +From: Masahiro Yamada + +[ Upstream commit 50cccec15c48814765895891ca0d95d989b6a419 ] + +Drivers must not reference .meminit* sections, which are discarded +when CONFIG_MEMORY_HOTPLUG=n. + +The reason for whitelisting "*driver" in the section mismatch check +was to allow drivers to reference symbols annotated as __devinit or +__devexit that existed in the past. + +Those annotations were removed by the following commits: + + - 54b956b90360 ("Remove __dev* markings from init.h") + - 92e9e6d1f984 ("modpost.c: Stop checking __dev* section mismatches") + +Remove the stale whitelist. 
+ +Signed-off-by: Masahiro Yamada +Stable-dep-of: bb43a59944f4 ("Rename .data.unlikely to .data..unlikely") +Signed-off-by: Sasha Levin +--- + scripts/mod/modpost.c | 6 ------ + 1 file changed, 6 deletions(-) + +diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c +index f6cbf70e455ee..7e88e6437540e 100644 +--- a/scripts/mod/modpost.c ++++ b/scripts/mod/modpost.c +@@ -1007,12 +1007,6 @@ static int secref_whitelist(const char *fromsec, const char *fromsym, + "*_console"))) + return 0; + +- /* symbols in data sections that may refer to meminit sections */ +- if (match(fromsec, PATTERNS(DATA_SECTIONS)) && +- match(tosec, PATTERNS(ALL_XXXINIT_SECTIONS)) && +- match(fromsym, PATTERNS("*driver"))) +- return 0; +- + /* + * symbols in data sections must not refer to .exit.*, but there are + * quite a few offenders, so hide these unless for W=1 builds until +-- +2.43.0 + diff --git a/queue-6.6/modpost-disallow-the-combination-of-export_symbol-an.patch b/queue-6.6/modpost-disallow-the-combination-of-export_symbol-an.patch new file mode 100644 index 00000000000..e851222c9d4 --- /dev/null +++ b/queue-6.6/modpost-disallow-the-combination-of-export_symbol-an.patch @@ -0,0 +1,37 @@ +From d2e58d657afcfe5bf950850fed5bed0a74d988b7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 23 Oct 2023 02:06:10 +0900 +Subject: modpost: disallow the combination of EXPORT_SYMBOL and __meminit* + +From: Masahiro Yamada + +[ Upstream commit a3df1526da480c089c20868b7f4d486b9f266001 ] + +Theoretically, we could export conditionally-discarded code sections, +such as .meminit*, if all the users can become modular under a certain +condition. However, that would be difficult to control and such a tricky +case has never occurred. 
+ +Signed-off-by: Masahiro Yamada +Stable-dep-of: bb43a59944f4 ("Rename .data.unlikely to .data..unlikely") +Signed-off-by: Sasha Levin +--- + scripts/mod/modpost.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c +index 0426c1bf3a69c..c4c09e28dc902 100644 +--- a/scripts/mod/modpost.c ++++ b/scripts/mod/modpost.c +@@ -1176,7 +1176,7 @@ static void check_export_symbol(struct module *mod, struct elf_info *elf, + ELF_ST_TYPE(sym->st_info) == STT_LOPROC) + s->is_func = true; + +- if (match(secname, PATTERNS(INIT_SECTIONS))) ++ if (match(secname, PATTERNS(ALL_INIT_SECTIONS))) + warn("%s: %s: EXPORT_SYMBOL used for init symbol. Remove __init or EXPORT_SYMBOL.\n", + mod->name, name); + else if (match(secname, PATTERNS(ALL_EXIT_SECTIONS))) +-- +2.43.0 + diff --git a/queue-6.6/modpost-remove-all_exit_data_sections-macro.patch b/queue-6.6/modpost-remove-all_exit_data_sections-macro.patch new file mode 100644 index 00000000000..fd4d6b622ad --- /dev/null +++ b/queue-6.6/modpost-remove-all_exit_data_sections-macro.patch @@ -0,0 +1,34 @@ +From 7116b59c295027d6b8e0ed45bbca701cf3399950 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 23 Oct 2023 02:06:04 +0900 +Subject: modpost: remove ALL_EXIT_DATA_SECTIONS macro + +From: Masahiro Yamada + +[ Upstream commit 3ada34b0f6559b2388f1983366614fbe8027b6fd ] + +This is unused. 
+ +Signed-off-by: Masahiro Yamada +Stable-dep-of: bb43a59944f4 ("Rename .data.unlikely to .data..unlikely") +Signed-off-by: Sasha Levin +--- + scripts/mod/modpost.c | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c +index 828d5cc367169..f6cbf70e455ee 100644 +--- a/scripts/mod/modpost.c ++++ b/scripts/mod/modpost.c +@@ -794,8 +794,6 @@ static void check_section(const char *modname, struct elf_info *elf, + #define ALL_INIT_DATA_SECTIONS \ + ".init.setup", ".init.rodata", ".meminit.rodata", \ + ".init.data", ".meminit.data" +-#define ALL_EXIT_DATA_SECTIONS \ +- ".exit.data", ".memexit.data" + + #define ALL_INIT_TEXT_SECTIONS \ + ".init.text", ".meminit.text" +-- +2.43.0 + diff --git a/queue-6.6/modpost-remove-exit_sections-macro.patch b/queue-6.6/modpost-remove-exit_sections-macro.patch new file mode 100644 index 00000000000..5ab65a89538 --- /dev/null +++ b/queue-6.6/modpost-remove-exit_sections-macro.patch @@ -0,0 +1,61 @@ +From fdd2cd0c76871dd5cd854789b0b737c440aa944a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 23 Oct 2023 02:06:09 +0900 +Subject: modpost: remove EXIT_SECTIONS macro + +From: Masahiro Yamada + +[ Upstream commit 48cd8df7afd1eef22cf7b125697a6d7c3d168c5c ] + +ALL_EXIT_SECTIONS and EXIT_SECTIONS are the same. Remove the latter. 
+ +Signed-off-by: Masahiro Yamada +Stable-dep-of: bb43a59944f4 ("Rename .data.unlikely to .data..unlikely") +Signed-off-by: Sasha Levin +--- + scripts/mod/modpost.c | 8 +++----- + 1 file changed, 3 insertions(+), 5 deletions(-) + +diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c +index e43862cd002e2..0426c1bf3a69c 100644 +--- a/scripts/mod/modpost.c ++++ b/scripts/mod/modpost.c +@@ -808,7 +808,7 @@ static void check_section(const char *modname, struct elf_info *elf, + #define ALL_XXXINIT_SECTIONS ".meminit.*" + + #define ALL_INIT_SECTIONS INIT_SECTIONS, ALL_XXXINIT_SECTIONS +-#define ALL_EXIT_SECTIONS EXIT_SECTIONS ++#define ALL_EXIT_SECTIONS ".exit.*" + + #define DATA_SECTIONS ".data", ".data.rel" + #define TEXT_SECTIONS ".text", ".text.*", ".sched.text", \ +@@ -820,8 +820,6 @@ static void check_section(const char *modname, struct elf_info *elf, + + #define INIT_SECTIONS ".init.*" + +-#define EXIT_SECTIONS ".exit.*" +- + #define ALL_TEXT_SECTIONS ALL_INIT_TEXT_SECTIONS, ALL_EXIT_TEXT_SECTIONS, \ + TEXT_SECTIONS, OTHER_TEXT_SECTIONS + +@@ -1013,7 +1011,7 @@ static int secref_whitelist(const char *fromsec, const char *fromsym, + */ + if (!extra_warn && + match(fromsec, PATTERNS(DATA_SECTIONS)) && +- match(tosec, PATTERNS(EXIT_SECTIONS)) && ++ match(tosec, PATTERNS(ALL_EXIT_SECTIONS)) && + match(fromsym, PATTERNS("*driver"))) + return 0; + +@@ -1181,7 +1179,7 @@ static void check_export_symbol(struct module *mod, struct elf_info *elf, + if (match(secname, PATTERNS(INIT_SECTIONS))) + warn("%s: %s: EXPORT_SYMBOL used for init symbol. Remove __init or EXPORT_SYMBOL.\n", + mod->name, name); +- else if (match(secname, PATTERNS(EXIT_SECTIONS))) ++ else if (match(secname, PATTERNS(ALL_EXIT_SECTIONS))) + warn("%s: %s: EXPORT_SYMBOL used for exit symbol. 
Remove __exit or EXPORT_SYMBOL.\n", + mod->name, name); + } +-- +2.43.0 + diff --git a/queue-6.6/modpost-remove-incorrect-code-in-do_eisa_entry.patch b/queue-6.6/modpost-remove-incorrect-code-in-do_eisa_entry.patch new file mode 100644 index 00000000000..1da7765369d --- /dev/null +++ b/queue-6.6/modpost-remove-incorrect-code-in-do_eisa_entry.patch @@ -0,0 +1,86 @@ +From f58aba9f450415e1079fb0287cb491b0d204a848 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 20 Nov 2024 08:56:39 +0900 +Subject: modpost: remove incorrect code in do_eisa_entry() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Masahiro Yamada + +[ Upstream commit 0c3e091319e4748cb36ac9a50848903dc6f54054 ] + +This function contains multiple bugs after the following commits: + + - ac551828993e ("modpost: i2c aliases need no trailing wildcard") + - 6543becf26ff ("mod/file2alias: make modalias generation safe for cross compiling") + +Commit ac551828993e inserted the following code to do_eisa_entry(): + +    else +            strcat(alias, "*"); + +This is incorrect because 'alias' is uninitialized. If it is not +NULL-terminated, strcat() could cause a buffer overrun. + +Even if 'alias' happens to be zero-filled, it would output: + + MODULE_ALIAS("*"); + +This would match anything. As a result, the module could be loaded by +any unrelated uevent from an unrelated subsystem. + +Commit ac551828993e introduced another bug. + +Prior to that commit, the conditional check was: + +    if (eisa->sig[0]) + +This checked if the first character of eisa_device_id::sig was not '\0'. + +However, commit ac551828993e changed it as follows: + +    if (sig[0]) + +sig[0] is NOT the first character of the eisa_device_id::sig. The +type of 'sig' is 'char (*)[8]', meaning that the type of 'sig[0]' is +'char [8]' instead of 'char'. 'sig[0]' and 'symval' refer to the same +address, which never becomes NULL. 
+ +The correct conversion would have been: + +    if ((*sig)[0]) + +However, this if-conditional was meaningless because the earlier change +in commit ac551828993e was incorrect. + +This commit removes the entire incorrect code, which should never have +been executed. + +Fixes: ac551828993e ("modpost: i2c aliases need no trailing wildcard") +Fixes: 6543becf26ff ("mod/file2alias: make modalias generation safe for cross compiling") +Signed-off-by: Masahiro Yamada +Signed-off-by: Sasha Levin +--- + scripts/mod/file2alias.c | 5 +---- + 1 file changed, 1 insertion(+), 4 deletions(-) + +diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c +index 6583b36dbe694..efbb4836ec668 100644 +--- a/scripts/mod/file2alias.c ++++ b/scripts/mod/file2alias.c +@@ -809,10 +809,7 @@ static int do_eisa_entry(const char *filename, void *symval, + char *alias) + { + DEF_FIELD_ADDR(symval, eisa_device_id, sig); +- if (sig[0]) +- sprintf(alias, EISA_DEVICE_MODALIAS_FMT "*", *sig); +- else +- strcat(alias, "*"); ++ sprintf(alias, EISA_DEVICE_MODALIAS_FMT "*", *sig); + return 1; + } + +-- +2.43.0 + diff --git a/queue-6.6/modpost-remove-mem_init_sections-macro.patch b/queue-6.6/modpost-remove-mem_init_sections-macro.patch new file mode 100644 index 00000000000..20383580809 --- /dev/null +++ b/queue-6.6/modpost-remove-mem_init_sections-macro.patch @@ -0,0 +1,43 @@ +From 497dd22cceeb67c898718011df1f2ac9f2cb7183 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 23 Oct 2023 02:06:08 +0900 +Subject: modpost: remove MEM_INIT_SECTIONS macro + +From: Masahiro Yamada + +[ Upstream commit 473a45bb35f080e31cb4fe45e905bfe3bd407fdf ] + +ALL_XXXINIT_SECTIONS and MEM_INIT_SECTIONS are the same. +Remove the latter. 
+ +Signed-off-by: Masahiro Yamada +Stable-dep-of: bb43a59944f4 ("Rename .data.unlikely to .data..unlikely") +Signed-off-by: Sasha Levin +--- + scripts/mod/modpost.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c +index 7e88e6437540e..e43862cd002e2 100644 +--- a/scripts/mod/modpost.c ++++ b/scripts/mod/modpost.c +@@ -805,7 +805,7 @@ static void check_section(const char *modname, struct elf_info *elf, + ".pci_fixup_enable", ".pci_fixup_resume", \ + ".pci_fixup_resume_early", ".pci_fixup_suspend" + +-#define ALL_XXXINIT_SECTIONS MEM_INIT_SECTIONS ++#define ALL_XXXINIT_SECTIONS ".meminit.*" + + #define ALL_INIT_SECTIONS INIT_SECTIONS, ALL_XXXINIT_SECTIONS + #define ALL_EXIT_SECTIONS EXIT_SECTIONS +@@ -819,7 +819,6 @@ static void check_section(const char *modname, struct elf_info *elf, + ".coldtext", ".softirqentry.text" + + #define INIT_SECTIONS ".init.*" +-#define MEM_INIT_SECTIONS ".meminit.*" + + #define EXIT_SECTIONS ".exit.*" + +-- +2.43.0 + diff --git a/queue-6.6/modpost-squash-all_-init-exit-_text_sections-to-all_.patch b/queue-6.6/modpost-squash-all_-init-exit-_text_sections-to-all_.patch new file mode 100644 index 00000000000..188468f7503 --- /dev/null +++ b/queue-6.6/modpost-squash-all_-init-exit-_text_sections-to-all_.patch @@ -0,0 +1,47 @@ +From 97e4b38a77ed270278b1ce8784a7cf2fe0ea9520 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 23 Oct 2023 02:06:13 +0900 +Subject: modpost: squash ALL_{INIT,EXIT}_TEXT_SECTIONS to ALL_TEXT_SECTIONS + +From: Masahiro Yamada + +[ Upstream commit 34fcf231dcf94d7dea29c070228c4b93849f4850 ] + +ALL_INIT_TEXT_SECTIONS and ALL_EXIT_TEXT_SECTIONS are only used in +the macro definition of ALL_TEXT_SECTIONS. 
+ +Signed-off-by: Masahiro Yamada +Stable-dep-of: bb43a59944f4 ("Rename .data.unlikely to .data..unlikely") +Signed-off-by: Sasha Levin +--- + scripts/mod/modpost.c | 7 +------ + 1 file changed, 1 insertion(+), 6 deletions(-) + +diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c +index 413da4c93b78e..bd559361ecd27 100644 +--- a/scripts/mod/modpost.c ++++ b/scripts/mod/modpost.c +@@ -795,11 +795,6 @@ static void check_section(const char *modname, struct elf_info *elf, + ".init.setup", ".init.rodata", ".meminit.rodata", \ + ".init.data", ".meminit.data" + +-#define ALL_INIT_TEXT_SECTIONS \ +- ".init.text", ".meminit.text" +-#define ALL_EXIT_TEXT_SECTIONS \ +- ".exit.text" +- + #define ALL_PCI_INIT_SECTIONS \ + ".pci_fixup_early", ".pci_fixup_header", ".pci_fixup_final", \ + ".pci_fixup_enable", ".pci_fixup_resume", \ +@@ -820,7 +815,7 @@ static void check_section(const char *modname, struct elf_info *elf, + + #define INIT_SECTIONS ".init.*" + +-#define ALL_TEXT_SECTIONS ALL_INIT_TEXT_SECTIONS, ALL_EXIT_TEXT_SECTIONS, \ ++#define ALL_TEXT_SECTIONS ".init.text", ".meminit.text", ".exit.text", \ + TEXT_SECTIONS, OTHER_TEXT_SECTIONS + + enum mismatch { +-- +2.43.0 + diff --git a/queue-6.6/modpost-use-all_init_sections-for-the-section-check-.patch b/queue-6.6/modpost-use-all_init_sections-for-the-section-check-.patch new file mode 100644 index 00000000000..b09053137be --- /dev/null +++ b/queue-6.6/modpost-use-all_init_sections-for-the-section-check-.patch @@ -0,0 +1,37 @@ +From 8e64208490dc7f48caf0163d9618ce42fb84783c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 23 Oct 2023 02:06:11 +0900 +Subject: modpost: use ALL_INIT_SECTIONS for the section check from + DATA_SECTIONS + +From: Masahiro Yamada + +[ Upstream commit e578e4e3110635b20786e442baa3aeff9bb65f95 ] + +ALL_INIT_SECTIONS is defined as follows: + + #define ALL_INIT_SECTIONS INIT_SECTIONS, ALL_XXXINIT_SECTIONS + +Signed-off-by: Masahiro Yamada +Stable-dep-of: bb43a59944f4 ("Rename .data.unlikely 
to .data..unlikely") +Signed-off-by: Sasha Levin +--- + scripts/mod/modpost.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c +index c4c09e28dc902..413da4c93b78e 100644 +--- a/scripts/mod/modpost.c ++++ b/scripts/mod/modpost.c +@@ -864,7 +864,7 @@ static const struct sectioncheck sectioncheck[] = { + }, + { + .fromsec = { DATA_SECTIONS, NULL }, +- .bad_tosec = { ALL_XXXINIT_SECTIONS, INIT_SECTIONS, NULL }, ++ .bad_tosec = { ALL_INIT_SECTIONS, NULL }, + .mismatch = DATA_TO_ANY_INIT, + }, + { +-- +2.43.0 + diff --git a/queue-6.6/nfs-ignore-sb_rdonly-when-mounting-nfs.patch b/queue-6.6/nfs-ignore-sb_rdonly-when-mounting-nfs.patch new file mode 100644 index 00000000000..a588df06de3 --- /dev/null +++ b/queue-6.6/nfs-ignore-sb_rdonly-when-mounting-nfs.patch @@ -0,0 +1,79 @@ +From d9e574a50d8793e7c1ba3d31df4a0a1fc8fedc34 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 14 Nov 2024 12:53:03 +0800 +Subject: nfs: ignore SB_RDONLY when mounting nfs + +From: Li Lingfeng + +[ Upstream commit 52cb7f8f177878b4f22397b9c4d2c8f743766be3 ] + +When exporting only one file system with fsid=0 on the server side, the +client alternately uses the ro/rw mount options to perform the mount +operation, and a new vfsmount is generated each time. + +It can be reproduced as follows: +[root@localhost ~]# mount /dev/sda /mnt2 +[root@localhost ~]# echo "/mnt2 *(rw,no_root_squash,fsid=0)" >/etc/exports +[root@localhost ~]# systemctl restart nfs-server +[root@localhost ~]# mount -t nfs -o ro,vers=4 127.0.0.1:/ /mnt/sdaa +[root@localhost ~]# mount -t nfs -o rw,vers=4 127.0.0.1:/ /mnt/sdaa +[root@localhost ~]# mount -t nfs -o ro,vers=4 127.0.0.1:/ /mnt/sdaa +[root@localhost ~]# mount -t nfs -o rw,vers=4 127.0.0.1:/ /mnt/sdaa +[root@localhost ~]# mount | grep nfs4 +127.0.0.1:/ on /mnt/sdaa type nfs4 (ro,relatime,vers=4.2,rsize=1048576,... +127.0.0.1:/ on /mnt/sdaa type nfs4 (rw,relatime,vers=4.2,rsize=1048576,... 
+127.0.0.1:/ on /mnt/sdaa type nfs4 (ro,relatime,vers=4.2,rsize=1048576,... +127.0.0.1:/ on /mnt/sdaa type nfs4 (rw,relatime,vers=4.2,rsize=1048576,... +[root@localhost ~]# + +We expected that after mounting with the ro option, using the rw option to +mount again would return EBUSY, but the actual situation was not the case. + +As shown above, when mounting for the first time, a superblock with the ro +flag will be generated, and at the same time, in do_new_mount_fc --> +do_add_mount, it detects that the superblock corresponding to the current +target directory is inconsistent with the currently generated one +(path->mnt->mnt_sb != newmnt->mnt.mnt_sb), and a new vfsmount will be +generated. + +When mounting with the rw option for the second time, since no matching +superblock can be found in the fs_supers list, a new superblock with the +rw flag will be generated again. The superblock in use (ro) is different +from the newly generated superblock (rw), and a new vfsmount will be +generated again. + +When mounting with the ro option for the third time, the superblock (ro) +is found in fs_supers, the superblock in use (rw) is different from the +found superblock (ro), and a new vfsmount will be generated again. + +We can switch between ro/rw through remount, and only one superblock needs +to be generated, thus avoiding the problem of repeated generation of +vfsmount caused by switching superblocks. + +Furthermore, This can also resolve the issue described in the link. 
+ +Fixes: 275a5d24bf56 ("NFS: Error when mounting the same filesystem with different options") +Link: https://lore.kernel.org/all/20240604112636.236517-3-lilingfeng@huaweicloud.com/ +Signed-off-by: Li Lingfeng +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/internal.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h +index 8bceaac2205c8..a92b234ae0870 100644 +--- a/fs/nfs/internal.h ++++ b/fs/nfs/internal.h +@@ -11,7 +11,7 @@ + #include + #include + +-#define NFS_SB_MASK (SB_RDONLY|SB_NOSUID|SB_NODEV|SB_NOEXEC|SB_SYNCHRONOUS) ++#define NFS_SB_MASK (SB_NOSUID|SB_NODEV|SB_NOEXEC|SB_SYNCHRONOUS) + + extern const struct export_operations nfs_export_ops; + +-- +2.43.0 + diff --git a/queue-6.6/nfsv4.0-fix-a-use-after-free-problem-in-the-asynchro.patch b/queue-6.6/nfsv4.0-fix-a-use-after-free-problem-in-the-asynchro.patch new file mode 100644 index 00000000000..5e27bd8614a --- /dev/null +++ b/queue-6.6/nfsv4.0-fix-a-use-after-free-problem-in-the-asynchro.patch @@ -0,0 +1,52 @@ +From b138adc952d563dcbcd367b1bbfae5bded9dd928 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 8 Nov 2024 12:13:31 -0500 +Subject: NFSv4.0: Fix a use-after-free problem in the asynchronous open() + +From: Trond Myklebust + +[ Upstream commit 2fdb05dc0931250574f0cb0ebeb5ed8e20f4a889 ] + +Yang Erkun reports that when two threads are opening files at the same +time, and are forced to abort before a reply is seen, then the call to +nfs_release_seqid() in nfs4_opendata_free() can result in a +use-after-free of the pointer to the defunct rpc task of the other +thread. +The fix is to ensure that if the RPC call is aborted before the call to +nfs_wait_on_sequence() is complete, then we must call nfs_release_seqid() +in nfs4_open_release() before the rpc_task is freed. 
+ +Reported-by: Yang Erkun +Fixes: 24ac23ab88df ("NFSv4: Convert open() into an asynchronous RPC call") +Reviewed-by: Yang Erkun +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/nfs4proc.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c +index 299ea2b86df66..4b12e45f57539 100644 +--- a/fs/nfs/nfs4proc.c ++++ b/fs/nfs/nfs4proc.c +@@ -2528,12 +2528,14 @@ static void nfs4_open_release(void *calldata) + struct nfs4_opendata *data = calldata; + struct nfs4_state *state = NULL; + ++ /* In case of error, no cleanup! */ ++ if (data->rpc_status != 0 || !data->rpc_done) { ++ nfs_release_seqid(data->o_arg.seqid); ++ goto out_free; ++ } + /* If this request hasn't been cancelled, do nothing */ + if (!data->cancelled) + goto out_free; +- /* In case of error, no cleanup! */ +- if (data->rpc_status != 0 || !data->rpc_done) +- goto out_free; + /* In case we need an open_confirm, no cleanup! */ + if (data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM) + goto out_free; +-- +2.43.0 + diff --git a/queue-6.6/nvme-multipath-avoid-hang-on-inaccessible-namespaces.patch b/queue-6.6/nvme-multipath-avoid-hang-on-inaccessible-namespaces.patch new file mode 100644 index 00000000000..780f7ae5731 --- /dev/null +++ b/queue-6.6/nvme-multipath-avoid-hang-on-inaccessible-namespaces.patch @@ -0,0 +1,90 @@ +From 43de759bd1b9fc2d53518667a21c39fedc4e260d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 14 Sep 2024 14:01:23 +0200 +Subject: nvme-multipath: avoid hang on inaccessible namespaces + +From: Hannes Reinecke + +[ Upstream commit 3b97f5a05cfc55e7729ff3769f63eef64e2178bb ] + +During repetitive namespace remapping operations on the target the +namespace might have changed between the time the initial scan +was performed, and partition scan was invoked by device_add_disk() +in nvme_mpath_set_live(). 
We then end up with a stuck scanning process: + +[<0>] folio_wait_bit_common+0x12a/0x310 +[<0>] filemap_read_folio+0x97/0xd0 +[<0>] do_read_cache_folio+0x108/0x390 +[<0>] read_part_sector+0x31/0xa0 +[<0>] read_lba+0xc5/0x160 +[<0>] efi_partition+0xd9/0x8f0 +[<0>] bdev_disk_changed+0x23d/0x6d0 +[<0>] blkdev_get_whole+0x78/0xc0 +[<0>] bdev_open+0x2c6/0x3b0 +[<0>] bdev_file_open_by_dev+0xcb/0x120 +[<0>] disk_scan_partitions+0x5d/0x100 +[<0>] device_add_disk+0x402/0x420 +[<0>] nvme_mpath_set_live+0x4f/0x1f0 [nvme_core] +[<0>] nvme_mpath_add_disk+0x107/0x120 [nvme_core] +[<0>] nvme_alloc_ns+0xac6/0xe60 [nvme_core] +[<0>] nvme_scan_ns+0x2dd/0x3e0 [nvme_core] +[<0>] nvme_scan_work+0x1a3/0x490 [nvme_core] + +This happens when we have several paths, some of which are inaccessible, +and the active paths are removed first. Then nvme_find_path() will requeue +I/O in the ns_head (as paths are present), but the requeue list is never +triggered as all remaining paths are inactive. + +This patch checks for NVME_NSHEAD_DISK_LIVE in nvme_available_path(), +and requeue I/O after NVME_NSHEAD_DISK_LIVE has been cleared once +the last path has been removed to properly terminate pending I/O. 
+ +Signed-off-by: Hannes Reinecke +Reviewed-by: Sagi Grimberg +Signed-off-by: Keith Busch +Stable-dep-of: 5dd18f09ce73 ("nvme/multipath: Fix RCU list traversal to use SRCU primitive") +Signed-off-by: Sasha Levin +--- + drivers/nvme/host/multipath.c | 12 ++++++++++-- + 1 file changed, 10 insertions(+), 2 deletions(-) + +diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c +index 2fa137738ac8d..989d1e50fb8cc 100644 +--- a/drivers/nvme/host/multipath.c ++++ b/drivers/nvme/host/multipath.c +@@ -420,6 +420,9 @@ static bool nvme_available_path(struct nvme_ns_head *head) + { + struct nvme_ns *ns; + ++ if (!test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) ++ return NULL; ++ + list_for_each_entry_rcu(ns, &head->list, siblings) { + if (test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ns->ctrl->flags)) + continue; +@@ -996,8 +999,7 @@ void nvme_mpath_shutdown_disk(struct nvme_ns_head *head) + { + if (!head->disk) + return; +- kblockd_schedule_work(&head->requeue_work); +- if (test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) { ++ if (test_and_clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) { + nvme_cdev_del(&head->cdev, &head->cdev_device); + /* + * requeue I/O after NVME_NSHEAD_DISK_LIVE has been cleared +@@ -1007,6 +1009,12 @@ void nvme_mpath_shutdown_disk(struct nvme_ns_head *head) + kblockd_schedule_work(&head->requeue_work); + del_gendisk(head->disk); + } ++ /* ++ * requeue I/O after NVME_NSHEAD_DISK_LIVE has been cleared ++ * to allow multipath to fail all I/O. 
++ */ ++ synchronize_srcu(&head->srcu); ++ kblockd_schedule_work(&head->requeue_work); + } + + void nvme_mpath_remove_disk(struct nvme_ns_head *head) +-- +2.43.0 + diff --git a/queue-6.6/nvme-multipath-fix-rcu-list-traversal-to-use-srcu-pr.patch b/queue-6.6/nvme-multipath-fix-rcu-list-traversal-to-use-srcu-pr.patch new file mode 100644 index 00000000000..05841d92931 --- /dev/null +++ b/queue-6.6/nvme-multipath-fix-rcu-list-traversal-to-use-srcu-pr.patch @@ -0,0 +1,107 @@ +From 82f7db6330f19dc48b8ae3b68f8580eeba6b8c7b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 5 Nov 2024 06:42:46 -0800 +Subject: nvme/multipath: Fix RCU list traversal to use SRCU primitive + +From: Breno Leitao + +[ Upstream commit 5dd18f09ce7399df6fffe80d1598add46c395ae9 ] + +The code currently uses list_for_each_entry_rcu() while holding an SRCU +lock, triggering false positive warnings with CONFIG_PROVE_RCU=y +enabled: + + drivers/nvme/host/multipath.c:168 RCU-list traversed in non-reader section!! + drivers/nvme/host/multipath.c:227 RCU-list traversed in non-reader section!! + drivers/nvme/host/multipath.c:260 RCU-list traversed in non-reader section!! + +While the list is properly protected by SRCU lock, the code uses the +wrong list traversal primitive. Replace list_for_each_entry_rcu() with +list_for_each_entry_srcu() to correctly indicate SRCU-based protection +and eliminate the false warning. 
+ +Signed-off-by: Breno Leitao +Fixes: be647e2c76b2 ("nvme: use srcu for iterating namespace list") +Signed-off-by: Keith Busch +Signed-off-by: Sasha Levin +--- + drivers/nvme/host/multipath.c | 21 ++++++++++++++------- + 1 file changed, 14 insertions(+), 7 deletions(-) + +diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c +index 989d1e50fb8cc..32283301199f0 100644 +--- a/drivers/nvme/host/multipath.c ++++ b/drivers/nvme/host/multipath.c +@@ -165,7 +165,8 @@ void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl) + int srcu_idx; + + srcu_idx = srcu_read_lock(&ctrl->srcu); +- list_for_each_entry_rcu(ns, &ctrl->namespaces, list) { ++ list_for_each_entry_srcu(ns, &ctrl->namespaces, list, ++ srcu_read_lock_held(&ctrl->srcu)) { + if (!ns->head->disk) + continue; + kblockd_schedule_work(&ns->head->requeue_work); +@@ -209,7 +210,8 @@ void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl) + int srcu_idx; + + srcu_idx = srcu_read_lock(&ctrl->srcu); +- list_for_each_entry_rcu(ns, &ctrl->namespaces, list) { ++ list_for_each_entry_srcu(ns, &ctrl->namespaces, list, ++ srcu_read_lock_held(&ctrl->srcu)) { + nvme_mpath_clear_current_path(ns); + kblockd_schedule_work(&ns->head->requeue_work); + } +@@ -224,7 +226,8 @@ void nvme_mpath_revalidate_paths(struct nvme_ns *ns) + int srcu_idx; + + srcu_idx = srcu_read_lock(&head->srcu); +- list_for_each_entry_rcu(ns, &head->list, siblings) { ++ list_for_each_entry_srcu(ns, &head->list, siblings, ++ srcu_read_lock_held(&head->srcu)) { + if (capacity != get_capacity(ns->disk)) + clear_bit(NVME_NS_READY, &ns->flags); + } +@@ -256,7 +259,8 @@ static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node) + int found_distance = INT_MAX, fallback_distance = INT_MAX, distance; + struct nvme_ns *found = NULL, *fallback = NULL, *ns; + +- list_for_each_entry_rcu(ns, &head->list, siblings) { ++ list_for_each_entry_srcu(ns, &head->list, siblings, ++ srcu_read_lock_held(&head->srcu)) { + if 
(nvme_path_is_disabled(ns)) + continue; + +@@ -355,7 +359,8 @@ static struct nvme_ns *nvme_queue_depth_path(struct nvme_ns_head *head) + unsigned int min_depth_opt = UINT_MAX, min_depth_nonopt = UINT_MAX; + unsigned int depth; + +- list_for_each_entry_rcu(ns, &head->list, siblings) { ++ list_for_each_entry_srcu(ns, &head->list, siblings, ++ srcu_read_lock_held(&head->srcu)) { + if (nvme_path_is_disabled(ns)) + continue; + +@@ -423,7 +428,8 @@ static bool nvme_available_path(struct nvme_ns_head *head) + if (!test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) + return NULL; + +- list_for_each_entry_rcu(ns, &head->list, siblings) { ++ list_for_each_entry_srcu(ns, &head->list, siblings, ++ srcu_read_lock_held(&head->srcu)) { + if (test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ns->ctrl->flags)) + continue; + switch (ns->ctrl->state) { +@@ -784,7 +790,8 @@ static int nvme_update_ana_state(struct nvme_ctrl *ctrl, + return 0; + + srcu_idx = srcu_read_lock(&ctrl->srcu); +- list_for_each_entry_rcu(ns, &ctrl->namespaces, list) { ++ list_for_each_entry_srcu(ns, &ctrl->namespaces, list, ++ srcu_read_lock_held(&ctrl->srcu)) { + unsigned nsid; + again: + nsid = le32_to_cpu(desc->nsids[n]); +-- +2.43.0 + diff --git a/queue-6.6/nvme-multipath-implement-queue-depth-iopolicy.patch b/queue-6.6/nvme-multipath-implement-queue-depth-iopolicy.patch new file mode 100644 index 00000000000..36e78632d00 --- /dev/null +++ b/queue-6.6/nvme-multipath-implement-queue-depth-iopolicy.patch @@ -0,0 +1,261 @@ +From fefa75a75df58148f282665733c5c422b67b37de Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 25 Jun 2024 08:26:05 -0400 +Subject: nvme-multipath: implement "queue-depth" iopolicy + +From: Thomas Song + +[ Upstream commit f227345f0a70f011647ae7ae12778bf258ff71f2 ] + +The round-robin path selector is inefficient in cases where there is a +difference in latency between paths. In the presence of one or more +high latency paths the round-robin selector continues to use the high +latency path equally. 
This results in a bias towards the highest latency +path and can cause a significant decrease in overall performance as IOs +pile on the highest latency path. This problem is acute with NVMe-oF +controllers. + +The queue-depth path selector sends I/O down the path with the lowest +number of requests in its request queue. Paths with lower latency will +clear requests more quickly and have less requests queued compared to +higher latency paths. The goal of this path selector is to make more use +of lower latency paths which will bring down overall IO latency and +increase throughput and performance. + +Signed-off-by: Thomas Song +[emilne: commandeered patch developed by Thomas Song @ Pure Storage] +Co-developed-by: Ewan D. Milne +Signed-off-by: Ewan D. Milne +Co-developed-by: John Meneghini +Signed-off-by: John Meneghini +Link: https://lore.kernel.org/linux-nvme/20240509202929.831680-1-jmeneghi@redhat.com/ +Tested-by: Marco Patalano +Tested-by: Jyoti Rani +Tested-by: John Meneghini +Reviewed-by: Randy Jennings +Reviewed-by: Hannes Reinecke +Reviewed-by: Sagi Grimberg +Reviewed-by: Chaitanya Kulkarni +Reviewed-by: Christoph Hellwig +Signed-off-by: Keith Busch +Stable-dep-of: 5dd18f09ce73 ("nvme/multipath: Fix RCU list traversal to use SRCU primitive") +Signed-off-by: Sasha Levin +--- + drivers/nvme/host/core.c | 2 +- + drivers/nvme/host/multipath.c | 86 +++++++++++++++++++++++++++++++++-- + drivers/nvme/host/nvme.h | 4 ++ + 3 files changed, 87 insertions(+), 5 deletions(-) + +diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c +index 965ca7d7a3de2..5b6a6bd4e6e80 100644 +--- a/drivers/nvme/host/core.c ++++ b/drivers/nvme/host/core.c +@@ -109,7 +109,7 @@ struct workqueue_struct *nvme_delete_wq; + EXPORT_SYMBOL_GPL(nvme_delete_wq); + + static LIST_HEAD(nvme_subsystems); +-static DEFINE_MUTEX(nvme_subsystems_lock); ++DEFINE_MUTEX(nvme_subsystems_lock); + + static DEFINE_IDA(nvme_instance_ida); + static dev_t nvme_ctrl_base_chr_devt; +diff --git 
a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c +index 53eee6fc68392..2fa137738ac8d 100644 +--- a/drivers/nvme/host/multipath.c ++++ b/drivers/nvme/host/multipath.c +@@ -17,6 +17,7 @@ MODULE_PARM_DESC(multipath, + static const char *nvme_iopolicy_names[] = { + [NVME_IOPOLICY_NUMA] = "numa", + [NVME_IOPOLICY_RR] = "round-robin", ++ [NVME_IOPOLICY_QD] = "queue-depth", + }; + + static int iopolicy = NVME_IOPOLICY_NUMA; +@@ -29,6 +30,8 @@ static int nvme_set_iopolicy(const char *val, const struct kernel_param *kp) + iopolicy = NVME_IOPOLICY_NUMA; + else if (!strncmp(val, "round-robin", 11)) + iopolicy = NVME_IOPOLICY_RR; ++ else if (!strncmp(val, "queue-depth", 11)) ++ iopolicy = NVME_IOPOLICY_QD; + else + return -EINVAL; + +@@ -43,7 +46,7 @@ static int nvme_get_iopolicy(char *buf, const struct kernel_param *kp) + module_param_call(iopolicy, nvme_set_iopolicy, nvme_get_iopolicy, + &iopolicy, 0644); + MODULE_PARM_DESC(iopolicy, +- "Default multipath I/O policy; 'numa' (default) or 'round-robin'"); ++ "Default multipath I/O policy; 'numa' (default), 'round-robin' or 'queue-depth'"); + + void nvme_mpath_default_iopolicy(struct nvme_subsystem *subsys) + { +@@ -128,6 +131,11 @@ void nvme_mpath_start_request(struct request *rq) + struct nvme_ns *ns = rq->q->queuedata; + struct gendisk *disk = ns->head->disk; + ++ if (READ_ONCE(ns->head->subsys->iopolicy) == NVME_IOPOLICY_QD) { ++ atomic_inc(&ns->ctrl->nr_active); ++ nvme_req(rq)->flags |= NVME_MPATH_CNT_ACTIVE; ++ } ++ + if (!blk_queue_io_stat(disk->queue) || blk_rq_is_passthrough(rq)) + return; + +@@ -141,6 +149,9 @@ void nvme_mpath_end_request(struct request *rq) + { + struct nvme_ns *ns = rq->q->queuedata; + ++ if (nvme_req(rq)->flags & NVME_MPATH_CNT_ACTIVE) ++ atomic_dec_if_positive(&ns->ctrl->nr_active); ++ + if (!(nvme_req(rq)->flags & NVME_MPATH_IO_STATS)) + return; + bdev_end_io_acct(ns->head->disk->part0, req_op(rq), +@@ -338,6 +349,42 @@ static struct nvme_ns *nvme_round_robin_path(struct 
nvme_ns_head *head) + return found; + } + ++static struct nvme_ns *nvme_queue_depth_path(struct nvme_ns_head *head) ++{ ++ struct nvme_ns *best_opt = NULL, *best_nonopt = NULL, *ns; ++ unsigned int min_depth_opt = UINT_MAX, min_depth_nonopt = UINT_MAX; ++ unsigned int depth; ++ ++ list_for_each_entry_rcu(ns, &head->list, siblings) { ++ if (nvme_path_is_disabled(ns)) ++ continue; ++ ++ depth = atomic_read(&ns->ctrl->nr_active); ++ ++ switch (ns->ana_state) { ++ case NVME_ANA_OPTIMIZED: ++ if (depth < min_depth_opt) { ++ min_depth_opt = depth; ++ best_opt = ns; ++ } ++ break; ++ case NVME_ANA_NONOPTIMIZED: ++ if (depth < min_depth_nonopt) { ++ min_depth_nonopt = depth; ++ best_nonopt = ns; ++ } ++ break; ++ default: ++ break; ++ } ++ ++ if (min_depth_opt == 0) ++ return best_opt; ++ } ++ ++ return best_opt ? best_opt : best_nonopt; ++} ++ + static inline bool nvme_path_is_optimized(struct nvme_ns *ns) + { + return ns->ctrl->state == NVME_CTRL_LIVE && +@@ -359,9 +406,14 @@ static struct nvme_ns *nvme_numa_path(struct nvme_ns_head *head) + + inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head) + { +- if (READ_ONCE(head->subsys->iopolicy) == NVME_IOPOLICY_RR) ++ switch (READ_ONCE(head->subsys->iopolicy)) { ++ case NVME_IOPOLICY_QD: ++ return nvme_queue_depth_path(head); ++ case NVME_IOPOLICY_RR: + return nvme_round_robin_path(head); +- return nvme_numa_path(head); ++ default: ++ return nvme_numa_path(head); ++ } + } + + static bool nvme_available_path(struct nvme_ns_head *head) +@@ -836,6 +888,29 @@ static ssize_t nvme_subsys_iopolicy_show(struct device *dev, + nvme_iopolicy_names[READ_ONCE(subsys->iopolicy)]); + } + ++static void nvme_subsys_iopolicy_update(struct nvme_subsystem *subsys, ++ int iopolicy) ++{ ++ struct nvme_ctrl *ctrl; ++ int old_iopolicy = READ_ONCE(subsys->iopolicy); ++ ++ if (old_iopolicy == iopolicy) ++ return; ++ ++ WRITE_ONCE(subsys->iopolicy, iopolicy); ++ ++ /* iopolicy changes clear the mpath by design */ ++ 
mutex_lock(&nvme_subsystems_lock); ++ list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) ++ nvme_mpath_clear_ctrl_paths(ctrl); ++ mutex_unlock(&nvme_subsystems_lock); ++ ++ pr_notice("subsysnqn %s iopolicy changed from %s to %s\n", ++ subsys->subnqn, ++ nvme_iopolicy_names[old_iopolicy], ++ nvme_iopolicy_names[iopolicy]); ++} ++ + static ssize_t nvme_subsys_iopolicy_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) + { +@@ -845,7 +920,7 @@ static ssize_t nvme_subsys_iopolicy_store(struct device *dev, + + for (i = 0; i < ARRAY_SIZE(nvme_iopolicy_names); i++) { + if (sysfs_streq(buf, nvme_iopolicy_names[i])) { +- WRITE_ONCE(subsys->iopolicy, i); ++ nvme_subsys_iopolicy_update(subsys, i); + return count; + } + } +@@ -963,6 +1038,9 @@ int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) + !(ctrl->subsys->cmic & NVME_CTRL_CMIC_ANA)) + return 0; + ++ /* initialize this in the identify path to cover controller resets */ ++ atomic_set(&ctrl->nr_active, 0); ++ + if (!ctrl->max_namespaces || + ctrl->max_namespaces > le32_to_cpu(id->nn)) { + dev_err(ctrl->device, +diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h +index 14a867245c29f..bddc068d58c7e 100644 +--- a/drivers/nvme/host/nvme.h ++++ b/drivers/nvme/host/nvme.h +@@ -48,6 +48,7 @@ extern unsigned int admin_timeout; + extern struct workqueue_struct *nvme_wq; + extern struct workqueue_struct *nvme_reset_wq; + extern struct workqueue_struct *nvme_delete_wq; ++extern struct mutex nvme_subsystems_lock; + + /* + * List of workarounds for devices that required behavior not specified in +@@ -199,6 +200,7 @@ enum { + NVME_REQ_CANCELLED = (1 << 0), + NVME_REQ_USERCMD = (1 << 1), + NVME_MPATH_IO_STATS = (1 << 2), ++ NVME_MPATH_CNT_ACTIVE = (1 << 3), + }; + + static inline struct nvme_request *nvme_req(struct request *req) +@@ -364,6 +366,7 @@ struct nvme_ctrl { + size_t ana_log_size; + struct timer_list anatt_timer; + struct work_struct ana_work; 
++ atomic_t nr_active; + #endif + + #ifdef CONFIG_NVME_AUTH +@@ -411,6 +414,7 @@ static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl) + enum nvme_iopolicy { + NVME_IOPOLICY_NUMA, + NVME_IOPOLICY_RR, ++ NVME_IOPOLICY_QD, + }; + + struct nvme_subsystem { +-- +2.43.0 + diff --git a/queue-6.6/nvme-multipath-prepare-for-queue-depth-iopolicy.patch b/queue-6.6/nvme-multipath-prepare-for-queue-depth-iopolicy.patch new file mode 100644 index 00000000000..c118f24f527 --- /dev/null +++ b/queue-6.6/nvme-multipath-prepare-for-queue-depth-iopolicy.patch @@ -0,0 +1,78 @@ +From c784734aef4944e365a1b84198bc61a201c8c242 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 25 Jun 2024 08:26:04 -0400 +Subject: nvme-multipath: prepare for "queue-depth" iopolicy + +From: John Meneghini + +[ Upstream commit 3d7c2fd2ea704812867f9586270a2516377482a3 ] + +This patch prepares for the introduction of a new iopolicy by breaking up +the nvme_find_path() code path into sub-routines. + +Signed-off-by: John Meneghini +Reviewed-by: Sagi Grimberg +Reviewed-by: Chaitanya Kulkarni +Signed-off-by: Keith Busch +Stable-dep-of: 5dd18f09ce73 ("nvme/multipath: Fix RCU list traversal to use SRCU primitive") +Signed-off-by: Sasha Levin +--- + drivers/nvme/host/multipath.c | 21 +++++++++++++++------ + 1 file changed, 15 insertions(+), 6 deletions(-) + +diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c +index ede2a14dad8be..53eee6fc68392 100644 +--- a/drivers/nvme/host/multipath.c ++++ b/drivers/nvme/host/multipath.c +@@ -290,10 +290,15 @@ static struct nvme_ns *nvme_next_ns(struct nvme_ns_head *head, + return list_first_or_null_rcu(&head->list, struct nvme_ns, siblings); + } + +-static struct nvme_ns *nvme_round_robin_path(struct nvme_ns_head *head, +- int node, struct nvme_ns *old) ++static struct nvme_ns *nvme_round_robin_path(struct nvme_ns_head *head) + { + struct nvme_ns *ns, *found = NULL; ++ int node = numa_node_id(); ++ struct nvme_ns *old = 
srcu_dereference(head->current_path[node], ++ &head->srcu); ++ ++ if (unlikely(!old)) ++ return __nvme_find_path(head, node); + + if (list_is_singular(&head->list)) { + if (nvme_path_is_disabled(old)) +@@ -339,7 +344,7 @@ static inline bool nvme_path_is_optimized(struct nvme_ns *ns) + ns->ana_state == NVME_ANA_OPTIMIZED; + } + +-inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head) ++static struct nvme_ns *nvme_numa_path(struct nvme_ns_head *head) + { + int node = numa_node_id(); + struct nvme_ns *ns; +@@ -347,14 +352,18 @@ inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head) + ns = srcu_dereference(head->current_path[node], &head->srcu); + if (unlikely(!ns)) + return __nvme_find_path(head, node); +- +- if (READ_ONCE(head->subsys->iopolicy) == NVME_IOPOLICY_RR) +- return nvme_round_robin_path(head, node, ns); + if (unlikely(!nvme_path_is_optimized(ns))) + return __nvme_find_path(head, node); + return ns; + } + ++inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head) ++{ ++ if (READ_ONCE(head->subsys->iopolicy) == NVME_IOPOLICY_RR) ++ return nvme_round_robin_path(head); ++ return nvme_numa_path(head); ++} ++ + static bool nvme_available_path(struct nvme_ns_head *head) + { + struct nvme_ns *ns; +-- +2.43.0 + diff --git a/queue-6.6/perf-arm-cmn-ensure-port-and-device-id-bits-are-set-.patch b/queue-6.6/perf-arm-cmn-ensure-port-and-device-id-bits-are-set-.patch new file mode 100644 index 00000000000..7c5a9f65948 --- /dev/null +++ b/queue-6.6/perf-arm-cmn-ensure-port-and-device-id-bits-are-set-.patch @@ -0,0 +1,50 @@ +From 55c8543e15721ba63930f38dec62de0db28b618d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 20 Nov 2024 16:13:34 -0800 +Subject: perf/arm-cmn: Ensure port and device id bits are set properly + +From: Namhyung Kim + +[ Upstream commit dfdf714fed559c09021df1d2a4bb64c0ad5f53bc ] + +The portid_bits and deviceid_bits were set only for XP type nodes in +the arm_cmn_discover() and it confused other nodes to find XP nodes. 
+Copy both bits from the XP nodes directly when it sets up a new +node. + +Fixes: e79634b53e39 ("perf/arm-cmn: Refactor node ID handling. Again.") +Signed-off-by: Namhyung Kim +Acked-by: Will Deacon +Reviewed-by: Robin Murphy +Link: https://lore.kernel.org/r/20241121001334.331334-1-namhyung@kernel.org +Signed-off-by: Catalin Marinas +Signed-off-by: Sasha Levin +--- + drivers/perf/arm-cmn.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c +index 0b3ce77136456..7bd1733d79770 100644 +--- a/drivers/perf/arm-cmn.c ++++ b/drivers/perf/arm-cmn.c +@@ -2075,8 +2075,6 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn) + continue; + + xp = arm_cmn_node_to_xp(cmn, dn); +- dn->portid_bits = xp->portid_bits; +- dn->deviceid_bits = xp->deviceid_bits; + dn->dtc = xp->dtc; + dn->dtm = xp->dtm; + if (cmn->multi_dtm) +@@ -2307,6 +2305,8 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) + } + + arm_cmn_init_node_info(cmn, reg & CMN_CHILD_NODE_ADDR, dn); ++ dn->portid_bits = xp->portid_bits; ++ dn->deviceid_bits = xp->deviceid_bits; + + switch (dn->type) { + case CMN_TYPE_DTC: +-- +2.43.0 + diff --git a/queue-6.6/perf-arm-smmuv3-fix-lockdep-assert-in-event_init.patch b/queue-6.6/perf-arm-smmuv3-fix-lockdep-assert-in-event_init.patch new file mode 100644 index 00000000000..412330c5dd8 --- /dev/null +++ b/queue-6.6/perf-arm-smmuv3-fix-lockdep-assert-in-event_init.patch @@ -0,0 +1,68 @@ +From 24d35b55c03c3189fc1a0d92d27bcf75d9ec3d98 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 8 Nov 2024 05:08:05 +0000 +Subject: perf/arm-smmuv3: Fix lockdep assert in ->event_init() + +From: Chun-Tse Shao + +[ Upstream commit 02a55f2743012a8089f09f6867220c3d57f16564 ] + +Same as +https://lore.kernel.org/all/20240514180050.182454-1-namhyung@kernel.org/, +we should skip `for_each_sibling_event()` for group leader since it +doesn't have the ctx yet.
+ +Fixes: f3c0eba28704 ("perf: Add a few assertions") +Reported-by: Greg Thelen +Cc: Namhyung Kim +Cc: Robin Murphy +Cc: Tuan Phan +Signed-off-by: Chun-Tse Shao +Acked-by: Will Deacon +Link: https://lore.kernel.org/r/20241108050806.3730811-1-ctshao@google.com +Signed-off-by: Catalin Marinas +Signed-off-by: Sasha Levin +--- + drivers/perf/arm_smmuv3_pmu.c | 19 +++++++++++-------- + 1 file changed, 11 insertions(+), 8 deletions(-) + +diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c +index 6303b82566f98..31e491e7f2065 100644 +--- a/drivers/perf/arm_smmuv3_pmu.c ++++ b/drivers/perf/arm_smmuv3_pmu.c +@@ -431,6 +431,17 @@ static int smmu_pmu_event_init(struct perf_event *event) + return -EINVAL; + } + ++ /* ++ * Ensure all events are on the same cpu so all events are in the ++ * same cpu context, to avoid races on pmu_enable etc. ++ */ ++ event->cpu = smmu_pmu->on_cpu; ++ ++ hwc->idx = -1; ++ ++ if (event->group_leader == event) ++ return 0; ++ + for_each_sibling_event(sibling, event->group_leader) { + if (is_software_event(sibling)) + continue; +@@ -442,14 +453,6 @@ static int smmu_pmu_event_init(struct perf_event *event) + return -EINVAL; + } + +- hwc->idx = -1; +- +- /* +- * Ensure all events are on the same cpu so all events are in the +- * same cpu context, to avoid races on pmu_enable etc. 
+- */ +- event->cpu = smmu_pmu->on_cpu; +- + return 0; + } + +-- +2.43.0 + diff --git a/queue-6.6/rename-.data.once-to-.data.once-to-fix-resetting-war.patch b/queue-6.6/rename-.data.once-to-.data.once-to-fix-resetting-war.patch new file mode 100644 index 00000000000..d258c8c6b2d --- /dev/null +++ b/queue-6.6/rename-.data.once-to-.data.once-to-fix-resetting-war.patch @@ -0,0 +1,153 @@ +From 742576c576b78291d3859670bdc58b36edf67682 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 7 Nov 2024 01:14:41 +0900 +Subject: Rename .data.once to .data..once to fix resetting WARN*_ONCE + +From: Masahiro Yamada + +[ Upstream commit dbefa1f31a91670c9e7dac9b559625336206466f ] + +Commit b1fca27d384e ("kernel debug: support resetting WARN*_ONCE") +added support for clearing the state of once warnings. However, +it is not functional when CONFIG_LD_DEAD_CODE_DATA_ELIMINATION or +CONFIG_LTO_CLANG is enabled, because .data.once matches the +.data.[0-9a-zA-Z_]* pattern in the DATA_MAIN macro. + +Commit cb87481ee89d ("kbuild: linker script do not match C names unless +LD_DEAD_CODE_DATA_ELIMINATION is configured") was introduced to suppress +the issue for the default CONFIG_LD_DEAD_CODE_DATA_ELIMINATION=n case, +providing a minimal fix for stable backporting. We were aware this did +not address the issue for CONFIG_LD_DEAD_CODE_DATA_ELIMINATION=y. The +plan was to apply correct fixes and then revert cb87481ee89d. [1] + +Seven years have passed since then, yet the #ifdef workaround remains in +place. Meanwhile, commit b1fca27d384e introduced the .data.once section, +and commit dc5723b02e52 ("kbuild: add support for Clang LTO") extended +the #ifdef. + +Using a ".." separator in the section name fixes the issue for +CONFIG_LD_DEAD_CODE_DATA_ELIMINATION and CONFIG_LTO_CLANG. 
+ +[1]: https://lore.kernel.org/linux-kbuild/CAK7LNASck6BfdLnESxXUeECYL26yUDm0cwRZuM4gmaWUkxjL5g@mail.gmail.com/ + +Fixes: b1fca27d384e ("kernel debug: support resetting WARN*_ONCE") +Fixes: dc5723b02e52 ("kbuild: add support for Clang LTO") +Signed-off-by: Masahiro Yamada +Signed-off-by: Sasha Levin +--- + include/asm-generic/vmlinux.lds.h | 2 +- + include/linux/mmdebug.h | 6 +++--- + include/linux/once.h | 4 ++-- + include/linux/once_lite.h | 2 +- + include/net/net_debug.h | 2 +- + mm/internal.h | 2 +- + 6 files changed, 9 insertions(+), 9 deletions(-) + +diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h +index cb12f164caf1e..7e11ca6f86dcd 100644 +--- a/include/asm-generic/vmlinux.lds.h ++++ b/include/asm-generic/vmlinux.lds.h +@@ -349,7 +349,7 @@ + *(.data..shared_aligned) /* percpu related */ \ + *(.data..unlikely) \ + __start_once = .; \ +- *(.data.once) \ ++ *(.data..once) \ + __end_once = .; \ + STRUCT_ALIGN(); \ + *(__tracepoints) \ +diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h +index 7c3e7b0b0e8fd..28c21d5b25f6b 100644 +--- a/include/linux/mmdebug.h ++++ b/include/linux/mmdebug.h +@@ -46,7 +46,7 @@ void vma_iter_dump_tree(const struct vma_iterator *vmi); + } \ + } while (0) + #define VM_WARN_ON_ONCE_PAGE(cond, page) ({ \ +- static bool __section(".data.once") __warned; \ ++ static bool __section(".data..once") __warned; \ + int __ret_warn_once = !!(cond); \ + \ + if (unlikely(__ret_warn_once && !__warned)) { \ +@@ -66,7 +66,7 @@ void vma_iter_dump_tree(const struct vma_iterator *vmi); + unlikely(__ret_warn); \ + }) + #define VM_WARN_ON_ONCE_FOLIO(cond, folio) ({ \ +- static bool __section(".data.once") __warned; \ ++ static bool __section(".data..once") __warned; \ + int __ret_warn_once = !!(cond); \ + \ + if (unlikely(__ret_warn_once && !__warned)) { \ +@@ -77,7 +77,7 @@ void vma_iter_dump_tree(const struct vma_iterator *vmi); + unlikely(__ret_warn_once); \ + }) + #define VM_WARN_ON_ONCE_MM(cond, mm) ({ \ 
+- static bool __section(".data.once") __warned; \ ++ static bool __section(".data..once") __warned; \ + int __ret_warn_once = !!(cond); \ + \ + if (unlikely(__ret_warn_once && !__warned)) { \ +diff --git a/include/linux/once.h b/include/linux/once.h +index bc714d414448a..30346fcdc7995 100644 +--- a/include/linux/once.h ++++ b/include/linux/once.h +@@ -46,7 +46,7 @@ void __do_once_sleepable_done(bool *done, struct static_key_true *once_key, + #define DO_ONCE(func, ...) \ + ({ \ + bool ___ret = false; \ +- static bool __section(".data.once") ___done = false; \ ++ static bool __section(".data..once") ___done = false; \ + static DEFINE_STATIC_KEY_TRUE(___once_key); \ + if (static_branch_unlikely(&___once_key)) { \ + unsigned long ___flags; \ +@@ -64,7 +64,7 @@ void __do_once_sleepable_done(bool *done, struct static_key_true *once_key, + #define DO_ONCE_SLEEPABLE(func, ...) \ + ({ \ + bool ___ret = false; \ +- static bool __section(".data.once") ___done = false; \ ++ static bool __section(".data..once") ___done = false; \ + static DEFINE_STATIC_KEY_TRUE(___once_key); \ + if (static_branch_unlikely(&___once_key)) { \ + ___ret = __do_once_sleepable_start(&___done); \ +diff --git a/include/linux/once_lite.h b/include/linux/once_lite.h +index b7bce4983638f..27de7bc32a061 100644 +--- a/include/linux/once_lite.h ++++ b/include/linux/once_lite.h +@@ -12,7 +12,7 @@ + + #define __ONCE_LITE_IF(condition) \ + ({ \ +- static bool __section(".data.once") __already_done; \ ++ static bool __section(".data..once") __already_done; \ + bool __ret_cond = !!(condition); \ + bool __ret_once = false; \ + \ +diff --git a/include/net/net_debug.h b/include/net/net_debug.h +index 1e74684cbbdbc..4a79204c8d306 100644 +--- a/include/net/net_debug.h ++++ b/include/net/net_debug.h +@@ -27,7 +27,7 @@ void netdev_info(const struct net_device *dev, const char *format, ...); + + #define netdev_level_once(level, dev, fmt, ...) 
\ + do { \ +- static bool __section(".data.once") __print_once; \ ++ static bool __section(".data..once") __print_once; \ + \ + if (!__print_once) { \ + __print_once = true; \ +diff --git a/mm/internal.h b/mm/internal.h +index a0b24d0055795..f773db493a99d 100644 +--- a/mm/internal.h ++++ b/mm/internal.h +@@ -40,7 +40,7 @@ struct folio_batch; + * when we specify __GFP_NOWARN. + */ + #define WARN_ON_ONCE_GFP(cond, gfp) ({ \ +- static bool __section(".data.once") __warned; \ ++ static bool __section(".data..once") __warned; \ + int __ret_warn_once = !!(cond); \ + \ + if (unlikely(!(gfp & __GFP_NOWARN) && __ret_warn_once && !__warned)) { \ +-- +2.43.0 + diff --git a/queue-6.6/rename-.data.unlikely-to-.data.unlikely.patch b/queue-6.6/rename-.data.unlikely-to-.data.unlikely.patch new file mode 100644 index 00000000000..1e3b72fb3a8 --- /dev/null +++ b/queue-6.6/rename-.data.unlikely-to-.data.unlikely.patch @@ -0,0 +1,67 @@ +From 6746d922f37435675cadb4728cc9a248de35038f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 7 Nov 2024 01:14:40 +0900 +Subject: Rename .data.unlikely to .data..unlikely + +From: Masahiro Yamada + +[ Upstream commit bb43a59944f45e89aa158740b8a16ba8f0b0fa2b ] + +Commit 7ccaba5314ca ("consolidate WARN_...ONCE() static variables") +was intended to collect all .data.unlikely sections into one chunk. +However, this has not worked when CONFIG_LD_DEAD_CODE_DATA_ELIMINATION +or CONFIG_LTO_CLANG is enabled, because .data.unlikely matches the +.data.[0-9a-zA-Z_]* pattern in the DATA_MAIN macro. + +Commit cb87481ee89d ("kbuild: linker script do not match C names unless +LD_DEAD_CODE_DATA_ELIMINATION is configured") was introduced to suppress +the issue for the default CONFIG_LD_DEAD_CODE_DATA_ELIMINATION=n case, +providing a minimal fix for stable backporting. We were aware this did +not address the issue for CONFIG_LD_DEAD_CODE_DATA_ELIMINATION=y. The +plan was to apply correct fixes and then revert cb87481ee89d. 
[1] + +Seven years have passed since then, yet the #ifdef workaround remains in +place. + +Using a ".." separator in the section name fixes the issue for +CONFIG_LD_DEAD_CODE_DATA_ELIMINATION and CONFIG_LTO_CLANG. + +[1]: https://lore.kernel.org/linux-kbuild/CAK7LNASck6BfdLnESxXUeECYL26yUDm0cwRZuM4gmaWUkxjL5g@mail.gmail.com/ + +Fixes: cb87481ee89d ("kbuild: linker script do not match C names unless LD_DEAD_CODE_DATA_ELIMINATION is configured") +Signed-off-by: Masahiro Yamada +Signed-off-by: Sasha Levin +--- + include/asm-generic/vmlinux.lds.h | 2 +- + include/linux/rcupdate.h | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h +index 5793aedb24c6d..cb12f164caf1e 100644 +--- a/include/asm-generic/vmlinux.lds.h ++++ b/include/asm-generic/vmlinux.lds.h +@@ -347,7 +347,7 @@ + *(.data..decrypted) \ + *(.ref.data) \ + *(.data..shared_aligned) /* percpu related */ \ +- *(.data.unlikely) \ ++ *(.data..unlikely) \ + __start_once = .; \ + *(.data.once) \ + __end_once = .; \ +diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h +index 6466c2f792923..7602d1f8a9ecb 100644 +--- a/include/linux/rcupdate.h ++++ b/include/linux/rcupdate.h +@@ -398,7 +398,7 @@ static inline int debug_lockdep_rcu_enabled(void) + */ + #define RCU_LOCKDEP_WARN(c, s) \ + do { \ +- static bool __section(".data.unlikely") __warned; \ ++ static bool __section(".data..unlikely") __warned; \ + if (debug_lockdep_rcu_enabled() && (c) && \ + debug_lockdep_rcu_enabled() && !__warned) { \ + __warned = true; \ +-- +2.43.0 + diff --git a/queue-6.6/rtc-ab-eoz9-don-t-fail-temperature-reads-on-undervol.patch b/queue-6.6/rtc-ab-eoz9-don-t-fail-temperature-reads-on-undervol.patch new file mode 100644 index 00000000000..e00f06f2bc7 --- /dev/null +++ b/queue-6.6/rtc-ab-eoz9-don-t-fail-temperature-reads-on-undervol.patch @@ -0,0 +1,49 @@ +From e35042278b8910ba5c8324bb094edf408e0798a8 Mon Sep 17 00:00:00 2001 +From: Sasha 
Levin +Date: Fri, 22 Nov 2024 11:10:30 +0100 +Subject: rtc: ab-eoz9: don't fail temperature reads on undervoltage + notification + +From: Maxime Chevallier + +[ Upstream commit e0779a0dcf41a6452ac0a169cd96863feb5787c7 ] + +The undervoltage flags reported by the RTC are useful to know if the +time and date are reliable after a reboot. Although the threshold VLOW1 +indicates that the thermometer has been shutdown and time compensation +is off, it doesn't mean that the temperature readout is currently +impossible. + +As the system is running, the RTC voltage is now fully established and +we can read the temperature. + +Fixes: 67075b63cce2 ("rtc: add AB-RTCMC-32.768kHz-EOZ9 RTC support") +Signed-off-by: Maxime Chevallier +Link: https://lore.kernel.org/r/20241122101031.68916-3-maxime.chevallier@bootlin.com +Signed-off-by: Alexandre Belloni +Signed-off-by: Sasha Levin +--- + drivers/rtc/rtc-ab-eoz9.c | 7 ------- + 1 file changed, 7 deletions(-) + +diff --git a/drivers/rtc/rtc-ab-eoz9.c b/drivers/rtc/rtc-ab-eoz9.c +index 04e1b8e93bc1c..79d5ee7b818c5 100644 +--- a/drivers/rtc/rtc-ab-eoz9.c ++++ b/drivers/rtc/rtc-ab-eoz9.c +@@ -396,13 +396,6 @@ static int abeoz9z3_temp_read(struct device *dev, + if (ret < 0) + return ret; + +- if ((val & ABEOZ9_REG_CTRL_STATUS_V1F) || +- (val & ABEOZ9_REG_CTRL_STATUS_V2F)) { +- dev_err(dev, +- "thermometer might be disabled due to low voltage\n"); +- return -EINVAL; +- } +- + switch (attr) { + case hwmon_temp_input: + ret = regmap_read(regmap, ABEOZ9_REG_REG_TEMP, &val); +-- +2.43.0 + diff --git a/queue-6.6/rtc-abx80x-fix-wdt-bit-position-of-the-status-regist.patch b/queue-6.6/rtc-abx80x-fix-wdt-bit-position-of-the-status-regist.patch new file mode 100644 index 00000000000..b3799268b47 --- /dev/null +++ b/queue-6.6/rtc-abx80x-fix-wdt-bit-position-of-the-status-regist.patch @@ -0,0 +1,39 @@ +From 20f9f4ed6617dd433fdb3dbeca99be91dffd25b8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 8 Oct 2024 13:17:37 +0900 +Subject: rtc: abx80x: 
Fix WDT bit position of the status register + +From: Nobuhiro Iwamatsu + +[ Upstream commit 10e078b273ee7a2b8b4f05a64ac458f5e652d18d ] + +The WDT bit in the status register is 5, not 6. This fixes from 6 to 5. + +Link: https://abracon.com/Support/AppsManuals/Precisiontiming/AB08XX-Application-Manual.pdf +Link: https://www.microcrystal.com/fileadmin/Media/Products/RTC/App.Manual/RV-1805-C3_App-Manual.pdf +Fixes: 749e36d0a0d7 ("rtc: abx80x: add basic watchdog support") +Cc: Jeremy Gebben +Signed-off-by: Nobuhiro Iwamatsu +Link: https://lore.kernel.org/r/20241008041737.1640633-1-iwamatsu@nigauri.org +Signed-off-by: Alexandre Belloni +Signed-off-by: Sasha Levin +--- + drivers/rtc/rtc-abx80x.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/rtc/rtc-abx80x.c b/drivers/rtc/rtc-abx80x.c +index 1298962402ff4..3fee27914ba80 100644 +--- a/drivers/rtc/rtc-abx80x.c ++++ b/drivers/rtc/rtc-abx80x.c +@@ -39,7 +39,7 @@ + #define ABX8XX_REG_STATUS 0x0f + #define ABX8XX_STATUS_AF BIT(2) + #define ABX8XX_STATUS_BLF BIT(4) +-#define ABX8XX_STATUS_WDT BIT(6) ++#define ABX8XX_STATUS_WDT BIT(5) + + #define ABX8XX_REG_CTRL1 0x10 + #define ABX8XX_CTRL_WRITE BIT(0) +-- +2.43.0 + diff --git a/queue-6.6/rtc-check-if-__rtc_read_time-was-successful-in-rtc_t.patch b/queue-6.6/rtc-check-if-__rtc_read_time-was-successful-in-rtc_t.patch new file mode 100644 index 00000000000..f3dfd2d0300 --- /dev/null +++ b/queue-6.6/rtc-check-if-__rtc_read_time-was-successful-in-rtc_t.patch @@ -0,0 +1,53 @@ +From 749326f584ce243af77d538ebe6902c280981ca7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 11 Oct 2024 12:31:53 +0800 +Subject: rtc: check if __rtc_read_time was successful in rtc_timer_do_work() + +From: Yongliang Gao + +[ Upstream commit e8ba8a2bc4f60a1065f23d6a0e7cbea945a0f40d ] + +If the __rtc_read_time call fails,, the struct rtc_time tm; may contain +uninitialized data, or an illegal date/time read from the RTC hardware. 
+ +When calling rtc_tm_to_ktime later, the result may be a very large value +(possibly KTIME_MAX). If there are periodic timers in rtc->timerqueue, +they will continually expire, possibly causing a kernel softlockup. + +Fixes: 6610e0893b8b ("RTC: Rework RTC code to use timerqueue for events") +Signed-off-by: Yongliang Gao +Acked-by: Jingqun Li +Link: https://lore.kernel.org/r/20241011043153.3788112-1-leonylgao@gmail.com +Signed-off-by: Alexandre Belloni +Signed-off-by: Sasha Levin +--- + drivers/rtc/interface.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c +index 0b23706d9fd3c..4a7c41a6c21e7 100644 +--- a/drivers/rtc/interface.c ++++ b/drivers/rtc/interface.c +@@ -904,13 +904,18 @@ void rtc_timer_do_work(struct work_struct *work) + struct timerqueue_node *next; + ktime_t now; + struct rtc_time tm; ++ int err; + + struct rtc_device *rtc = + container_of(work, struct rtc_device, irqwork); + + mutex_lock(&rtc->ops_lock); + again: +- __rtc_read_time(rtc, &tm); ++ err = __rtc_read_time(rtc, &tm); ++ if (err) { ++ mutex_unlock(&rtc->ops_lock); ++ return; ++ } + now = rtc_tm_to_ktime(tm); + while ((next = timerqueue_getnext(&rtc->timerqueue))) { + if (next->expires > now) +-- +2.43.0 + diff --git a/queue-6.6/rtc-rzn1-fix-bcd-to-rtc_time-conversion-errors.patch b/queue-6.6/rtc-rzn1-fix-bcd-to-rtc_time-conversion-errors.patch new file mode 100644 index 00000000000..fd9b249b022 --- /dev/null +++ b/queue-6.6/rtc-rzn1-fix-bcd-to-rtc_time-conversion-errors.patch @@ -0,0 +1,52 @@ +From 7ce700fd92a3bb1e8079d1a8370b6b4bff9877f7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 13 Nov 2024 12:30:32 +0100 +Subject: rtc: rzn1: fix BCD to rtc_time conversion errors + +From: Wolfram Sang + +[ Upstream commit 55727188dfa3572aecd946e58fab9e4a64f06894 ] + +tm_mon describes months from 0 to 11, but the register contains BCD from +1 to 12. tm_year contains years since 1900, but the BCD contains 20XX.
+Apply the offsets when converting these numbers. + +Fixes: deeb4b5393e1 ("rtc: rzn1: Add new RTC driver") +Signed-off-by: Wolfram Sang +Reviewed-by: Miquel Raynal +Link: https://lore.kernel.org/r/20241113113032.27409-1-wsa+renesas@sang-engineering.com +Signed-off-by: Alexandre Belloni +Signed-off-by: Sasha Levin +--- + drivers/rtc/rtc-rzn1.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/drivers/rtc/rtc-rzn1.c b/drivers/rtc/rtc-rzn1.c +index 56ebbd4d04814..8570c8e63d70c 100644 +--- a/drivers/rtc/rtc-rzn1.c ++++ b/drivers/rtc/rtc-rzn1.c +@@ -111,8 +111,8 @@ static int rzn1_rtc_read_time(struct device *dev, struct rtc_time *tm) + tm->tm_hour = bcd2bin(tm->tm_hour); + tm->tm_wday = bcd2bin(tm->tm_wday); + tm->tm_mday = bcd2bin(tm->tm_mday); +- tm->tm_mon = bcd2bin(tm->tm_mon); +- tm->tm_year = bcd2bin(tm->tm_year); ++ tm->tm_mon = bcd2bin(tm->tm_mon) - 1; ++ tm->tm_year = bcd2bin(tm->tm_year) + 100; + + return 0; + } +@@ -128,8 +128,8 @@ static int rzn1_rtc_set_time(struct device *dev, struct rtc_time *tm) + tm->tm_hour = bin2bcd(tm->tm_hour); + tm->tm_wday = bin2bcd(rzn1_rtc_tm_to_wday(tm)); + tm->tm_mday = bin2bcd(tm->tm_mday); +- tm->tm_mon = bin2bcd(tm->tm_mon); +- tm->tm_year = bin2bcd(tm->tm_year); ++ tm->tm_mon = bin2bcd(tm->tm_mon + 1); ++ tm->tm_year = bin2bcd(tm->tm_year - 100); + + val = readl(rtc->base + RZN1_RTC_CTL2); + if (!(val & RZN1_RTC_CTL2_STOPPED)) { +-- +2.43.0 + diff --git a/queue-6.6/rtc-st-lpc-use-irqf_no_autoen-flag-in-request_irq.patch b/queue-6.6/rtc-st-lpc-use-irqf_no_autoen-flag-in-request_irq.patch new file mode 100644 index 00000000000..57f18314c70 --- /dev/null +++ b/queue-6.6/rtc-st-lpc-use-irqf_no_autoen-flag-in-request_irq.patch @@ -0,0 +1,50 @@ +From baddad6986d72a57397c2730d9a4e55271e7a269 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 12 Sep 2024 11:37:27 +0800 +Subject: rtc: st-lpc: Use IRQF_NO_AUTOEN flag in request_irq() + +From: Jinjie Ruan + +[ Upstream commit 
b6cd7adec0cf03f0aefc55676e71dd721cbc71a8 ] + +If request_irq() fails in st_rtc_probe(), there is no need to enable +the irq, and if it succeeds, disable_irq() after request_irq() still has +a time gap in which interrupts can come. + +request_irq() with IRQF_NO_AUTOEN flag will disable IRQ auto-enable when +request IRQ. + +Fixes: b5b2bdfc2893 ("rtc: st: Add new driver for ST's LPC RTC") +Signed-off-by: Jinjie Ruan +Link: https://lore.kernel.org/r/20240912033727.3013951-1-ruanjinjie@huawei.com +Signed-off-by: Alexandre Belloni +Signed-off-by: Sasha Levin +--- + drivers/rtc/rtc-st-lpc.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/drivers/rtc/rtc-st-lpc.c b/drivers/rtc/rtc-st-lpc.c +index d492a2d26600c..c6d4522411b31 100644 +--- a/drivers/rtc/rtc-st-lpc.c ++++ b/drivers/rtc/rtc-st-lpc.c +@@ -218,15 +218,14 @@ static int st_rtc_probe(struct platform_device *pdev) + return -EINVAL; + } + +- ret = devm_request_irq(&pdev->dev, rtc->irq, st_rtc_handler, 0, +- pdev->name, rtc); ++ ret = devm_request_irq(&pdev->dev, rtc->irq, st_rtc_handler, ++ IRQF_NO_AUTOEN, pdev->name, rtc); + if (ret) { + dev_err(&pdev->dev, "Failed to request irq %i\n", rtc->irq); + return ret; + } + + enable_irq_wake(rtc->irq); +- disable_irq(rtc->irq); + + rtc->clk = devm_clk_get_enabled(&pdev->dev, NULL); + if (IS_ERR(rtc->clk)) +-- +2.43.0 + diff --git a/queue-6.6/series b/queue-6.6/series index d3f4cefbed3..126163963c4 100644 --- a/queue-6.6/series +++ b/queue-6.6/series @@ -559,3 +559,49 @@ f2fs-fix-to-do-sanity-check-on-node-blkaddr-in-truncate_node.patch ipc-fix-memleak-if-msg_init_ns-failed-in-create_ipc_ns.patch nfsd-prevent-a-potential-integer-overflow.patch sunrpc-make-sure-cache-entry-active-before-cache_show.patch +um-fix-potential-integer-overflow-during-physmem-set.patch +um-fix-the-return-value-of-elf_core_copy_task_fpregs.patch +um-always-dump-trace-for-specified-task-in-show_stac.patch +nfsv4.0-fix-a-use-after-free-problem-in-the-asynchro.patch 
+rtc-st-lpc-use-irqf_no_autoen-flag-in-request_irq.patch +rtc-abx80x-fix-wdt-bit-position-of-the-status-regist.patch +rtc-check-if-__rtc_read_time-was-successful-in-rtc_t.patch +ubi-fastmap-wl-schedule-fm_work-if-wear-leveling-poo.patch +ubifs-correct-the-total-block-count-by-deducting-jou.patch +ubi-fastmap-fix-duplicate-slab-cache-names-while-att.patch +ubifs-authentication-fix-use-after-free-in-ubifs_tnc.patch +jffs2-fix-use-of-uninitialized-variable.patch +rtc-rzn1-fix-bcd-to-rtc_time-conversion-errors.patch +nvme-multipath-prepare-for-queue-depth-iopolicy.patch +nvme-multipath-implement-queue-depth-iopolicy.patch +nvme-multipath-avoid-hang-on-inaccessible-namespaces.patch +nvme-multipath-fix-rcu-list-traversal-to-use-srcu-pr.patch +block-return-unsigned-int-from-bdev_io_min.patch +9p-xen-fix-init-sequence.patch +9p-xen-fix-release-of-irq.patch +perf-arm-smmuv3-fix-lockdep-assert-in-event_init.patch +perf-arm-cmn-ensure-port-and-device-id-bits-are-set-.patch +smb-client-disable-directory-caching-when-dir_cache_.patch +cifs-fix-parsing-native-symlinks-relative-to-the-exp.patch +cifs-fix-parsing-reparse-point-with-native-symlink-i.patch +rtc-ab-eoz9-don-t-fail-temperature-reads-on-undervol.patch +modpost-remove-all_exit_data_sections-macro.patch +modpost-disallow-driver-to-reference-.meminit-sectio.patch +modpost-remove-mem_init_sections-macro.patch +modpost-remove-exit_sections-macro.patch +modpost-disallow-the-combination-of-export_symbol-an.patch +modpost-use-all_init_sections-for-the-section-check-.patch +modpost-squash-all_-init-exit-_text_sections-to-all_.patch +init-modpost-conditionally-check-section-mismatch-to.patch +rename-.data.unlikely-to-.data.unlikely.patch +rename-.data.once-to-.data.once-to-fix-resetting-war.patch +smb-initialize-cfid-tcon-before-performing-network-o.patch +modpost-remove-incorrect-code-in-do_eisa_entry.patch +cifs-during-remount-make-sure-passwords-are-in-sync.patch +cifs-unlock-on-error-in-smb3_reconfigure.patch 
+nfs-ignore-sb_rdonly-when-mounting-nfs.patch +sunrpc-clear-xprt_sock_upd_timeout-when-reset-transp.patch +sunrpc-timeout-and-cancel-tls-handshake-with-etimedo.patch +sunrpc-fix-one-uaf-issue-caused-by-sunrpc-kernel-tcp.patch +block-bfq-fix-bfqq-uaf-in-bfq_limit_depth.patch +sh-intc-fix-use-after-free-bug-in-register_intc_cont.patch diff --git a/queue-6.6/sh-intc-fix-use-after-free-bug-in-register_intc_cont.patch b/queue-6.6/sh-intc-fix-use-after-free-bug-in-register_intc_cont.patch new file mode 100644 index 00000000000..6d462f75c58 --- /dev/null +++ b/queue-6.6/sh-intc-fix-use-after-free-bug-in-register_intc_cont.patch @@ -0,0 +1,46 @@ +From 037cf49e388ef86bd6a75b32cfc26ba473e965dc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 23 Oct 2024 11:41:59 +0300 +Subject: sh: intc: Fix use-after-free bug in register_intc_controller() + +From: Dan Carpenter + +[ Upstream commit 63e72e551942642c48456a4134975136cdcb9b3c ] + +In the error handling for this function, d is freed without ever +removing it from intc_list which would lead to a use after free. +To fix this, let's only add it to the list after everything has +succeeded. 
+ +Fixes: 2dcec7a988a1 ("sh: intc: set_irq_wake() support") +Signed-off-by: Dan Carpenter +Reviewed-by: John Paul Adrian Glaubitz +Signed-off-by: John Paul Adrian Glaubitz +Signed-off-by: Sasha Levin +--- + drivers/sh/intc/core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/sh/intc/core.c b/drivers/sh/intc/core.c +index ca4f4ca413f11..b19388b349be3 100644 +--- a/drivers/sh/intc/core.c ++++ b/drivers/sh/intc/core.c +@@ -209,7 +209,6 @@ int __init register_intc_controller(struct intc_desc *desc) + goto err0; + + INIT_LIST_HEAD(&d->list); +- list_add_tail(&d->list, &intc_list); + + raw_spin_lock_init(&d->lock); + INIT_RADIX_TREE(&d->tree, GFP_ATOMIC); +@@ -369,6 +368,7 @@ int __init register_intc_controller(struct intc_desc *desc) + + d->skip_suspend = desc->skip_syscore_suspend; + ++ list_add_tail(&d->list, &intc_list); + nr_intc_controllers++; + + return 0; +-- +2.43.0 + diff --git a/queue-6.6/smb-client-disable-directory-caching-when-dir_cache_.patch b/queue-6.6/smb-client-disable-directory-caching-when-dir_cache_.patch new file mode 100644 index 00000000000..389cf697e93 --- /dev/null +++ b/queue-6.6/smb-client-disable-directory-caching-when-dir_cache_.patch @@ -0,0 +1,44 @@ +From 6a0e72ea06c4b77215a250b33a88799023323775 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Nov 2024 22:14:35 -0300 +Subject: smb: client: disable directory caching when dir_cache_timeout is zero + +From: Henrique Carvalho + +[ Upstream commit ceaf1451990e3ea7fb50aebb5a149f57945f6e9f ] + +Setting dir_cache_timeout to zero should disable the caching of +directory contents. Currently, even when dir_cache_timeout is zero, +some caching related functions are still invoked, which is unintended +behavior. + +Fix the issue by setting tcon->nohandlecache to true when +dir_cache_timeout is zero, ensuring that directory handle caching +is properly disabled. 
+ +Fixes: 238b351d0935 ("smb3: allow controlling length of time directory entries are cached with dir leases") +Reviewed-by: Paulo Alcantara (Red Hat) +Reviewed-by: Enzo Matsumiya +Signed-off-by: Henrique Carvalho +Signed-off-by: Steve French +Signed-off-by: Sasha Levin +--- + fs/smb/client/connect.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c +index 5cb6d1b47415d..7b850c40b2f32 100644 +--- a/fs/smb/client/connect.c ++++ b/fs/smb/client/connect.c +@@ -2601,7 +2601,7 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx) + + if (ses->server->dialect >= SMB20_PROT_ID && + (ses->server->capabilities & SMB2_GLOBAL_CAP_DIRECTORY_LEASING)) +- nohandlecache = ctx->nohandlecache; ++ nohandlecache = ctx->nohandlecache || !dir_cache_timeout; + else + nohandlecache = true; + tcon = tcon_info_alloc(!nohandlecache, netfs_trace_tcon_ref_new); +-- +2.43.0 + diff --git a/queue-6.6/smb-initialize-cfid-tcon-before-performing-network-o.patch b/queue-6.6/smb-initialize-cfid-tcon-before-performing-network-o.patch new file mode 100644 index 00000000000..0774ddeed3f --- /dev/null +++ b/queue-6.6/smb-initialize-cfid-tcon-before-performing-network-o.patch @@ -0,0 +1,45 @@ +From 9dac0059ae041baa5ee141fb46438e5d2a0d1967 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 26 Nov 2024 18:50:31 -0600 +Subject: smb: Initialize cfid->tcon before performing network ops + +From: Paul Aurich + +[ Upstream commit c353ee4fb119a2582d0e011f66a76a38f5cf984d ] + +Avoid leaking a tcon ref when a lease break races with opening the +cached directory. Processing the leak break might take a reference to +the tcon in cached_dir_lease_break() and then fail to release the ref in +cached_dir_offload_close, since cfid->tcon is still NULL. 
+ +Fixes: ebe98f1447bb ("cifs: enable caching of directories for which a lease is held") +Signed-off-by: Paul Aurich +Signed-off-by: Steve French +Signed-off-by: Sasha Levin +--- + fs/smb/client/cached_dir.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c +index 004349a7ab69d..9c0ef4195b582 100644 +--- a/fs/smb/client/cached_dir.c ++++ b/fs/smb/client/cached_dir.c +@@ -227,6 +227,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, + } + } + cfid->dentry = dentry; ++ cfid->tcon = tcon; + + /* + * We do not hold the lock for the open because in case +@@ -298,7 +299,6 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, + } + goto oshr_free; + } +- cfid->tcon = tcon; + cfid->is_open = true; + + spin_lock(&cfids->cfid_list_lock); +-- +2.43.0 + diff --git a/queue-6.6/sunrpc-clear-xprt_sock_upd_timeout-when-reset-transp.patch b/queue-6.6/sunrpc-clear-xprt_sock_upd_timeout-when-reset-transp.patch new file mode 100644 index 00000000000..f3f1ecd3f6a --- /dev/null +++ b/queue-6.6/sunrpc-clear-xprt_sock_upd_timeout-when-reset-transp.patch @@ -0,0 +1,38 @@ +From 413b2a6b684b341ab9466ecb4a4671269126ec87 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 15 Nov 2024 17:38:04 +0800 +Subject: sunrpc: clear XPRT_SOCK_UPD_TIMEOUT when reset transport + +From: Liu Jian + +[ Upstream commit 4db9ad82a6c823094da27de4825af693a3475d51 ] + +Since transport->sock has been set to NULL during reset transport, +XPRT_SOCK_UPD_TIMEOUT also needs to be cleared. Otherwise, the +xs_tcp_set_socket_timeouts() may be triggered in xs_tcp_send_request() +to dereference the transport->sock that has been set to NULL. 
+ +Fixes: 7196dbb02ea0 ("SUNRPC: Allow changing of the TCP timeout parameters on the fly") +Signed-off-by: Li Lingfeng +Signed-off-by: Liu Jian +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + net/sunrpc/xprtsock.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c +index 50490b1e8a0d0..714da627fba8e 100644 +--- a/net/sunrpc/xprtsock.c ++++ b/net/sunrpc/xprtsock.c +@@ -1186,6 +1186,7 @@ static void xs_sock_reset_state_flags(struct rpc_xprt *xprt) + clear_bit(XPRT_SOCK_WAKE_WRITE, &transport->sock_state); + clear_bit(XPRT_SOCK_WAKE_DISCONNECT, &transport->sock_state); + clear_bit(XPRT_SOCK_NOSPACE, &transport->sock_state); ++ clear_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state); + } + + static void xs_run_error_worker(struct sock_xprt *transport, unsigned int nr) +-- +2.43.0 + diff --git a/queue-6.6/sunrpc-fix-one-uaf-issue-caused-by-sunrpc-kernel-tcp.patch b/queue-6.6/sunrpc-fix-one-uaf-issue-caused-by-sunrpc-kernel-tcp.patch new file mode 100644 index 00000000000..8869069e6b9 --- /dev/null +++ b/queue-6.6/sunrpc-fix-one-uaf-issue-caused-by-sunrpc-kernel-tcp.patch @@ -0,0 +1,165 @@ +From c301fbb74da21a868cad67dbd1bdaff189d1bc99 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 12 Nov 2024 21:54:34 +0800 +Subject: sunrpc: fix one UAF issue caused by sunrpc kernel tcp socket + +From: Liu Jian + +[ Upstream commit 3f23f96528e8fcf8619895c4c916c52653892ec1 ] + +BUG: KASAN: slab-use-after-free in tcp_write_timer_handler+0x156/0x3e0 +Read of size 1 at addr ffff888111f322cd by task swapper/0/0 + +CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted 6.12.0-rc4-dirty #7 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 +Call Trace: + + dump_stack_lvl+0x68/0xa0 + print_address_description.constprop.0+0x2c/0x3d0 + print_report+0xb4/0x270 + kasan_report+0xbd/0xf0 + tcp_write_timer_handler+0x156/0x3e0 + tcp_write_timer+0x66/0x170 + call_timer_fn+0xfb/0x1d0 + __run_timers+0x3f8/0x480 + 
run_timer_softirq+0x9b/0x100 + handle_softirqs+0x153/0x390 + __irq_exit_rcu+0x103/0x120 + irq_exit_rcu+0xe/0x20 + sysvec_apic_timer_interrupt+0x76/0x90 + + + asm_sysvec_apic_timer_interrupt+0x1a/0x20 +RIP: 0010:default_idle+0xf/0x20 +Code: 4c 01 c7 4c 29 c2 e9 72 ff ff ff 90 90 90 90 90 90 90 90 90 90 90 90 + 90 90 90 90 f3 0f 1e fa 66 90 0f 00 2d 33 f8 25 00 fb f4 c3 cc cc cc + cc 66 66 2e 0f 1f 84 00 00 00 00 00 90 90 90 90 90 +RSP: 0018:ffffffffa2007e28 EFLAGS: 00000242 +RAX: 00000000000f3b31 RBX: 1ffffffff4400fc7 RCX: ffffffffa09c3196 +RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffff9f00590f +RBP: 0000000000000000 R08: 0000000000000001 R09: ffffed102360835d +R10: ffff88811b041aeb R11: 0000000000000001 R12: 0000000000000000 +R13: ffffffffa202d7c0 R14: 0000000000000000 R15: 00000000000147d0 + default_idle_call+0x6b/0xa0 + cpuidle_idle_call+0x1af/0x1f0 + do_idle+0xbc/0x130 + cpu_startup_entry+0x33/0x40 + rest_init+0x11f/0x210 + start_kernel+0x39a/0x420 + x86_64_start_reservations+0x18/0x30 + x86_64_start_kernel+0x97/0xa0 + common_startup_64+0x13e/0x141 + + +Allocated by task 595: + kasan_save_stack+0x24/0x50 + kasan_save_track+0x14/0x30 + __kasan_slab_alloc+0x87/0x90 + kmem_cache_alloc_noprof+0x12b/0x3f0 + copy_net_ns+0x94/0x380 + create_new_namespaces+0x24c/0x500 + unshare_nsproxy_namespaces+0x75/0xf0 + ksys_unshare+0x24e/0x4f0 + __x64_sys_unshare+0x1f/0x30 + do_syscall_64+0x70/0x180 + entry_SYSCALL_64_after_hwframe+0x76/0x7e + +Freed by task 100: + kasan_save_stack+0x24/0x50 + kasan_save_track+0x14/0x30 + kasan_save_free_info+0x3b/0x60 + __kasan_slab_free+0x54/0x70 + kmem_cache_free+0x156/0x5d0 + cleanup_net+0x5d3/0x670 + process_one_work+0x776/0xa90 + worker_thread+0x2e2/0x560 + kthread+0x1a8/0x1f0 + ret_from_fork+0x34/0x60 + ret_from_fork_asm+0x1a/0x30 + +Reproduction script: + +mkdir -p /mnt/nfsshare +mkdir -p /mnt/nfs/netns_1 +mkfs.ext4 /dev/sdb +mount /dev/sdb /mnt/nfsshare +systemctl restart nfs-server +chmod 777 /mnt/nfsshare +exportfs -i -o 
rw,no_root_squash *:/mnt/nfsshare + +ip netns add netns_1 +ip link add name veth_1_peer type veth peer veth_1 +ifconfig veth_1_peer 11.11.0.254 up +ip link set veth_1 netns netns_1 +ip netns exec netns_1 ifconfig veth_1 11.11.0.1 + +ip netns exec netns_1 /root/iptables -A OUTPUT -d 11.11.0.254 -p tcp \ + --tcp-flags FIN FIN -j DROP + +(note: In my environment, a DESTROY_CLIENTID operation is always sent + immediately, breaking the nfs tcp connection.) +ip netns exec netns_1 timeout -s 9 300 mount -t nfs -o proto=tcp,vers=4.1 \ + 11.11.0.254:/mnt/nfsshare /mnt/nfs/netns_1 + +ip netns del netns_1 + +The reason here is that the tcp socket in netns_1 (nfs side) has been +shutdown and closed (done in xs_destroy), but the FIN message (with ack) +is discarded, and the nfsd side keeps sending retransmission messages. +As a result, when the tcp sock in netns_1 processes the received message, +it sends the message (FIN message) in the sending queue, and the tcp timer +is re-established. When the network namespace is deleted, the net structure +accessed by tcp's timer handler function causes problems. + +To fix this problem, let's hold netns refcnt for the tcp kernel socket as +done in other modules. This is an ugly hack which can easily be backported +to earlier kernels. A proper fix which cleans up the interfaces will +follow, but may not be so easy to backport. 
+ +Fixes: 26abe14379f8 ("net: Modify sk_alloc to not reference count the netns of kernel sockets.") +Signed-off-by: Liu Jian +Acked-by: Jeff Layton +Reviewed-by: Kuniyuki Iwashima +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + net/sunrpc/svcsock.c | 4 ++++ + net/sunrpc/xprtsock.c | 7 +++++++ + 2 files changed, 11 insertions(+) + +diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c +index 933e12e3a55c7..83996eea10062 100644 +--- a/net/sunrpc/svcsock.c ++++ b/net/sunrpc/svcsock.c +@@ -1562,6 +1562,10 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv, + newlen = error; + + if (protocol == IPPROTO_TCP) { ++ __netns_tracker_free(net, &sock->sk->ns_tracker, false); ++ sock->sk->sk_net_refcnt = 1; ++ get_net_track(net, &sock->sk->ns_tracker, GFP_KERNEL); ++ sock_inuse_add(net, 1); + if ((error = kernel_listen(sock, 64)) < 0) + goto bummer; + } +diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c +index c528297245125..1c4bc8234ea87 100644 +--- a/net/sunrpc/xprtsock.c ++++ b/net/sunrpc/xprtsock.c +@@ -1921,6 +1921,13 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt, + goto out; + } + ++ if (protocol == IPPROTO_TCP) { ++ __netns_tracker_free(xprt->xprt_net, &sock->sk->ns_tracker, false); ++ sock->sk->sk_net_refcnt = 1; ++ get_net_track(xprt->xprt_net, &sock->sk->ns_tracker, GFP_KERNEL); ++ sock_inuse_add(xprt->xprt_net, 1); ++ } ++ + filp = sock_alloc_file(sock, O_NONBLOCK, NULL); + if (IS_ERR(filp)) + return ERR_CAST(filp); +-- +2.43.0 + diff --git a/queue-6.6/sunrpc-timeout-and-cancel-tls-handshake-with-etimedo.patch b/queue-6.6/sunrpc-timeout-and-cancel-tls-handshake-with-etimedo.patch new file mode 100644 index 00000000000..554ea1fef35 --- /dev/null +++ b/queue-6.6/sunrpc-timeout-and-cancel-tls-handshake-with-etimedo.patch @@ -0,0 +1,64 @@ +From 5fd40b31e790001995d49bebfe4b7fbcf8be0dc0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 15 Nov 2024 08:59:36 -0500 +Subject: SUNRPC: timeout and cancel TLS 
handshake with -ETIMEDOUT + +From: Benjamin Coddington + +[ Upstream commit d7bdd849ef1b681da03ac05ca0957b2cbe2d24b6 ] + +We've noticed a situation where an unstable TCP connection can cause the +TLS handshake to timeout waiting for userspace to complete it. When this +happens, we don't want to return from xs_tls_handshake_sync() with zero, as +this will cause the upper xprt to be set CONNECTED, and subsequent attempts +to transmit will be returned with -EPIPE. The sunrpc machine does not +recover from this situation and will spin attempting to transmit. + +The return value of tls_handshake_cancel() can be used to detect a race +with completion: + + * tls_handshake_cancel - cancel a pending handshake + * Return values: + * %true - Uncompleted handshake request was canceled + * %false - Handshake request already completed or not found + +If true, we do not want the upper xprt to be connected, so return +-ETIMEDOUT. If false, it's possible the handshake request was lost and +that may be the reason for our timeout. Again we do not want the upper +xprt to be connected, so return -ETIMEDOUT. + +Ensure that we always return an error from xs_tls_handshake_sync() if we +call tls_handshake_cancel().
+ +Signed-off-by: Benjamin Coddington +Reviewed-by: Chuck Lever +Fixes: 75eb6af7acdf ("SUNRPC: Add a TCP-with-TLS RPC transport class") +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + net/sunrpc/xprtsock.c | 9 ++++----- + 1 file changed, 4 insertions(+), 5 deletions(-) + +diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c +index 714da627fba8e..c528297245125 100644 +--- a/net/sunrpc/xprtsock.c ++++ b/net/sunrpc/xprtsock.c +@@ -2596,11 +2596,10 @@ static int xs_tls_handshake_sync(struct rpc_xprt *lower_xprt, struct xprtsec_par + rc = wait_for_completion_interruptible_timeout(&lower_transport->handshake_done, + XS_TLS_HANDSHAKE_TO); + if (rc <= 0) { +- if (!tls_handshake_cancel(sk)) { +- if (rc == 0) +- rc = -ETIMEDOUT; +- goto out_put_xprt; +- } ++ tls_handshake_cancel(sk); ++ if (rc == 0) ++ rc = -ETIMEDOUT; ++ goto out_put_xprt; + } + + rc = lower_transport->xprt_err; +-- +2.43.0 + diff --git a/queue-6.6/ubi-fastmap-fix-duplicate-slab-cache-names-while-att.patch b/queue-6.6/ubi-fastmap-fix-duplicate-slab-cache-names-while-att.patch new file mode 100644 index 00000000000..ad25ff22399 --- /dev/null +++ b/queue-6.6/ubi-fastmap-fix-duplicate-slab-cache-names-while-att.patch @@ -0,0 +1,104 @@ +From 158f717d585ec253145d903c6488a212f19f2114 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 11 Oct 2024 12:50:02 +0800 +Subject: ubi: fastmap: Fix duplicate slab cache names while attaching + +From: Zhihao Cheng + +[ Upstream commit bcddf52b7a17adcebc768d26f4e27cf79adb424c ] + +Since commit 4c39529663b9 ("slab: Warn on duplicate cache names when +DEBUG_VM=y"), the duplicate slab cache names can be detected and a +kernel WARNING is thrown out. 
+In UBI fast attaching process, alloc_ai() could be invoked twice +with the same slab cache name 'ubi_aeb_slab_cache', which will trigger +following warning messages: + kmem_cache of name 'ubi_aeb_slab_cache' already exists + WARNING: CPU: 0 PID: 7519 at mm/slab_common.c:107 + __kmem_cache_create_args+0x100/0x5f0 + Modules linked in: ubi(+) nandsim [last unloaded: nandsim] + CPU: 0 UID: 0 PID: 7519 Comm: modprobe Tainted: G 6.12.0-rc2 + RIP: 0010:__kmem_cache_create_args+0x100/0x5f0 + Call Trace: + __kmem_cache_create_args+0x100/0x5f0 + alloc_ai+0x295/0x3f0 [ubi] + ubi_attach+0x3c3/0xcc0 [ubi] + ubi_attach_mtd_dev+0x17cf/0x3fa0 [ubi] + ubi_init+0x3fb/0x800 [ubi] + do_init_module+0x265/0x7d0 + __x64_sys_finit_module+0x7a/0xc0 + +The problem could be easily reproduced by loading UBI device by fastmap +with CONFIG_DEBUG_VM=y. +Fix it by using different slab names for alloc_ai() callers. + +Fixes: d2158f69a7d4 ("UBI: Remove alloc_ai() slab name from parameter list") +Fixes: fdf10ed710c0 ("ubi: Rework Fastmap attach base code") +Signed-off-by: Zhihao Cheng +Signed-off-by: Richard Weinberger +Signed-off-by: Sasha Levin +--- + drivers/mtd/ubi/attach.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/drivers/mtd/ubi/attach.c b/drivers/mtd/ubi/attach.c +index ae5abe492b52a..adc47b87b38a5 100644 +--- a/drivers/mtd/ubi/attach.c ++++ b/drivers/mtd/ubi/attach.c +@@ -1447,7 +1447,7 @@ static int scan_all(struct ubi_device *ubi, struct ubi_attach_info *ai, + return err; + } + +-static struct ubi_attach_info *alloc_ai(void) ++static struct ubi_attach_info *alloc_ai(const char *slab_name) + { + struct ubi_attach_info *ai; + +@@ -1461,7 +1461,7 @@ static struct ubi_attach_info *alloc_ai(void) + INIT_LIST_HEAD(&ai->alien); + INIT_LIST_HEAD(&ai->fastmap); + ai->volumes = RB_ROOT; +- ai->aeb_slab_cache = kmem_cache_create("ubi_aeb_slab_cache", ++ ai->aeb_slab_cache = kmem_cache_create(slab_name, + sizeof(struct ubi_ainf_peb), + 0, 0, NULL); + if 
(!ai->aeb_slab_cache) { +@@ -1491,7 +1491,7 @@ static int scan_fast(struct ubi_device *ubi, struct ubi_attach_info **ai) + + err = -ENOMEM; + +- scan_ai = alloc_ai(); ++ scan_ai = alloc_ai("ubi_aeb_slab_cache_fastmap"); + if (!scan_ai) + goto out; + +@@ -1557,7 +1557,7 @@ int ubi_attach(struct ubi_device *ubi, int force_scan) + int err; + struct ubi_attach_info *ai; + +- ai = alloc_ai(); ++ ai = alloc_ai("ubi_aeb_slab_cache"); + if (!ai) + return -ENOMEM; + +@@ -1575,7 +1575,7 @@ int ubi_attach(struct ubi_device *ubi, int force_scan) + if (err > 0 || mtd_is_eccerr(err)) { + if (err != UBI_NO_FASTMAP) { + destroy_ai(ai); +- ai = alloc_ai(); ++ ai = alloc_ai("ubi_aeb_slab_cache"); + if (!ai) + return -ENOMEM; + +@@ -1614,7 +1614,7 @@ int ubi_attach(struct ubi_device *ubi, int force_scan) + if (ubi->fm && ubi_dbg_chk_fastmap(ubi)) { + struct ubi_attach_info *scan_ai; + +- scan_ai = alloc_ai(); ++ scan_ai = alloc_ai("ubi_aeb_slab_cache_dbg_chk_fastmap"); + if (!scan_ai) { + err = -ENOMEM; + goto out_wl; +-- +2.43.0 + diff --git a/queue-6.6/ubi-fastmap-wl-schedule-fm_work-if-wear-leveling-poo.patch b/queue-6.6/ubi-fastmap-wl-schedule-fm_work-if-wear-leveling-poo.patch new file mode 100644 index 00000000000..2182ecce939 --- /dev/null +++ b/queue-6.6/ubi-fastmap-wl-schedule-fm_work-if-wear-leveling-poo.patch @@ -0,0 +1,98 @@ +From 255fa47f68d1f2c6653cb62cd7c90636693ebd85 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 19 Aug 2024 11:26:22 +0800 +Subject: ubi: fastmap: wl: Schedule fm_work if wear-leveling pool is empty + +From: Zhihao Cheng + +[ Upstream commit c4595fe394a289927077e3da561db27811919ee0 ] + +Since commit 14072ee33d5a ("ubi: fastmap: Check wl_pool for free peb +before wear leveling"), wear_leveling_worker() won't schedule fm_work +if wear-leveling pool is empty, which could temporarily disable the +wear-leveling until the fastmap is updated(eg. pool becomes empty). +Fix it by scheduling fm_work if wl_pool is empty during wear-leveing. 
+ +Fixes: 14072ee33d5a ("ubi: fastmap: Check wl_pool for free peb before wear leveling") +Signed-off-by: Zhihao Cheng +Signed-off-by: Richard Weinberger +Signed-off-by: Sasha Levin +--- + drivers/mtd/ubi/fastmap-wl.c | 19 ++++++++++++++++--- + drivers/mtd/ubi/wl.c | 2 +- + drivers/mtd/ubi/wl.h | 3 ++- + 3 files changed, 19 insertions(+), 5 deletions(-) + +diff --git a/drivers/mtd/ubi/fastmap-wl.c b/drivers/mtd/ubi/fastmap-wl.c +index 863f571f1adb5..79733163ab7d0 100644 +--- a/drivers/mtd/ubi/fastmap-wl.c ++++ b/drivers/mtd/ubi/fastmap-wl.c +@@ -282,14 +282,27 @@ int ubi_wl_get_peb(struct ubi_device *ubi) + * WL sub-system. + * + * @ubi: UBI device description object ++ * @need_fill: whether to fill wear-leveling pool when no PEBs are found + */ +-static struct ubi_wl_entry *next_peb_for_wl(struct ubi_device *ubi) ++static struct ubi_wl_entry *next_peb_for_wl(struct ubi_device *ubi, ++ bool need_fill) + { + struct ubi_fm_pool *pool = &ubi->fm_wl_pool; + int pnum; + +- if (pool->used == pool->size) ++ if (pool->used == pool->size) { ++ if (need_fill && !ubi->fm_work_scheduled) { ++ /* ++ * We cannot update the fastmap here because this ++ * function is called in atomic context. ++ * Let's fail here and refill/update it as soon as ++ * possible. 
++ */ ++ ubi->fm_work_scheduled = 1; ++ schedule_work(&ubi->fm_work); ++ } + return NULL; ++ } + + pnum = pool->pebs[pool->used]; + return ubi->lookuptbl[pnum]; +@@ -311,7 +324,7 @@ static bool need_wear_leveling(struct ubi_device *ubi) + if (!ubi->used.rb_node) + return false; + +- e = next_peb_for_wl(ubi); ++ e = next_peb_for_wl(ubi, false); + if (!e) { + if (!ubi->free.rb_node) + return false; +diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c +index e510e2de2cfe0..886d44019401a 100644 +--- a/drivers/mtd/ubi/wl.c ++++ b/drivers/mtd/ubi/wl.c +@@ -671,7 +671,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, + ubi_assert(!ubi->move_to_put); + + #ifdef CONFIG_MTD_UBI_FASTMAP +- if (!next_peb_for_wl(ubi) || ++ if (!next_peb_for_wl(ubi, true) || + #else + if (!ubi->free.rb_node || + #endif +diff --git a/drivers/mtd/ubi/wl.h b/drivers/mtd/ubi/wl.h +index 5ebe374a08aed..1d83e552533a5 100644 +--- a/drivers/mtd/ubi/wl.h ++++ b/drivers/mtd/ubi/wl.h +@@ -5,7 +5,8 @@ + static void update_fastmap_work_fn(struct work_struct *wrk); + static struct ubi_wl_entry *find_anchor_wl_entry(struct rb_root *root); + static struct ubi_wl_entry *get_peb_for_wl(struct ubi_device *ubi); +-static struct ubi_wl_entry *next_peb_for_wl(struct ubi_device *ubi); ++static struct ubi_wl_entry *next_peb_for_wl(struct ubi_device *ubi, ++ bool need_fill); + static bool need_wear_leveling(struct ubi_device *ubi); + static void ubi_fastmap_close(struct ubi_device *ubi); + static inline void ubi_fastmap_init(struct ubi_device *ubi, int *count) +-- +2.43.0 + diff --git a/queue-6.6/ubifs-authentication-fix-use-after-free-in-ubifs_tnc.patch b/queue-6.6/ubifs-authentication-fix-use-after-free-in-ubifs_tnc.patch new file mode 100644 index 00000000000..7fea4c1103e --- /dev/null +++ b/queue-6.6/ubifs-authentication-fix-use-after-free-in-ubifs_tnc.patch @@ -0,0 +1,171 @@ +From b3cc754244a0e5035a73e52520854641ae88bcdc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 9 
Oct 2024 16:46:59 +0200 +Subject: ubifs: authentication: Fix use-after-free in ubifs_tnc_end_commit + +From: Waqar Hameed + +[ Upstream commit 4617fb8fc15effe8eda4dd898d4e33eb537a7140 ] + +After an insertion in TNC, the tree might split and cause a node to +change its `znode->parent`. A further deletion of other nodes in the +tree (which also could free the nodes), the aforementioned node's +`znode->cparent` could still point to a freed node. This +`znode->cparent` may not be updated when getting nodes to commit in +`ubifs_tnc_start_commit()`. This could then trigger a use-after-free +when accessing the `znode->cparent` in `write_index()` in +`ubifs_tnc_end_commit()`. + +This can be triggered by running + + rm -f /etc/test-file.bin + dd if=/dev/urandom of=/etc/test-file.bin bs=1M count=60 conv=fsync + +in a loop, and with `CONFIG_UBIFS_FS_AUTHENTICATION`. KASAN then +reports: + + BUG: KASAN: use-after-free in ubifs_tnc_end_commit+0xa5c/0x1950 + Write of size 32 at addr ffffff800a3af86c by task ubifs_bgt0_20/153 + + Call trace: + dump_backtrace+0x0/0x340 + show_stack+0x18/0x24 + dump_stack_lvl+0x9c/0xbc + print_address_description.constprop.0+0x74/0x2b0 + kasan_report+0x1d8/0x1f0 + kasan_check_range+0xf8/0x1a0 + memcpy+0x84/0xf4 + ubifs_tnc_end_commit+0xa5c/0x1950 + do_commit+0x4e0/0x1340 + ubifs_bg_thread+0x234/0x2e0 + kthread+0x36c/0x410 + ret_from_fork+0x10/0x20 + + Allocated by task 401: + kasan_save_stack+0x38/0x70 + __kasan_kmalloc+0x8c/0xd0 + __kmalloc+0x34c/0x5bc + tnc_insert+0x140/0x16a4 + ubifs_tnc_add+0x370/0x52c + ubifs_jnl_write_data+0x5d8/0x870 + do_writepage+0x36c/0x510 + ubifs_writepage+0x190/0x4dc + __writepage+0x58/0x154 + write_cache_pages+0x394/0x830 + do_writepages+0x1f0/0x5b0 + filemap_fdatawrite_wbc+0x170/0x25c + file_write_and_wait_range+0x140/0x190 + ubifs_fsync+0xe8/0x290 + vfs_fsync_range+0xc0/0x1e4 + do_fsync+0x40/0x90 + __arm64_sys_fsync+0x34/0x50 + invoke_syscall.constprop.0+0xa8/0x260 + do_el0_svc+0xc8/0x1f0 + el0_svc+0x34/0x70 + 
el0t_64_sync_handler+0x108/0x114 + el0t_64_sync+0x1a4/0x1a8 + + Freed by task 403: + kasan_save_stack+0x38/0x70 + kasan_set_track+0x28/0x40 + kasan_set_free_info+0x28/0x4c + __kasan_slab_free+0xd4/0x13c + kfree+0xc4/0x3a0 + tnc_delete+0x3f4/0xe40 + ubifs_tnc_remove_range+0x368/0x73c + ubifs_tnc_remove_ino+0x29c/0x2e0 + ubifs_jnl_delete_inode+0x150/0x260 + ubifs_evict_inode+0x1d4/0x2e4 + evict+0x1c8/0x450 + iput+0x2a0/0x3c4 + do_unlinkat+0x2cc/0x490 + __arm64_sys_unlinkat+0x90/0x100 + invoke_syscall.constprop.0+0xa8/0x260 + do_el0_svc+0xc8/0x1f0 + el0_svc+0x34/0x70 + el0t_64_sync_handler+0x108/0x114 + el0t_64_sync+0x1a4/0x1a8 + +The offending `memcpy()` in `ubifs_copy_hash()` has a use-after-free +when a node becomes root in TNC but still has a `cparent` to an already +freed node. More specifically, consider the following TNC: + + zroot + / + / + zp1 + / + / + zn + +Inserting a new node `zn_new` with a key smaller than `zn` will trigger +a split in `tnc_insert()` if `zp1` is full: + + zroot + / \ + / \ + zp1 zp2 + / \ + / \ + zn_new zn + +`zn->parent` has now been moved to `zp2`, *but* `zn->cparent` still +points to `zp1`. + +Now, consider a removal of all the nodes _except_ `zn`. Just when +`tnc_delete()` is about to delete `zroot` and `zp2`: + + zroot + \ + \ + zp2 + \ + \ + zn + +`zroot` and `zp2` get freed and the tree collapses: + + zn + +`zn` now becomes the new `zroot`. + +`get_znodes_to_commit()` will now only find `zn`, the new `zroot`, and +`write_index()` will check its `znode->cparent` that wrongly points to +the already freed `zp1`. `ubifs_copy_hash()` thus gets wrongly called +with `znode->cparent->zbranch[znode->iip].hash` that triggers the +use-after-free! + +Fix this by explicitly setting `znode->cparent` to `NULL` in +`get_znodes_to_commit()` for the root node. The search for the dirty +nodes is bottom-up in the tree. Thus, when `find_next_dirty(znode)` +returns NULL, the current `znode` _is_ the root node. Add an assert for +this. 
+ +Fixes: 16a26b20d2af ("ubifs: authentication: Add hashes to index nodes") +Tested-by: Waqar Hameed +Co-developed-by: Zhihao Cheng +Signed-off-by: Zhihao Cheng +Signed-off-by: Waqar Hameed +Reviewed-by: Zhihao Cheng +Signed-off-by: Richard Weinberger +Signed-off-by: Sasha Levin +--- + fs/ubifs/tnc_commit.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c +index a55e04822d16e..7c43e0ccf6d47 100644 +--- a/fs/ubifs/tnc_commit.c ++++ b/fs/ubifs/tnc_commit.c +@@ -657,6 +657,8 @@ static int get_znodes_to_commit(struct ubifs_info *c) + znode->alt = 0; + cnext = find_next_dirty(znode); + if (!cnext) { ++ ubifs_assert(c, !znode->parent); ++ znode->cparent = NULL; + znode->cnext = c->cnext; + break; + } +-- +2.43.0 + diff --git a/queue-6.6/ubifs-correct-the-total-block-count-by-deducting-jou.patch b/queue-6.6/ubifs-correct-the-total-block-count-by-deducting-jou.patch new file mode 100644 index 00000000000..d4bb07f0a43 --- /dev/null +++ b/queue-6.6/ubifs-correct-the-total-block-count-by-deducting-jou.patch @@ -0,0 +1,46 @@ +From cad244d4a43bff39a3331896a9cd738a557d2d6e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 Sep 2024 09:09:09 +0800 +Subject: ubifs: Correct the total block count by deducting journal reservation + +From: Zhihao Cheng + +[ Upstream commit 84a2bee9c49769310efa19601157ef50a1df1267 ] + +Since commit e874dcde1cbf ("ubifs: Reserve one leb for each journal +head while doing budget"), available space is calculated by deducting +reservation for all journal heads. However, the total block count ( +which is only used by statfs) is not updated yet, which will cause +the wrong displaying for used space (total - available). +Fix it by deducting reservation for all journal heads from total +block count. 
+ +Fixes: e874dcde1cbf ("ubifs: Reserve one leb for each journal head while doing budget") +Signed-off-by: Zhihao Cheng +Signed-off-by: Richard Weinberger +Signed-off-by: Sasha Levin +--- + fs/ubifs/super.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c +index b08fb28d16b55..3409488d39ba1 100644 +--- a/fs/ubifs/super.c ++++ b/fs/ubifs/super.c +@@ -777,10 +777,10 @@ static void init_constants_master(struct ubifs_info *c) + * necessary to report something for the 'statfs()' call. + * + * Subtract the LEB reserved for GC, the LEB which is reserved for +- * deletions, minimum LEBs for the index, and assume only one journal +- * head is available. ++ * deletions, minimum LEBs for the index, the LEBs which are reserved ++ * for each journal head. + */ +- tmp64 = c->main_lebs - 1 - 1 - MIN_INDEX_LEBS - c->jhead_cnt + 1; ++ tmp64 = c->main_lebs - 1 - 1 - MIN_INDEX_LEBS - c->jhead_cnt; + tmp64 *= (long long)c->leb_size - c->leb_overhead; + tmp64 = ubifs_reported_space(c, tmp64); + c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT; +-- +2.43.0 + diff --git a/queue-6.6/um-always-dump-trace-for-specified-task-in-show_stac.patch b/queue-6.6/um-always-dump-trace-for-specified-task-in-show_stac.patch new file mode 100644 index 00000000000..695bf215182 --- /dev/null +++ b/queue-6.6/um-always-dump-trace-for-specified-task-in-show_stac.patch @@ -0,0 +1,37 @@ +From 7c9ed4b4c149da7ea7c70c4faa07088dea1805e7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 6 Nov 2024 18:39:33 +0800 +Subject: um: Always dump trace for specified task in show_stack + +From: Tiwei Bie + +[ Upstream commit 0f659ff362eac69777c4c191b7e5ccb19d76c67d ] + +Currently, show_stack() always dumps the trace of the current task. +However, it should dump the trace of the specified task if one is +provided. Otherwise, things like running "echo t > sysrq-trigger" +won't work as expected. 
+ +Fixes: 970e51feaddb ("um: Add support for CONFIG_STACKTRACE") +Signed-off-by: Tiwei Bie +Link: https://patch.msgid.link/20241106103933.1132365-1-tiwei.btw@antgroup.com +Signed-off-by: Johannes Berg +Signed-off-by: Sasha Levin +--- + arch/um/kernel/sysrq.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c +index 746715379f12a..7e897e44a03da 100644 +--- a/arch/um/kernel/sysrq.c ++++ b/arch/um/kernel/sysrq.c +@@ -53,5 +53,5 @@ void show_stack(struct task_struct *task, unsigned long *stack, + } + + printk("%sCall Trace:\n", loglvl); +- dump_trace(current, &stackops, (void *)loglvl); ++ dump_trace(task ?: current, &stackops, (void *)loglvl); + } +-- +2.43.0 + diff --git a/queue-6.6/um-fix-potential-integer-overflow-during-physmem-set.patch b/queue-6.6/um-fix-potential-integer-overflow-during-physmem-set.patch new file mode 100644 index 00000000000..df7c4fff2a5 --- /dev/null +++ b/queue-6.6/um-fix-potential-integer-overflow-during-physmem-set.patch @@ -0,0 +1,50 @@ +From c8bb4749336825d518ba7179421f92e2fbd81d0c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 16 Sep 2024 12:59:48 +0800 +Subject: um: Fix potential integer overflow during physmem setup + +From: Tiwei Bie + +[ Upstream commit a98b7761f697e590ed5d610d87fa12be66f23419 ] + +This issue happens when the real map size is greater than LONG_MAX, +which can be easily triggered on UML/i386. 
+ +Fixes: fe205bdd1321 ("um: Print minimum physical memory requirement") +Signed-off-by: Tiwei Bie +Link: https://patch.msgid.link/20240916045950.508910-3-tiwei.btw@antgroup.com +Signed-off-by: Johannes Berg +Signed-off-by: Sasha Levin +--- + arch/um/kernel/physmem.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c +index 91485119ae67a..4339580f5a4f6 100644 +--- a/arch/um/kernel/physmem.c ++++ b/arch/um/kernel/physmem.c +@@ -80,10 +80,10 @@ void __init setup_physmem(unsigned long start, unsigned long reserve_end, + unsigned long len, unsigned long long highmem) + { + unsigned long reserve = reserve_end - start; +- long map_size = len - reserve; ++ unsigned long map_size = len - reserve; + int err; + +- if(map_size <= 0) { ++ if (len <= reserve) { + os_warn("Too few physical memory! Needed=%lu, given=%lu\n", + reserve, len); + exit(1); +@@ -94,7 +94,7 @@ void __init setup_physmem(unsigned long start, unsigned long reserve_end, + err = os_map_memory((void *) reserve_end, physmem_fd, reserve, + map_size, 1, 1, 1); + if (err < 0) { +- os_warn("setup_physmem - mapping %ld bytes of memory at 0x%p " ++ os_warn("setup_physmem - mapping %lu bytes of memory at 0x%p " + "failed - errno = %d\n", map_size, + (void *) reserve_end, err); + exit(1); +-- +2.43.0 + diff --git a/queue-6.6/um-fix-the-return-value-of-elf_core_copy_task_fpregs.patch b/queue-6.6/um-fix-the-return-value-of-elf_core_copy_task_fpregs.patch new file mode 100644 index 00000000000..997b8f517bd --- /dev/null +++ b/queue-6.6/um-fix-the-return-value-of-elf_core_copy_task_fpregs.patch @@ -0,0 +1,36 @@ +From 84a73effa2595dd3444dd84e2b54d4cbb11b738f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 13 Sep 2024 10:33:02 +0800 +Subject: um: Fix the return value of elf_core_copy_task_fpregs + +From: Tiwei Bie + +[ Upstream commit 865e3845eeaa21e9a62abc1361644e67124f1ec0 ] + +This function is expected to return a boolean value, which 
should be +true on success and false on failure. + +Fixes: d1254b12c93e ("uml: fix x86_64 core dump crash") +Signed-off-by: Tiwei Bie +Link: https://patch.msgid.link/20240913023302.130300-1-tiwei.btw@antgroup.com +Signed-off-by: Johannes Berg +Signed-off-by: Sasha Levin +--- + arch/um/kernel/process.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c +index 6daffb9d8a8d7..afe67d8161467 100644 +--- a/arch/um/kernel/process.c ++++ b/arch/um/kernel/process.c +@@ -397,6 +397,6 @@ int elf_core_copy_task_fpregs(struct task_struct *t, elf_fpregset_t *fpu) + { + int cpu = current_thread_info()->cpu; + +- return save_i387_registers(userspace_pid[cpu], (unsigned long *) fpu); ++ return save_i387_registers(userspace_pid[cpu], (unsigned long *) fpu) == 0; + } + +-- +2.43.0 +