From: Greg Kroah-Hartman Date: Mon, 7 Nov 2022 12:28:18 +0000 (+0100) Subject: 6.0-stable patches X-Git-Tag: v4.9.333~48 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=bc2f09ccc1c25b7fff815ef07c14d977fe100c94;p=thirdparty%2Fkernel%2Fstable-queue.git 6.0-stable patches added patches: acpi-numa-add-cxl-cfmws-nodes-to-the-possible-nodes-set.patch btrfs-don-t-use-btrfs_chunk-sub_stripes-from-disk.patch btrfs-fix-a-memory-allocation-failure-test-in-btrfs_submit_direct.patch btrfs-fix-tree-mod-log-mishandling-of-reallocated-nodes.patch btrfs-fix-type-of-parameter-generation-in-btrfs_get_dentry.patch cxl-pmem-fix-cxl_pmem_region-and-cxl_memdev-leak.patch cxl-region-fix-cxl_region-leak-cleanup-targets-at-region-delete.patch cxl-region-fix-decoder-allocation-crash.patch cxl-region-fix-distance-calculation-with-passthrough-ports.patch cxl-region-fix-region-hpa-ordering-validation.patch ftrace-fix-use-after-free-for-dynamic-ftrace_ops.patch --- diff --git a/queue-6.0/acpi-numa-add-cxl-cfmws-nodes-to-the-possible-nodes-set.patch b/queue-6.0/acpi-numa-add-cxl-cfmws-nodes-to-the-possible-nodes-set.patch new file mode 100644 index 00000000000..5917b2a5c40 --- /dev/null +++ b/queue-6.0/acpi-numa-add-cxl-cfmws-nodes-to-the-possible-nodes-set.patch @@ -0,0 +1,42 @@ +From 24f0692bfd41fd207d99c993a5785c3426762046 Mon Sep 17 00:00:00 2001 +From: Dan Williams +Date: Thu, 20 Oct 2022 16:54:55 -0700 +Subject: ACPI: NUMA: Add CXL CFMWS 'nodes' to the possible nodes set + +From: Dan Williams + +commit 24f0692bfd41fd207d99c993a5785c3426762046 upstream. + +The ACPI CEDT.CFMWS indicates a range of possible address where new CXL +regions can appear. Each range is associated with a QTG id (QoS +Throttling Group id). For each range + QTG pair that is not covered by a proximity +domain in the SRAT, Linux creates a new NUMA node. However, the commit +that added the new ranges missed updating the node_possible mask which +causes memory_group_register() to fail. 
Add the new nodes to the +nodes_possible mask. + +Cc: +Fixes: fd49f99c1809 ("ACPI: NUMA: Add a node and memblk for each CFMWS not in SRAT") +Cc: Alison Schofield +Cc: Rafael J. Wysocki +Reported-by: Vishal Verma +Tested-by: Vishal Verma +Acked-by: Rafael J. Wysocki +Reviewed-by: Vishal Verma +Link: https://lore.kernel.org/r/166631003537.1167078.9373680312035292395.stgit@dwillia2-xfh.jf.intel.com +Signed-off-by: Dan Williams +Signed-off-by: Greg Kroah-Hartman +--- + drivers/acpi/numa/srat.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/acpi/numa/srat.c ++++ b/drivers/acpi/numa/srat.c +@@ -327,6 +327,7 @@ static int __init acpi_parse_cfmws(union + pr_warn("ACPI NUMA: Failed to add memblk for CFMWS node %d [mem %#llx-%#llx]\n", + node, start, end); + } ++ node_set(node, numa_nodes_parsed); + + /* Set the next available fake_pxm value */ + (*fake_pxm)++; diff --git a/queue-6.0/btrfs-don-t-use-btrfs_chunk-sub_stripes-from-disk.patch b/queue-6.0/btrfs-don-t-use-btrfs_chunk-sub_stripes-from-disk.patch new file mode 100644 index 00000000000..550d4a7cb82 --- /dev/null +++ b/queue-6.0/btrfs-don-t-use-btrfs_chunk-sub_stripes-from-disk.patch @@ -0,0 +1,91 @@ +From 76a66ba101329316a5d7f4275070be22eb85fdf2 Mon Sep 17 00:00:00 2001 +From: Qu Wenruo +Date: Fri, 21 Oct 2022 08:43:45 +0800 +Subject: btrfs: don't use btrfs_chunk::sub_stripes from disk + +From: Qu Wenruo + +commit 76a66ba101329316a5d7f4275070be22eb85fdf2 upstream. + +[BUG] +There are two reports (the earliest one from LKP, a more recent one from +kernel bugzilla) that we can have some chunks with 0 as sub_stripes. 
+ +This will cause divide-by-zero errors at btrfs_rmap_block, which is +introduced by a recent kernel patch ac0677348f3c ("btrfs: merge +calculations for simple striped profiles in btrfs_rmap_block"): + + if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | + BTRFS_BLOCK_GROUP_RAID10)) { + stripe_nr = stripe_nr * map->num_stripes + i; + stripe_nr = div_u64(stripe_nr, map->sub_stripes); <<< + } + +[CAUSE] +From the more recent report, it has been proven that we have some chunks +with 0 as sub_stripes, mostly caused by older mkfs. + +It turns out that the mkfs.btrfs fix is only introduced in 6718ab4d33aa +("btrfs-progs: Initialize sub_stripes to 1 in btrfs_alloc_data_chunk") +which is included in v5.4 btrfs-progs release. + +So there would be quite some old filesystems with such 0 sub_stripes. + +[FIX] +Just don't trust the sub_stripes values from disk. + +We have a trusted btrfs_raid_array[] to fetch the correct sub_stripes +numbers for each profile and that are fixed. + +By this, we can keep the compatibility with older filesystems while +still avoid divide-by-zero bugs. 
+ +Reported-by: kernel test robot +Reported-by: Viktor Kuzmin +Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=216559 +Fixes: ac0677348f3c ("btrfs: merge calculations for simple striped profiles in btrfs_rmap_block") +CC: stable@vger.kernel.org # 6.0 +Reviewed-by: Su Yue +Reviewed-by: Johannes Thumshirn +Signed-off-by: Qu Wenruo +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/volumes.c | 12 +++++++++++- + 1 file changed, 11 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/volumes.c ++++ b/fs/btrfs/volumes.c +@@ -7029,6 +7029,7 @@ static int read_one_chunk(struct btrfs_k + u64 devid; + u64 type; + u8 uuid[BTRFS_UUID_SIZE]; ++ int index; + int num_stripes; + int ret; + int i; +@@ -7036,6 +7037,7 @@ static int read_one_chunk(struct btrfs_k + logical = key->offset; + length = btrfs_chunk_length(leaf, chunk); + type = btrfs_chunk_type(leaf, chunk); ++ index = btrfs_bg_flags_to_raid_index(type); + num_stripes = btrfs_chunk_num_stripes(leaf, chunk); + + #if BITS_PER_LONG == 32 +@@ -7089,7 +7091,15 @@ static int read_one_chunk(struct btrfs_k + map->io_align = btrfs_chunk_io_align(leaf, chunk); + map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk); + map->type = type; +- map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk); ++ /* ++ * We can't use the sub_stripes value, as for profiles other than ++ * RAID10, they may have 0 as sub_stripes for filesystems created by ++ * older mkfs (<v5.4). ++ * In that case, it can cause divide-by-zero errors later. ++ * Since currently sub_stripes is fixed for each profile, let's ++ * use the trusted value instead. ++ */ ++ map->sub_stripes = btrfs_raid_array[index].sub_stripes; + map->verified_stripes = 0; + em->orig_block_len = btrfs_calc_stripe_length(em); + for (i = 0; i < num_stripes; i++) { diff --git a/queue-6.0/btrfs-fix-a-memory-allocation-failure-test-in-btrfs_submit_direct.patch b/queue-6.0/btrfs-fix-a-memory-allocation-failure-test-in-btrfs_submit_direct.patch new file mode 100644 index 00000000000..a8825ae8aa9 --- /dev/null +++ b/queue-6.0/btrfs-fix-a-memory-allocation-failure-test-in-btrfs_submit_direct.patch @@ -0,0 +1,33 @@ +From 
063b1f21cc9be07291a1f5e227436f353c6d1695 Mon Sep 17 00:00:00 2001 +From: Christophe JAILLET +Date: Sun, 30 Oct 2022 08:35:28 +0100 +Subject: btrfs: fix a memory allocation failure test in btrfs_submit_direct + +From: Christophe JAILLET + +commit 063b1f21cc9be07291a1f5e227436f353c6d1695 upstream. + +After allocation 'dip' is tested instead of 'dip->csums'. Fix it. + +Fixes: 642c5d34da53 ("btrfs: allocate the btrfs_dio_private as part of the iomap dio bio") +CC: stable@vger.kernel.org # 5.19+ +Reviewed-by: Nikolay Borisov +Signed-off-by: Christophe JAILLET +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/inode.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -8142,7 +8142,7 @@ static void btrfs_submit_direct(const st + */ + status = BLK_STS_RESOURCE; + dip->csums = kcalloc(nr_sectors, fs_info->csum_size, GFP_NOFS); +- if (!dip) ++ if (!dip->csums) + goto out_err; + + status = btrfs_lookup_bio_sums(inode, dio_bio, dip->csums); diff --git a/queue-6.0/btrfs-fix-tree-mod-log-mishandling-of-reallocated-nodes.patch b/queue-6.0/btrfs-fix-tree-mod-log-mishandling-of-reallocated-nodes.patch new file mode 100644 index 00000000000..2059e9f54bd --- /dev/null +++ b/queue-6.0/btrfs-fix-tree-mod-log-mishandling-of-reallocated-nodes.patch @@ -0,0 +1,189 @@ +From 968b71583130b6104c9f33ba60446d598e327a8b Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Fri, 14 Oct 2022 08:52:46 -0400 +Subject: btrfs: fix tree mod log mishandling of reallocated nodes + +From: Josef Bacik + +commit 968b71583130b6104c9f33ba60446d598e327a8b upstream. + +We have been seeing the following panic in production + + kernel BUG at fs/btrfs/tree-mod-log.c:677! 
+ invalid opcode: 0000 [#1] SMP + RIP: 0010:tree_mod_log_rewind+0x1b4/0x200 + RSP: 0000:ffffc9002c02f890 EFLAGS: 00010293 + RAX: 0000000000000003 RBX: ffff8882b448c700 RCX: 0000000000000000 + RDX: 0000000000008000 RSI: 00000000000000a7 RDI: ffff88877d831c00 + RBP: 0000000000000002 R08: 000000000000009f R09: 0000000000000000 + R10: 0000000000000000 R11: 0000000000100c40 R12: 0000000000000001 + R13: ffff8886c26d6a00 R14: ffff88829f5424f8 R15: ffff88877d831a00 + FS: 00007fee1d80c780(0000) GS:ffff8890400c0000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 00007fee1963a020 CR3: 0000000434f33002 CR4: 00000000007706e0 + DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + PKRU: 55555554 + Call Trace: + btrfs_get_old_root+0x12b/0x420 + btrfs_search_old_slot+0x64/0x2f0 + ? tree_mod_log_oldest_root+0x3d/0xf0 + resolve_indirect_ref+0xfd/0x660 + ? ulist_alloc+0x31/0x60 + ? kmem_cache_alloc_trace+0x114/0x2c0 + find_parent_nodes+0x97a/0x17e0 + ? ulist_alloc+0x30/0x60 + btrfs_find_all_roots_safe+0x97/0x150 + iterate_extent_inodes+0x154/0x370 + ? btrfs_search_path_in_tree+0x240/0x240 + iterate_inodes_from_logical+0x98/0xd0 + ? btrfs_search_path_in_tree+0x240/0x240 + btrfs_ioctl_logical_to_ino+0xd9/0x180 + btrfs_ioctl+0xe2/0x2ec0 + ? __mod_memcg_lruvec_state+0x3d/0x280 + ? do_sys_openat2+0x6d/0x140 + ? kretprobe_dispatcher+0x47/0x70 + ? kretprobe_rethook_handler+0x38/0x50 + ? rethook_trampoline_handler+0x82/0x140 + ? arch_rethook_trampoline_callback+0x3b/0x50 + ? kmem_cache_free+0xfb/0x270 + ? 
do_sys_openat2+0xd5/0x140 + __x64_sys_ioctl+0x71/0xb0 + do_syscall_64+0x2d/0x40 + +Which is this code in tree_mod_log_rewind() + + switch (tm->op) { + case BTRFS_MOD_LOG_KEY_REMOVE_WHILE_FREEING: + BUG_ON(tm->slot < n); + +This occurs because we replay the nodes in order that they happened, and +when we do a REPLACE we will log a REMOVE_WHILE_FREEING for every slot, +starting at 0. 'n' here is the number of items in this block, which in +this case was 1, but we had 2 REMOVE_WHILE_FREEING operations. + +The actual root cause of this was that we were replaying operations for +a block that shouldn't have been replayed. Consider the following +sequence of events + +1. We have an already modified root, and we do a btrfs_get_tree_mod_seq(). +2. We begin removing items from this root, triggering KEY_REPLACE for + its child slots. +3. We remove one of the 2 children this root node points to, thus triggering + the root node promotion of the remaining child, and freeing this node. +4. We modify a new root, and re-allocate the above node to the root node of + this other root. + +The tree mod log looks something like this + + logical 0 op KEY_REPLACE (slot 1) seq 2 + logical 0 op KEY_REMOVE (slot 1) seq 3 + logical 0 op KEY_REMOVE_WHILE_FREEING (slot 0) seq 4 + logical 4096 op LOG_ROOT_REPLACE (old logical 0) seq 5 + logical 8192 op KEY_REMOVE_WHILE_FREEING (slot 1) seq 6 + logical 8192 op KEY_REMOVE_WHILE_FREEING (slot 0) seq 7 + logical 0 op LOG_ROOT_REPLACE (old logical 8192) seq 8 + +From here the bug is triggered by the following steps + +1. Call btrfs_get_old_root() on the new_root. +2. We call tree_mod_log_oldest_root(btrfs_root_node(new_root)), which is + currently logical 0. +3. tree_mod_log_oldest_root() calls tree_mod_log_search_oldest(), which + gives us the KEY_REPLACE seq 2, and since that's not a + LOG_ROOT_REPLACE we incorrectly believe that we don't have an old + root, because we expect that the most recent change should be a + LOG_ROOT_REPLACE. +4. 
Back in tree_mod_log_oldest_root() we don't have a LOG_ROOT_REPLACE, + so we don't set old_root, we simply use our existing extent buffer. +5. Since we're using our existing extent buffer (logical 0) we call + tree_mod_log_search(0) in order to get the newest change to start the + rewind from, which ends up being the LOG_ROOT_REPLACE at seq 8. +6. Again since we didn't find an old_root we simply clone logical 0 at + it's current state. +7. We call tree_mod_log_rewind() with the cloned extent buffer. +8. Set n = btrfs_header_nritems(logical 0), which would be whatever the + original nritems was when we COWed the original root, say for this + example it's 2. +9. We start from the newest operation and work our way forward, so we + see LOG_ROOT_REPLACE which we ignore. +10. Next we see KEY_REMOVE_WHILE_FREEING for slot 0, which triggers the + BUG_ON(tm->slot < n), because it expects if we've done this we have a + completely empty extent buffer to replay completely. + +The correct thing would be to find the first LOG_ROOT_REPLACE, and then +get the old_root set to logical 8192. In fact making that change fixes +this particular problem. + +However consider the much more complicated case. We have a child node +in this tree and the above situation. In the above case we freed one +of the child blocks at the seq 3 operation. If this block was also +re-allocated and got new tree mod log operations we would have a +different problem. btrfs_search_old_slot(orig root) would get down to +the logical 0 root that still pointed at that node. However in +btrfs_search_old_slot() we call tree_mod_log_rewind(buf) directly. This +is not context aware enough to know which operations we should be +replaying. If the block was re-allocated multiple times we may only +want to replay a range of operations, and determining what that range is +isn't possible to determine. 
+ +We could maybe solve this by keeping track of which root the node +belonged to at every tree mod log operation, and then passing this +around to make sure we're only replaying operations that relate to the +root we're trying to rewind. + +However there's a simpler way to solve this problem, simply disallow +reallocations if we have currently running tree mod log users. We +already do this for leaf's, so we're simply expanding this to nodes as +well. This is a relatively uncommon occurrence, and the problem is +complicated enough I'm worried that we will still have corner cases in +the reallocation case. So fix this in the most straightforward way +possible. + +Fixes: bd989ba359f2 ("Btrfs: add tree modification log functions") +CC: stable@vger.kernel.org # 3.3+ +Reviewed-by: Filipe Manana +Signed-off-by: Josef Bacik +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/extent-tree.c | 25 +++++++++++++------------ + 1 file changed, 13 insertions(+), 12 deletions(-) + +--- a/fs/btrfs/extent-tree.c ++++ b/fs/btrfs/extent-tree.c +@@ -3294,21 +3294,22 @@ void btrfs_free_tree_block(struct btrfs_ + } + + /* +- * If this is a leaf and there are tree mod log users, we may +- * have recorded mod log operations that point to this leaf. +- * So we must make sure no one reuses this leaf's extent before +- * mod log operations are applied to a node, otherwise after +- * rewinding a node using the mod log operations we get an +- * inconsistent btree, as the leaf's extent may now be used as +- * a node or leaf for another different btree. ++ * If there are tree mod log users we may have recorded mod log ++ * operations for this node. If we re-allocate this node we ++ * could replay operations on this node that happened when it ++ * existed in a completely different root. 
For example if it ++ * was part of root A, then was reallocated to root B, and we ++ * are doing a btrfs_old_search_slot(root b), we could replay ++ * operations that happened when the block was part of root A, ++ * giving us an inconsistent view of the btree. ++ * + * We are safe from races here because at this point no other + * node or root points to this extent buffer, so if after this +- * check a new tree mod log user joins, it will not be able to +- * find a node pointing to this leaf and record operations that +- * point to this leaf. ++ * check a new tree mod log user joins we will not have an ++ * existing log of operations on this node that we have to ++ * contend with. + */ +- if (btrfs_header_level(buf) == 0 && +- test_bit(BTRFS_FS_TREE_MOD_LOG_USERS, &fs_info->flags)) ++ if (test_bit(BTRFS_FS_TREE_MOD_LOG_USERS, &fs_info->flags)) + must_pin = true; + + if (must_pin || btrfs_is_zoned(fs_info)) { diff --git a/queue-6.0/btrfs-fix-type-of-parameter-generation-in-btrfs_get_dentry.patch b/queue-6.0/btrfs-fix-type-of-parameter-generation-in-btrfs_get_dentry.patch new file mode 100644 index 00000000000..2643cfb8c35 --- /dev/null +++ b/queue-6.0/btrfs-fix-type-of-parameter-generation-in-btrfs_get_dentry.patch @@ -0,0 +1,44 @@ +From 2398091f9c2c8e0040f4f9928666787a3e8108a7 Mon Sep 17 00:00:00 2001 +From: David Sterba +Date: Tue, 18 Oct 2022 16:05:52 +0200 +Subject: btrfs: fix type of parameter generation in btrfs_get_dentry + +From: David Sterba + +commit 2398091f9c2c8e0040f4f9928666787a3e8108a7 upstream. + +The type of parameter generation has been u32 since the beginning, +however all callers pass a u64 generation, so unify the types to prevent +potential loss. 
+ +CC: stable@vger.kernel.org # 4.9+ +Reviewed-by: Josef Bacik +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/export.c | 2 +- + fs/btrfs/export.h | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +--- a/fs/btrfs/export.c ++++ b/fs/btrfs/export.c +@@ -58,7 +58,7 @@ static int btrfs_encode_fh(struct inode + } + + struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, +- u64 root_objectid, u32 generation, ++ u64 root_objectid, u64 generation, + int check_generation) + { + struct btrfs_fs_info *fs_info = btrfs_sb(sb); +--- a/fs/btrfs/export.h ++++ b/fs/btrfs/export.h +@@ -19,7 +19,7 @@ struct btrfs_fid { + } __attribute__ ((packed)); + + struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, +- u64 root_objectid, u32 generation, ++ u64 root_objectid, u64 generation, + int check_generation); + struct dentry *btrfs_get_parent(struct dentry *child); + diff --git a/queue-6.0/cxl-pmem-fix-cxl_pmem_region-and-cxl_memdev-leak.patch b/queue-6.0/cxl-pmem-fix-cxl_pmem_region-and-cxl_memdev-leak.patch new file mode 100644 index 00000000000..480d3457742 --- /dev/null +++ b/queue-6.0/cxl-pmem-fix-cxl_pmem_region-and-cxl_memdev-leak.patch @@ -0,0 +1,269 @@ +From 4d07ae22e79ebc2d7528bbc69daa53b86981cb3a Mon Sep 17 00:00:00 2001 +From: Dan Williams +Date: Thu, 3 Nov 2022 17:30:36 -0700 +Subject: cxl/pmem: Fix cxl_pmem_region and cxl_memdev leak + +From: Dan Williams + +commit 4d07ae22e79ebc2d7528bbc69daa53b86981cb3a upstream. + +When a cxl_nvdimm object goes through a ->remove() event (device +physically removed, nvdimm-bridge disabled, or nvdimm device disabled), +then any associated regions must also be disabled. As highlighted by the +cxl-create-region.sh test [1], a single device may host multiple +regions, but the driver was only tracking one region at a time. This +leads to a situation where only the last enabled region per nvdimm +device is cleaned up properly. 
Other regions are leaked, and this also +causes cxl_memdev reference leaks. + +Fix the tracking by allowing cxl_nvdimm objects to track multiple region +associations. + +Cc: +Link: https://github.com/pmem/ndctl/blob/main/test/cxl-create-region.sh [1] +Reported-by: Vishal Verma +Fixes: 04ad63f086d1 ("cxl/region: Introduce cxl_pmem_region objects") +Reviewed-by: Dave Jiang +Reviewed-by: Vishal Verma +Link: https://lore.kernel.org/r/166752183647.947915.2045230911503793901.stgit@dwillia2-xfh.jf.intel.com +Signed-off-by: Dan Williams +Signed-off-by: Greg Kroah-Hartman +--- + drivers/cxl/core/pmem.c | 2 + drivers/cxl/cxl.h | 2 + drivers/cxl/pmem.c | 101 ++++++++++++++++++++++++++++++------------------ + 3 files changed, 68 insertions(+), 37 deletions(-) + +--- a/drivers/cxl/core/pmem.c ++++ b/drivers/cxl/core/pmem.c +@@ -188,6 +188,7 @@ static void cxl_nvdimm_release(struct de + { + struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev); + ++ xa_destroy(&cxl_nvd->pmem_regions); + kfree(cxl_nvd); + } + +@@ -230,6 +231,7 @@ static struct cxl_nvdimm *cxl_nvdimm_all + + dev = &cxl_nvd->dev; + cxl_nvd->cxlmd = cxlmd; ++ xa_init(&cxl_nvd->pmem_regions); + device_initialize(dev); + lockdep_set_class(&dev->mutex, &cxl_nvdimm_key); + device_set_pm_not_required(dev); +--- a/drivers/cxl/cxl.h ++++ b/drivers/cxl/cxl.h +@@ -423,7 +423,7 @@ struct cxl_nvdimm { + struct device dev; + struct cxl_memdev *cxlmd; + struct cxl_nvdimm_bridge *bridge; +- struct cxl_pmem_region *region; ++ struct xarray pmem_regions; + }; + + struct cxl_pmem_region_mapping { +--- a/drivers/cxl/pmem.c ++++ b/drivers/cxl/pmem.c +@@ -30,17 +30,20 @@ static void unregister_nvdimm(void *nvdi + struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm); + struct cxl_nvdimm_bridge *cxl_nvb = cxl_nvd->bridge; + struct cxl_pmem_region *cxlr_pmem; ++ unsigned long index; + + device_lock(&cxl_nvb->dev); +- cxlr_pmem = cxl_nvd->region; + dev_set_drvdata(&cxl_nvd->dev, NULL); +- cxl_nvd->region = NULL; +- 
device_unlock(&cxl_nvb->dev); ++ xa_for_each(&cxl_nvd->pmem_regions, index, cxlr_pmem) { ++ get_device(&cxlr_pmem->dev); ++ device_unlock(&cxl_nvb->dev); + +- if (cxlr_pmem) { + device_release_driver(&cxlr_pmem->dev); + put_device(&cxlr_pmem->dev); ++ ++ device_lock(&cxl_nvb->dev); + } ++ device_unlock(&cxl_nvb->dev); + + nvdimm_delete(nvdimm); + cxl_nvd->bridge = NULL; +@@ -366,25 +369,49 @@ static int match_cxl_nvdimm(struct devic + + static void unregister_nvdimm_region(void *nd_region) + { +- struct cxl_nvdimm_bridge *cxl_nvb; +- struct cxl_pmem_region *cxlr_pmem; ++ nvdimm_region_delete(nd_region); ++} ++ ++static int cxl_nvdimm_add_region(struct cxl_nvdimm *cxl_nvd, ++ struct cxl_pmem_region *cxlr_pmem) ++{ ++ int rc; ++ ++ rc = xa_insert(&cxl_nvd->pmem_regions, (unsigned long)cxlr_pmem, ++ cxlr_pmem, GFP_KERNEL); ++ if (rc) ++ return rc; ++ ++ get_device(&cxlr_pmem->dev); ++ return 0; ++} ++ ++static void cxl_nvdimm_del_region(struct cxl_nvdimm *cxl_nvd, ++ struct cxl_pmem_region *cxlr_pmem) ++{ ++ /* ++ * It is possible this is called without a corresponding ++ * cxl_nvdimm_add_region for @cxlr_pmem ++ */ ++ cxlr_pmem = xa_erase(&cxl_nvd->pmem_regions, (unsigned long)cxlr_pmem); ++ if (cxlr_pmem) ++ put_device(&cxlr_pmem->dev); ++} ++ ++static void release_mappings(void *data) ++{ + int i; ++ struct cxl_pmem_region *cxlr_pmem = data; ++ struct cxl_nvdimm_bridge *cxl_nvb = cxlr_pmem->bridge; + +- cxlr_pmem = nd_region_provider_data(nd_region); +- cxl_nvb = cxlr_pmem->bridge; + device_lock(&cxl_nvb->dev); + for (i = 0; i < cxlr_pmem->nr_mappings; i++) { + struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i]; + struct cxl_nvdimm *cxl_nvd = m->cxl_nvd; + +- if (cxl_nvd->region) { +- put_device(&cxlr_pmem->dev); +- cxl_nvd->region = NULL; +- } ++ cxl_nvdimm_del_region(cxl_nvd, cxlr_pmem); + } + device_unlock(&cxl_nvb->dev); +- +- nvdimm_region_delete(nd_region); + } + + static void cxlr_pmem_remove_resource(void *res) +@@ -422,7 +449,7 @@ static int 
cxl_pmem_region_probe(struct + if (!cxl_nvb->nvdimm_bus) { + dev_dbg(dev, "nvdimm bus not found\n"); + rc = -ENXIO; +- goto err; ++ goto out_nvb; + } + + memset(&mappings, 0, sizeof(mappings)); +@@ -431,7 +458,7 @@ static int cxl_pmem_region_probe(struct + res = devm_kzalloc(dev, sizeof(*res), GFP_KERNEL); + if (!res) { + rc = -ENOMEM; +- goto err; ++ goto out_nvb; + } + + res->name = "Persistent Memory"; +@@ -442,11 +469,11 @@ static int cxl_pmem_region_probe(struct + + rc = insert_resource(&iomem_resource, res); + if (rc) +- goto err; ++ goto out_nvb; + + rc = devm_add_action_or_reset(dev, cxlr_pmem_remove_resource, res); + if (rc) +- goto err; ++ goto out_nvb; + + ndr_desc.res = res; + ndr_desc.provider_data = cxlr_pmem; +@@ -462,7 +489,7 @@ static int cxl_pmem_region_probe(struct + nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL); + if (!nd_set) { + rc = -ENOMEM; +- goto err; ++ goto out_nvb; + } + + ndr_desc.memregion = cxlr->id; +@@ -472,9 +499,13 @@ static int cxl_pmem_region_probe(struct + info = kmalloc_array(cxlr_pmem->nr_mappings, sizeof(*info), GFP_KERNEL); + if (!info) { + rc = -ENOMEM; +- goto err; ++ goto out_nvb; + } + ++ rc = devm_add_action_or_reset(dev, release_mappings, cxlr_pmem); ++ if (rc) ++ goto out_nvd; ++ + for (i = 0; i < cxlr_pmem->nr_mappings; i++) { + struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i]; + struct cxl_memdev *cxlmd = m->cxlmd; +@@ -486,7 +517,7 @@ static int cxl_pmem_region_probe(struct + dev_dbg(dev, "[%d]: %s: no cxl_nvdimm found\n", i, + dev_name(&cxlmd->dev)); + rc = -ENODEV; +- goto err; ++ goto out_nvd; + } + + /* safe to drop ref now with bridge lock held */ +@@ -498,10 +529,17 @@ static int cxl_pmem_region_probe(struct + dev_dbg(dev, "[%d]: %s: no nvdimm found\n", i, + dev_name(&cxlmd->dev)); + rc = -ENODEV; +- goto err; ++ goto out_nvd; + } +- cxl_nvd->region = cxlr_pmem; +- get_device(&cxlr_pmem->dev); ++ ++ /* ++ * Pin the region per nvdimm device as those may be released ++ * out-of-order with 
respect to the region, and a single nvdimm ++ * maybe associated with multiple regions ++ */ ++ rc = cxl_nvdimm_add_region(cxl_nvd, cxlr_pmem); ++ if (rc) ++ goto out_nvd; + m->cxl_nvd = cxl_nvd; + mappings[i] = (struct nd_mapping_desc) { + .nvdimm = nvdimm, +@@ -527,27 +565,18 @@ static int cxl_pmem_region_probe(struct + nvdimm_pmem_region_create(cxl_nvb->nvdimm_bus, &ndr_desc); + if (!cxlr_pmem->nd_region) { + rc = -ENOMEM; +- goto err; ++ goto out_nvd; + } + + rc = devm_add_action_or_reset(dev, unregister_nvdimm_region, + cxlr_pmem->nd_region); +-out: ++out_nvd: + kfree(info); ++out_nvb: + device_unlock(&cxl_nvb->dev); + put_device(&cxl_nvb->dev); + + return rc; +- +-err: +- dev_dbg(dev, "failed to create nvdimm region\n"); +- for (i--; i >= 0; i--) { +- nvdimm = mappings[i].nvdimm; +- cxl_nvd = nvdimm_provider_data(nvdimm); +- put_device(&cxl_nvd->region->dev); +- cxl_nvd->region = NULL; +- } +- goto out; + } + + static struct cxl_driver cxl_pmem_region_driver = { diff --git a/queue-6.0/cxl-region-fix-cxl_region-leak-cleanup-targets-at-region-delete.patch b/queue-6.0/cxl-region-fix-cxl_region-leak-cleanup-targets-at-region-delete.patch new file mode 100644 index 00000000000..df8a3bd3303 --- /dev/null +++ b/queue-6.0/cxl-region-fix-cxl_region-leak-cleanup-targets-at-region-delete.patch @@ -0,0 +1,49 @@ +From 0d9e734018d70cecf79e2e4c6082167160a0f13f Mon Sep 17 00:00:00 2001 +From: Dan Williams +Date: Thu, 3 Nov 2022 17:30:30 -0700 +Subject: cxl/region: Fix cxl_region leak, cleanup targets at region delete + +From: Dan Williams + +commit 0d9e734018d70cecf79e2e4c6082167160a0f13f upstream. + +When a region is deleted any targets that have been previously assigned +to that region hold references to it. Trigger those references to +drop by detaching all targets at unregister_region() time. + +Otherwise that region object will leak as userspace has lost the ability +to detach targets once region sysfs is torn down. 
+ +Cc: +Fixes: b9686e8c8e39 ("cxl/region: Enable the assignment of endpoint decoders to regions") +Reviewed-by: Dave Jiang +Reviewed-by: Vishal Verma +Link: https://lore.kernel.org/r/166752183055.947915.17681995648556534844.stgit@dwillia2-xfh.jf.intel.com +Signed-off-by: Dan Williams +Signed-off-by: Greg Kroah-Hartman +--- + drivers/cxl/core/region.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +--- a/drivers/cxl/core/region.c ++++ b/drivers/cxl/core/region.c +@@ -1556,8 +1556,19 @@ static struct cxl_region *to_cxl_region( + static void unregister_region(void *dev) + { + struct cxl_region *cxlr = to_cxl_region(dev); ++ struct cxl_region_params *p = &cxlr->params; ++ int i; + + device_del(dev); ++ ++ /* ++ * Now that region sysfs is shutdown, the parameter block is now ++ * read-only, so no need to hold the region rwsem to access the ++ * region parameters. ++ */ ++ for (i = 0; i < p->interleave_ways; i++) ++ detach_target(cxlr, i); ++ + cxl_region_iomem_release(cxlr); + put_device(dev); + } diff --git a/queue-6.0/cxl-region-fix-decoder-allocation-crash.patch b/queue-6.0/cxl-region-fix-decoder-allocation-crash.patch new file mode 100644 index 00000000000..3a3a27efdc1 --- /dev/null +++ b/queue-6.0/cxl-region-fix-decoder-allocation-crash.patch @@ -0,0 +1,144 @@ +From 71ee71d7adcba648077997a29a91158d20c40b09 Mon Sep 17 00:00:00 2001 +From: Vishal Verma +Date: Tue, 1 Nov 2022 01:41:00 -0600 +Subject: cxl/region: Fix decoder allocation crash + +From: Vishal Verma + +commit 71ee71d7adcba648077997a29a91158d20c40b09 upstream. + +When an intermediate port's decoders have been exhausted by existing +regions, and creating a new region with the port in question in it's +hierarchical path is attempted, cxl_port_attach_region() fails to find a +port decoder (as would be expected), and drops into the failure / cleanup +path. + +However, during cleanup of the region reference, a sanity check attempts +to dereference the decoder, which in the above case didn't exist. 
This +causes a NULL pointer dereference BUG. + +To fix this, refactor the decoder allocation and de-allocation into +helper routines, and in this 'free' routine, check that the decoder, +@cxld, is valid before attempting any operations on it. + +Cc: +Suggested-by: Dan Williams +Signed-off-by: Vishal Verma +Reviewed-by: Dave Jiang +Fixes: 384e624bb211 ("cxl/region: Attach endpoint decoders") +Link: https://lore.kernel.org/r/20221101074100.1732003-1-vishal.l.verma@intel.com +Signed-off-by: Dan Williams +Signed-off-by: Greg Kroah-Hartman +--- + drivers/cxl/core/region.c | 67 ++++++++++++++++++++++++++++------------------ + 1 file changed, 41 insertions(+), 26 deletions(-) + +--- a/drivers/cxl/core/region.c ++++ b/drivers/cxl/core/region.c +@@ -686,18 +686,27 @@ static struct cxl_region_ref *alloc_regi + return cxl_rr; + } + +-static void free_region_ref(struct cxl_region_ref *cxl_rr) ++static void cxl_rr_free_decoder(struct cxl_region_ref *cxl_rr) + { +- struct cxl_port *port = cxl_rr->port; + struct cxl_region *cxlr = cxl_rr->region; + struct cxl_decoder *cxld = cxl_rr->decoder; + ++ if (!cxld) ++ return; ++ + dev_WARN_ONCE(&cxlr->dev, cxld->region != cxlr, "region mismatch\n"); + if (cxld->region == cxlr) { + cxld->region = NULL; + put_device(&cxlr->dev); + } ++} + ++static void free_region_ref(struct cxl_region_ref *cxl_rr) ++{ ++ struct cxl_port *port = cxl_rr->port; ++ struct cxl_region *cxlr = cxl_rr->region; ++ ++ cxl_rr_free_decoder(cxl_rr); + xa_erase(&port->regions, (unsigned long)cxlr); + xa_destroy(&cxl_rr->endpoints); + kfree(cxl_rr); +@@ -728,6 +737,33 @@ static int cxl_rr_ep_add(struct cxl_regi + return 0; + } + ++static int cxl_rr_alloc_decoder(struct cxl_port *port, struct cxl_region *cxlr, ++ struct cxl_endpoint_decoder *cxled, ++ struct cxl_region_ref *cxl_rr) ++{ ++ struct cxl_decoder *cxld; ++ ++ if (port == cxled_to_port(cxled)) ++ cxld = &cxled->cxld; ++ else ++ cxld = cxl_region_find_decoder(port, cxlr); ++ if (!cxld) { ++ dev_dbg(&cxlr->dev, 
"%s: no decoder available\n", ++ dev_name(&port->dev)); ++ return -EBUSY; ++ } ++ ++ if (cxld->region) { ++ dev_dbg(&cxlr->dev, "%s: %s already attached to %s\n", ++ dev_name(&port->dev), dev_name(&cxld->dev), ++ dev_name(&cxld->region->dev)); ++ return -EBUSY; ++ } ++ ++ cxl_rr->decoder = cxld; ++ return 0; ++} ++ + /** + * cxl_port_attach_region() - track a region's interest in a port by endpoint + * @port: port to add a new region reference 'struct cxl_region_ref' +@@ -794,12 +830,6 @@ static int cxl_port_attach_region(struct + cxl_rr->nr_targets++; + nr_targets_inc = true; + } +- +- /* +- * The decoder for @cxlr was allocated when the region was first +- * attached to @port. +- */ +- cxld = cxl_rr->decoder; + } else { + cxl_rr = alloc_region_ref(port, cxlr); + if (IS_ERR(cxl_rr)) { +@@ -810,26 +840,11 @@ static int cxl_port_attach_region(struct + } + nr_targets_inc = true; + +- if (port == cxled_to_port(cxled)) +- cxld = &cxled->cxld; +- else +- cxld = cxl_region_find_decoder(port, cxlr); +- if (!cxld) { +- dev_dbg(&cxlr->dev, "%s: no decoder available\n", +- dev_name(&port->dev)); +- goto out_erase; +- } +- +- if (cxld->region) { +- dev_dbg(&cxlr->dev, "%s: %s already attached to %s\n", +- dev_name(&port->dev), dev_name(&cxld->dev), +- dev_name(&cxld->region->dev)); +- rc = -EBUSY; ++ rc = cxl_rr_alloc_decoder(port, cxlr, cxled, cxl_rr); ++ if (rc) + goto out_erase; +- } +- +- cxl_rr->decoder = cxld; + } ++ cxld = cxl_rr->decoder; + + rc = cxl_rr_ep_add(cxl_rr, cxled); + if (rc) { diff --git a/queue-6.0/cxl-region-fix-distance-calculation-with-passthrough-ports.patch b/queue-6.0/cxl-region-fix-distance-calculation-with-passthrough-ports.patch new file mode 100644 index 00000000000..049b2075c28 --- /dev/null +++ b/queue-6.0/cxl-region-fix-distance-calculation-with-passthrough-ports.patch @@ -0,0 +1,114 @@ +From e4f6dfa9ef756a3934a4caf618b1e86e9e8e21d0 Mon Sep 17 00:00:00 2001 +From: Dan Williams +Date: Thu, 3 Nov 2022 17:30:54 -0700 +Subject: cxl/region: Fix 
'distance' calculation with passthrough ports + +From: Dan Williams + +commit e4f6dfa9ef756a3934a4caf618b1e86e9e8e21d0 upstream. + +When programming port decode targets, the algorithm wants to ensure that +two devices are compatible to be programmed as peers beneath a given +port. A compatible peer is a target that shares the same dport, and +where that target's interleave position also routes it to the same +dport. Compatibility is determined by the device's interleave position +being >= the distance. For example, if a given dport can only map every +Nth position then positions less than N away from the last target +programmed are incompatible. + +The @distance for the host-bridge's cxl_port in a simple dual-ported +host-bridge configuration with 2 direct-attached devices is 1, i.e. an +x2 region divided by 2 dports to reach 2 region targets. + +An x4 region under an x2 host-bridge would need 2 intervening switches +where the @distance at the host bridge level is 2 (x4 region divided by +2 switches to reach 4 devices). + +However, the distance between peers underneath a single ported +host-bridge is always zero because there is no limit to the number of +devices that can be mapped. In other words, there are no decoders to +program in a passthrough, all descendants are mapped and distance only +starts to matter for the intervening descendant ports of the passthrough +port. + +Add tracking for the number of dports mapped to a port, and use that to +detect the passthrough case for calculating @distance. 
+ +Cc: +Reported-by: Bobo WL +Reported-by: Jonathan Cameron +Link: http://lore.kernel.org/r/20221010172057.00001559@huawei.com +Fixes: 27b3f8d13830 ("cxl/region: Program target lists") +Reviewed-by: Vishal Verma +Link: https://lore.kernel.org/r/166752185440.947915.6617495912508299445.stgit@dwillia2-xfh.jf.intel.com +Signed-off-by: Dan Williams +Signed-off-by: Greg Kroah-Hartman +--- + drivers/cxl/core/port.c | 11 +++++++++-- + drivers/cxl/core/region.c | 9 ++++++++- + drivers/cxl/cxl.h | 2 ++ + 3 files changed, 19 insertions(+), 3 deletions(-) + +--- a/drivers/cxl/core/port.c ++++ b/drivers/cxl/core/port.c +@@ -811,6 +811,7 @@ static struct cxl_dport *find_dport(stru + static int add_dport(struct cxl_port *port, struct cxl_dport *new) + { + struct cxl_dport *dup; ++ int rc; + + device_lock_assert(&port->dev); + dup = find_dport(port, new->port_id); +@@ -821,8 +822,14 @@ static int add_dport(struct cxl_port *po + dev_name(dup->dport)); + return -EBUSY; + } +- return xa_insert(&port->dports, (unsigned long)new->dport, new, +- GFP_KERNEL); ++ ++ rc = xa_insert(&port->dports, (unsigned long)new->dport, new, ++ GFP_KERNEL); ++ if (rc) ++ return rc; ++ ++ port->nr_dports++; ++ return 0; + } + + /* +--- a/drivers/cxl/core/region.c ++++ b/drivers/cxl/core/region.c +@@ -989,7 +989,14 @@ static int cxl_port_setup_targets(struct + if (cxl_rr->nr_targets_set) { + int i, distance; + +- distance = p->nr_targets / cxl_rr->nr_targets; ++ /* ++ * Passthrough ports impose no distance requirements between ++ * peers ++ */ ++ if (port->nr_dports == 1) ++ distance = 0; ++ else ++ distance = p->nr_targets / cxl_rr->nr_targets; + for (i = 0; i < cxl_rr->nr_targets_set; i++) + if (ep->dport == cxlsd->target[i]) { + rc = check_last_peer(cxled, ep, cxl_rr, +--- a/drivers/cxl/cxl.h ++++ b/drivers/cxl/cxl.h +@@ -457,6 +457,7 @@ struct cxl_pmem_region { + * @regions: cxl_region_ref instances, regions mapped by this port + * @parent_dport: dport that points to this port in the parent + * 
@decoder_ida: allocator for decoder ids ++ * @nr_dports: number of entries in @dports + * @hdm_end: track last allocated HDM decoder instance for allocation ordering + * @commit_end: cursor to track highest committed decoder for commit ordering + * @component_reg_phys: component register capability base address (optional) +@@ -475,6 +476,7 @@ struct cxl_port { + struct xarray regions; + struct cxl_dport *parent_dport; + struct ida decoder_ida; ++ int nr_dports; + int hdm_end; + int commit_end; + resource_size_t component_reg_phys; diff --git a/queue-6.0/cxl-region-fix-region-hpa-ordering-validation.patch b/queue-6.0/cxl-region-fix-region-hpa-ordering-validation.patch new file mode 100644 index 00000000000..691261bd58e --- /dev/null +++ b/queue-6.0/cxl-region-fix-region-hpa-ordering-validation.patch @@ -0,0 +1,54 @@ +From a90accb358ae33ea982a35595573f7a045993f8b Mon Sep 17 00:00:00 2001 +From: Dan Williams +Date: Thu, 3 Nov 2022 17:30:24 -0700 +Subject: cxl/region: Fix region HPA ordering validation + +From: Dan Williams + +commit a90accb358ae33ea982a35595573f7a045993f8b upstream. + +Some regions may not have any address space allocated. Skip them when +validating HPA order otherwise a crash like the following may result: + + devm_cxl_add_region: cxl_acpi cxl_acpi.0: decoder3.4: created region9 + BUG: kernel NULL pointer dereference, address: 0000000000000000 + [..] + RIP: 0010:store_targetN+0x655/0x1740 [cxl_core] + [..] 
+ Call Trace: + + kernfs_fop_write_iter+0x144/0x200 + vfs_write+0x24a/0x4d0 + ksys_write+0x69/0xf0 + do_syscall_64+0x3a/0x90 + +store_targetN+0x655/0x1740: +alloc_region_ref at drivers/cxl/core/region.c:676 +(inlined by) cxl_port_attach_region at drivers/cxl/core/region.c:850 +(inlined by) cxl_region_attach at drivers/cxl/core/region.c:1290 +(inlined by) attach_target at drivers/cxl/core/region.c:1410 +(inlined by) store_targetN at drivers/cxl/core/region.c:1453 + +Cc: +Fixes: 384e624bb211 ("cxl/region: Attach endpoint decoders") +Reviewed-by: Vishal Verma +Reviewed-by: Dave Jiang +Link: https://lore.kernel.org/r/166752182461.947915.497032805239915067.stgit@dwillia2-xfh.jf.intel.com +Signed-off-by: Dan Williams +Signed-off-by: Greg Kroah-Hartman +--- + drivers/cxl/core/region.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/cxl/core/region.c ++++ b/drivers/cxl/core/region.c +@@ -657,6 +657,9 @@ static struct cxl_region_ref *alloc_regi + xa_for_each(&port->regions, index, iter) { + struct cxl_region_params *ip = &iter->region->params; + ++ if (!ip->res) ++ continue; ++ + if (ip->res->start > p->res->start) { + dev_dbg(&cxlr->dev, + "%s: HPA order violation %s:%pr vs %pr\n", diff --git a/queue-6.0/ftrace-fix-use-after-free-for-dynamic-ftrace_ops.patch b/queue-6.0/ftrace-fix-use-after-free-for-dynamic-ftrace_ops.patch new file mode 100644 index 00000000000..e0b1c9c1732 --- /dev/null +++ b/queue-6.0/ftrace-fix-use-after-free-for-dynamic-ftrace_ops.patch @@ -0,0 +1,139 @@ +From 0e792b89e6800cd9cb4757a76a96f7ef3e8b6294 Mon Sep 17 00:00:00 2001 +From: Li Huafei +Date: Thu, 3 Nov 2022 11:10:10 +0800 +Subject: ftrace: Fix use-after-free for dynamic ftrace_ops + +From: Li Huafei + +commit 0e792b89e6800cd9cb4757a76a96f7ef3e8b6294 upstream. + +KASAN reported a use-after-free with ftrace ops [1]. It was found from +vmcore that perf had registered two ops with the same content +successively, both dynamic. 
After unregistering the second ops, a +use-after-free occurred. + +In ftrace_shutdown(), when the second ops is unregistered, the +FTRACE_UPDATE_CALLS command is not set because there is another enabled +ops with the same content. Also, both ops are dynamic and the ftrace +callback function is ftrace_ops_list_func, so the +FTRACE_UPDATE_TRACE_FUNC command will not be set. Eventually the value +of 'command' will be 0 and ftrace_shutdown() will skip the rcu +synchronization. + +However, ftrace may be activated. When the ops is released, another CPU +may be accessing the ops. Add the missing synchronization to fix this +problem. + +[1] +BUG: KASAN: use-after-free in __ftrace_ops_list_func kernel/trace/ftrace.c:7020 [inline] +BUG: KASAN: use-after-free in ftrace_ops_list_func+0x2b0/0x31c kernel/trace/ftrace.c:7049 +Read of size 8 at addr ffff56551965bbc8 by task syz-executor.2/14468 + +CPU: 1 PID: 14468 Comm: syz-executor.2 Not tainted 5.10.0 #7 +Hardware name: linux,dummy-virt (DT) +Call trace: + dump_backtrace+0x0/0x40c arch/arm64/kernel/stacktrace.c:132 + show_stack+0x30/0x40 arch/arm64/kernel/stacktrace.c:196 + __dump_stack lib/dump_stack.c:77 [inline] + dump_stack+0x1b4/0x248 lib/dump_stack.c:118 + print_address_description.constprop.0+0x28/0x48c mm/kasan/report.c:387 + __kasan_report mm/kasan/report.c:547 [inline] + kasan_report+0x118/0x210 mm/kasan/report.c:564 + check_memory_region_inline mm/kasan/generic.c:187 [inline] + __asan_load8+0x98/0xc0 mm/kasan/generic.c:253 + __ftrace_ops_list_func kernel/trace/ftrace.c:7020 [inline] + ftrace_ops_list_func+0x2b0/0x31c kernel/trace/ftrace.c:7049 + ftrace_graph_call+0x0/0x4 + __might_sleep+0x8/0x100 include/linux/perf_event.h:1170 + __might_fault mm/memory.c:5183 [inline] + __might_fault+0x58/0x70 mm/memory.c:5171 + do_strncpy_from_user lib/strncpy_from_user.c:41 [inline] + strncpy_from_user+0x1f4/0x4b0 lib/strncpy_from_user.c:139 + getname_flags+0xb0/0x31c fs/namei.c:149 + getname+0x2c/0x40 fs/namei.c:209 + [...] 
+ +Allocated by task 14445: + kasan_save_stack+0x24/0x50 mm/kasan/common.c:48 + kasan_set_track mm/kasan/common.c:56 [inline] + __kasan_kmalloc mm/kasan/common.c:479 [inline] + __kasan_kmalloc.constprop.0+0x110/0x13c mm/kasan/common.c:449 + kasan_kmalloc+0xc/0x14 mm/kasan/common.c:493 + kmem_cache_alloc_trace+0x440/0x924 mm/slub.c:2950 + kmalloc include/linux/slab.h:563 [inline] + kzalloc include/linux/slab.h:675 [inline] + perf_event_alloc.part.0+0xb4/0x1350 kernel/events/core.c:11230 + perf_event_alloc kernel/events/core.c:11733 [inline] + __do_sys_perf_event_open kernel/events/core.c:11831 [inline] + __se_sys_perf_event_open+0x550/0x15f4 kernel/events/core.c:11723 + __arm64_sys_perf_event_open+0x6c/0x80 kernel/events/core.c:11723 + [...] + +Freed by task 14445: + kasan_save_stack+0x24/0x50 mm/kasan/common.c:48 + kasan_set_track+0x24/0x34 mm/kasan/common.c:56 + kasan_set_free_info+0x20/0x40 mm/kasan/generic.c:358 + __kasan_slab_free.part.0+0x11c/0x1b0 mm/kasan/common.c:437 + __kasan_slab_free mm/kasan/common.c:445 [inline] + kasan_slab_free+0x2c/0x40 mm/kasan/common.c:446 + slab_free_hook mm/slub.c:1569 [inline] + slab_free_freelist_hook mm/slub.c:1608 [inline] + slab_free mm/slub.c:3179 [inline] + kfree+0x12c/0xc10 mm/slub.c:4176 + perf_event_alloc.part.0+0xa0c/0x1350 kernel/events/core.c:11434 + perf_event_alloc kernel/events/core.c:11733 [inline] + __do_sys_perf_event_open kernel/events/core.c:11831 [inline] + __se_sys_perf_event_open+0x550/0x15f4 kernel/events/core.c:11723 + [...] 
+ +Link: https://lore.kernel.org/linux-trace-kernel/20221103031010.166498-1-lihuafei1@huawei.com + +Fixes: edb096e00724f ("ftrace: Fix memleak when unregistering dynamic ops when tracing disabled") +Cc: stable@vger.kernel.org +Suggested-by: Steven Rostedt +Signed-off-by: Li Huafei +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/ftrace.c | 16 +++------------- + 1 file changed, 3 insertions(+), 13 deletions(-) + +--- a/kernel/trace/ftrace.c ++++ b/kernel/trace/ftrace.c +@@ -3031,18 +3031,8 @@ int ftrace_shutdown(struct ftrace_ops *o + command |= FTRACE_UPDATE_TRACE_FUNC; + } + +- if (!command || !ftrace_enabled) { +- /* +- * If these are dynamic or per_cpu ops, they still +- * need their data freed. Since, function tracing is +- * not currently active, we can just free them +- * without synchronizing all CPUs. +- */ +- if (ops->flags & FTRACE_OPS_FL_DYNAMIC) +- goto free_ops; +- +- return 0; +- } ++ if (!command || !ftrace_enabled) ++ goto out; + + /* + * If the ops uses a trampoline, then it needs to be +@@ -3079,6 +3069,7 @@ int ftrace_shutdown(struct ftrace_ops *o + removed_ops = NULL; + ops->flags &= ~FTRACE_OPS_FL_REMOVING; + ++out: + /* + * Dynamic ops may be freed, we must make sure that all + * callers are done before leaving this function. 
+@@ -3106,7 +3097,6 @@ int ftrace_shutdown(struct ftrace_ops *o + if (IS_ENABLED(CONFIG_PREEMPTION)) + synchronize_rcu_tasks(); + +- free_ops: + ftrace_trampoline_free(ops); + } + diff --git a/queue-6.0/series b/queue-6.0/series index 6ac76c59ad7..4104a121e2f 100644 --- a/queue-6.0/series +++ b/queue-6.0/series @@ -127,3 +127,14 @@ fscrypt-stop-using-keyrings-subsystem-for-fscrypt_master_key.patch fscrypt-fix-keyring-memory-leak-on-mount-failure.patch clk-renesas-r8a779g0-add-sasyncper-clocks.patch btrfs-fix-lost-file-sync-on-direct-io-write-with-nowait-and-dsync-iocb.patch +btrfs-fix-tree-mod-log-mishandling-of-reallocated-nodes.patch +btrfs-fix-type-of-parameter-generation-in-btrfs_get_dentry.patch +btrfs-don-t-use-btrfs_chunk-sub_stripes-from-disk.patch +btrfs-fix-a-memory-allocation-failure-test-in-btrfs_submit_direct.patch +acpi-numa-add-cxl-cfmws-nodes-to-the-possible-nodes-set.patch +cxl-pmem-fix-cxl_pmem_region-and-cxl_memdev-leak.patch +cxl-region-fix-decoder-allocation-crash.patch +cxl-region-fix-region-hpa-ordering-validation.patch +cxl-region-fix-cxl_region-leak-cleanup-targets-at-region-delete.patch +cxl-region-fix-distance-calculation-with-passthrough-ports.patch +ftrace-fix-use-after-free-for-dynamic-ftrace_ops.patch