From 17ff9fdbde329838a531388e1d0cfa2bcf6c92c5 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 12 Apr 2023 10:13:14 +0200 Subject: [PATCH] 6.1-stable patches added patches: maple_tree-add-rcu-lock-checking-to-rcu-callback-functions.patch maple_tree-add-smp_rmb-to-dead-node-detection.patch maple_tree-be-more-cautious-about-dead-nodes.patch maple_tree-detect-dead-nodes-in-mas_start.patch maple_tree-fix-freeing-of-nodes-in-rcu-mode.patch maple_tree-fix-handle-of-invalidated-state-in-mas_wr_store_setup.patch maple_tree-fix-mas_prev-and-mas_find-state-handling.patch maple_tree-fix-potential-rcu-issue.patch maple_tree-reduce-user-error-potential.patch maple_tree-refine-ma_state-init-from-mas_start.patch maple_tree-remove-extra-smp_wmb-from-mas_dead_leaves.patch maple_tree-remove-gfp_zero-from-kmem_cache_alloc-and-kmem_cache_alloc_bulk.patch mm-enable-maple-tree-rcu-mode-by-default.patch --- ...k-checking-to-rcu-callback-functions.patch | 380 ++++++++++++++++++ ...e-add-smp_rmb-to-dead-node-detection.patch | 52 +++ ...ee-be-more-cautious-about-dead-nodes.patch | 190 +++++++++ ..._tree-detect-dead-nodes-in-mas_start.patch | 44 ++ ...ree-fix-freeing-of-nodes-in-rcu-mode.patch | 182 +++++++++ ...alidated-state-in-mas_wr_store_setup.patch | 39 ++ ...mas_prev-and-mas_find-state-handling.patch | 55 +++ .../maple_tree-fix-potential-rcu-issue.patch | 40 ++ ...ple_tree-reduce-user-error-potential.patch | 51 +++ ...-refine-ma_state-init-from-mas_start.patch | 65 +++ ...e-extra-smp_wmb-from-mas_dead_leaves.patch | 35 ++ ...ache_alloc-and-kmem_cache_alloc_bulk.patch | 294 ++++++++++++++ ...nable-maple-tree-rcu-mode-by-default.patch | 92 +++++ queue-6.1/series | 13 + 14 files changed, 1532 insertions(+) create mode 100644 queue-6.1/maple_tree-add-rcu-lock-checking-to-rcu-callback-functions.patch create mode 100644 queue-6.1/maple_tree-add-smp_rmb-to-dead-node-detection.patch create mode 100644 queue-6.1/maple_tree-be-more-cautious-about-dead-nodes.patch create mode 100644 queue-6.1/maple_tree-detect-dead-nodes-in-mas_start.patch create mode 100644 queue-6.1/maple_tree-fix-freeing-of-nodes-in-rcu-mode.patch create mode 100644 queue-6.1/maple_tree-fix-handle-of-invalidated-state-in-mas_wr_store_setup.patch create mode 100644 queue-6.1/maple_tree-fix-mas_prev-and-mas_find-state-handling.patch create mode 100644 queue-6.1/maple_tree-fix-potential-rcu-issue.patch create mode 100644 queue-6.1/maple_tree-reduce-user-error-potential.patch create mode 100644 queue-6.1/maple_tree-refine-ma_state-init-from-mas_start.patch create mode 100644 queue-6.1/maple_tree-remove-extra-smp_wmb-from-mas_dead_leaves.patch create mode 100644 queue-6.1/maple_tree-remove-gfp_zero-from-kmem_cache_alloc-and-kmem_cache_alloc_bulk.patch create mode 100644 queue-6.1/mm-enable-maple-tree-rcu-mode-by-default.patch diff --git a/queue-6.1/maple_tree-add-rcu-lock-checking-to-rcu-callback-functions.patch b/queue-6.1/maple_tree-add-rcu-lock-checking-to-rcu-callback-functions.patch new file mode 100644 index 00000000000..d032a73acd4 --- /dev/null +++ b/queue-6.1/maple_tree-add-rcu-lock-checking-to-rcu-callback-functions.patch @@ -0,0 +1,380 @@ +From stable-owner@vger.kernel.org Tue Apr 11 17:13:37 2023 +From: "Liam R. Howlett" +Date: Tue, 11 Apr 2023 11:10:54 -0400 +Subject: maple_tree: add RCU lock checking to rcu callback functions +To: Greg Kroah-Hartman , stable@vger.kernel.org +Cc: maple-tree@lists.infradead.org, linux-mm@kvack.org, linux-kernel@vger.kernel.org, "Liam R. Howlett" , Suren Baghdasaryan , "Liam R . 
Howlett" +Message-ID: <20230411151055.2910579-14-Liam.Howlett@oracle.com> + +From: "Liam R. Howlett" + +commit 790e1fa86b340c2bd4a327e01c161f7a1ad885f6 upstream. + +Dereferencing RCU objects within the RCU callback without the RCU check +has caused lockdep to complain. Fix the RCU dereferencing by using the +RCU callback lock to ensure the operation is safe. + +Also stop creating a new lock to use for dereferencing during destruction +of the tree or subtree. Instead, pass through a pointer to the tree that +has the lock that is held for RCU dereferencing checking. It also does +not make sense to use the maple state in the freeing scenario as the tree +walk is a special case where the tree no longer has the normal encodings +and parent pointers. + +Link: https://lkml.kernel.org/r/20230227173632.3292573-8-surenb@google.com +Fixes: 54a611b60590 ("Maple Tree: add new data structure") +Cc: stable@vger.kernel.org +Reported-by: Suren Baghdasaryan +Signed-off-by: Liam R. Howlett +Signed-off-by: Greg Kroah-Hartman +--- + lib/maple_tree.c | 188 ++++++++++++++++++++++++++++--------------------------- + 1 file changed, 96 insertions(+), 92 deletions(-) + +--- a/lib/maple_tree.c ++++ b/lib/maple_tree.c +@@ -814,6 +814,11 @@ static inline void *mt_slot(const struct + return rcu_dereference_check(slots[offset], mt_locked(mt)); + } + ++static inline void *mt_slot_locked(struct maple_tree *mt, void __rcu **slots, ++ unsigned char offset) ++{ ++ return rcu_dereference_protected(slots[offset], mt_locked(mt)); ++} + /* + * mas_slot_locked() - Get the slot value when holding the maple tree lock. + * @mas: The maple state +@@ -825,7 +830,7 @@ static inline void *mt_slot(const struct + static inline void *mas_slot_locked(struct ma_state *mas, void __rcu **slots, + unsigned char offset) + { +- return rcu_dereference_protected(slots[offset], mt_locked(mas->tree)); ++ return mt_slot_locked(mas->tree, slots, offset); + } + + /* +@@ -897,34 +902,35 @@ static inline void ma_set_meta(struct ma + } + + /* +- * mas_clear_meta() - clear the metadata information of a node, if it exists +- * @mas: The maple state ++ * mt_clear_meta() - clear the metadata information of a node, if it exists ++ * @mt: The maple tree + * @mn: The maple node +- * @mt: The maple node type ++ * @type: The maple node type + * @offset: The offset of the highest sub-gap in this node. + * @end: The end of the data in this node. + */ +-static inline void mas_clear_meta(struct ma_state *mas, struct maple_node *mn, +- enum maple_type mt) ++static inline void mt_clear_meta(struct maple_tree *mt, struct maple_node *mn, ++ enum maple_type type) + { + struct maple_metadata *meta; + unsigned long *pivots; + void __rcu **slots; + void *next; + +- switch (mt) { ++ switch (type) { + case maple_range_64: + pivots = mn->mr64.pivot; + if (unlikely(pivots[MAPLE_RANGE64_SLOTS - 2])) { + slots = mn->mr64.slot; +- next = mas_slot_locked(mas, slots, +- MAPLE_RANGE64_SLOTS - 1); +- if (unlikely((mte_to_node(next) && mte_node_type(next)))) +- return; /* The last slot is a node, no metadata */ ++ next = mt_slot_locked(mt, slots, ++ MAPLE_RANGE64_SLOTS - 1); ++ if (unlikely((mte_to_node(next) && ++ mte_node_type(next)))) ++ return; /* no metadata, could be node */ + } + fallthrough; + case maple_arange_64: +- meta = ma_meta(mn, mt); ++ meta = ma_meta(mn, type); + break; + default: + return; +@@ -5472,7 +5478,7 @@ no_gap: + } + + /* +- * mas_dead_leaves() - Mark all leaves of a node as dead. ++ * mte_dead_leaves() - Mark all leaves of a node as dead. 
+ * @mas: The maple state + * @slots: Pointer to the slot array + * @type: The maple node type +@@ -5482,16 +5488,16 @@ no_gap: + * Return: The number of leaves marked as dead. + */ + static inline +-unsigned char mas_dead_leaves(struct ma_state *mas, void __rcu **slots, +- enum maple_type mt) ++unsigned char mte_dead_leaves(struct maple_enode *enode, struct maple_tree *mt, ++ void __rcu **slots) + { + struct maple_node *node; + enum maple_type type; + void *entry; + int offset; + +- for (offset = 0; offset < mt_slots[mt]; offset++) { +- entry = mas_slot_locked(mas, slots, offset); ++ for (offset = 0; offset < mt_slot_count(enode); offset++) { ++ entry = mt_slot(mt, slots, offset); + type = mte_node_type(entry); + node = mte_to_node(entry); + /* Use both node and type to catch LE & BE metadata */ +@@ -5506,162 +5512,160 @@ unsigned char mas_dead_leaves(struct ma_ + return offset; + } + +-static void __rcu **mas_dead_walk(struct ma_state *mas, unsigned char offset) ++/** ++ * mte_dead_walk() - Walk down a dead tree to just before the leaves ++ * @enode: The maple encoded node ++ * @offset: The starting offset ++ * ++ * Note: This can only be used from the RCU callback context. ++ */ ++static void __rcu **mte_dead_walk(struct maple_enode **enode, unsigned char offset) + { +- struct maple_node *next; ++ struct maple_node *node, *next; + void __rcu **slots = NULL; + +- next = mas_mn(mas); ++ next = mte_to_node(*enode); + do { +- mas->node = mt_mk_node(next, next->type); +- slots = ma_slots(next, next->type); +- next = mas_slot_locked(mas, slots, offset); ++ *enode = ma_enode_ptr(next); ++ node = mte_to_node(*enode); ++ slots = ma_slots(node, node->type); ++ next = rcu_dereference_protected(slots[offset], ++ lock_is_held(&rcu_callback_map)); + offset = 0; + } while (!ma_is_leaf(next->type)); + + return slots; + } + ++/** ++ * mt_free_walk() - Walk & free a tree in the RCU callback context ++ * @head: The RCU head that's within the node. ++ * ++ * Note: This can only be used from the RCU callback context. 
++ */ + static void mt_free_walk(struct rcu_head *head) + { + void __rcu **slots; + struct maple_node *node, *start; +- struct maple_tree mt; ++ struct maple_enode *enode; + unsigned char offset; + enum maple_type type; +- MA_STATE(mas, &mt, 0, 0); + + node = container_of(head, struct maple_node, rcu); + + if (ma_is_leaf(node->type)) + goto free_leaf; + +- mt_init_flags(&mt, node->ma_flags); +- mas_lock(&mas); + start = node; +- mas.node = mt_mk_node(node, node->type); +- slots = mas_dead_walk(&mas, 0); +- node = mas_mn(&mas); ++ enode = mt_mk_node(node, node->type); ++ slots = mte_dead_walk(&enode, 0); ++ node = mte_to_node(enode); + do { + mt_free_bulk(node->slot_len, slots); + offset = node->parent_slot + 1; +- mas.node = node->piv_parent; +- if (mas_mn(&mas) == node) +- goto start_slots_free; +- +- type = mte_node_type(mas.node); +- slots = ma_slots(mte_to_node(mas.node), type); +- if ((offset < mt_slots[type]) && (slots[offset])) +- slots = mas_dead_walk(&mas, offset); +- +- node = mas_mn(&mas); ++ enode = node->piv_parent; ++ if (mte_to_node(enode) == node) ++ goto free_leaf; ++ ++ type = mte_node_type(enode); ++ slots = ma_slots(mte_to_node(enode), type); ++ if ((offset < mt_slots[type]) && ++ rcu_dereference_protected(slots[offset], ++ lock_is_held(&rcu_callback_map))) ++ slots = mte_dead_walk(&enode, offset); ++ node = mte_to_node(enode); + } while ((node != start) || (node->slot_len < offset)); + + slots = ma_slots(node, node->type); + mt_free_bulk(node->slot_len, slots); + +-start_slots_free: +- mas_unlock(&mas); + free_leaf: + mt_free_rcu(&node->rcu); + } + +-static inline void __rcu **mas_destroy_descend(struct ma_state *mas, +- struct maple_enode *prev, unsigned char offset) ++static inline void __rcu **mte_destroy_descend(struct maple_enode **enode, ++ struct maple_tree *mt, struct maple_enode *prev, unsigned char offset) + { + struct maple_node *node; +- struct maple_enode *next = mas->node; ++ struct maple_enode *next = *enode; + void __rcu **slots = NULL; ++ enum maple_type type; ++ unsigned char next_offset = 0; + + do { +- mas->node = next; +- node = mas_mn(mas); +- slots = ma_slots(node, mte_node_type(mas->node)); +- next = mas_slot_locked(mas, slots, 0); +- if ((mte_dead_node(next))) { +- mte_to_node(next)->type = mte_node_type(next); +- next = mas_slot_locked(mas, slots, 1); +- } ++ *enode = next; ++ node = mte_to_node(*enode); ++ type = mte_node_type(*enode); ++ slots = ma_slots(node, type); ++ next = mt_slot_locked(mt, slots, next_offset); ++ if ((mte_dead_node(next))) ++ next = mt_slot_locked(mt, slots, ++next_offset); + +- mte_set_node_dead(mas->node); +- node->type = mte_node_type(mas->node); +- mas_clear_meta(mas, node, node->type); ++ mte_set_node_dead(*enode); ++ node->type = type; + node->piv_parent = prev; + node->parent_slot = offset; +- offset = 0; +- prev = mas->node; ++ offset = next_offset; ++ next_offset = 0; ++ prev = *enode; + } while (!mte_is_leaf(next)); + + return slots; + } + +-static void mt_destroy_walk(struct maple_enode *enode, unsigned char ma_flags, ++static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt, + bool free) + { + void __rcu **slots; + struct maple_node *node = mte_to_node(enode); + struct maple_enode *start; +- struct maple_tree mt; + +- MA_STATE(mas, &mt, 0, 0); +- +- mas.node = enode; + if (mte_is_leaf(enode)) { + node->type = mte_node_type(enode); + goto free_leaf; + } + +- ma_flags &= ~MT_FLAGS_LOCK_MASK; +- mt_init_flags(&mt, ma_flags); +- mas_lock(&mas); +- +- mte_to_node(enode)->ma_flags = ma_flags; 
+ start = enode; +- slots = mas_destroy_descend(&mas, start, 0); +- node = mas_mn(&mas); ++ slots = mte_destroy_descend(&enode, mt, start, 0); ++ node = mte_to_node(enode); // Updated in the above call. + do { + enum maple_type type; + unsigned char offset; + struct maple_enode *parent, *tmp; + +- node->type = mte_node_type(mas.node); +- node->slot_len = mas_dead_leaves(&mas, slots, node->type); ++ node->slot_len = mte_dead_leaves(enode, mt, slots); + if (free) + mt_free_bulk(node->slot_len, slots); + offset = node->parent_slot + 1; +- mas.node = node->piv_parent; +- if (mas_mn(&mas) == node) +- goto start_slots_free; ++ enode = node->piv_parent; ++ if (mte_to_node(enode) == node) ++ goto free_leaf; + +- type = mte_node_type(mas.node); +- slots = ma_slots(mte_to_node(mas.node), type); ++ type = mte_node_type(enode); ++ slots = ma_slots(mte_to_node(enode), type); + if (offset >= mt_slots[type]) + goto next; + +- tmp = mas_slot_locked(&mas, slots, offset); ++ tmp = mt_slot_locked(mt, slots, offset); + if (mte_node_type(tmp) && mte_to_node(tmp)) { +- parent = mas.node; +- mas.node = tmp; +- slots = mas_destroy_descend(&mas, parent, offset); ++ parent = enode; ++ enode = tmp; ++ slots = mte_destroy_descend(&enode, mt, parent, offset); + } + next: +- node = mas_mn(&mas); +- } while (start != mas.node); ++ node = mte_to_node(enode); ++ } while (start != enode); + +- node = mas_mn(&mas); +- node->type = mte_node_type(mas.node); +- node->slot_len = mas_dead_leaves(&mas, slots, node->type); ++ node = mte_to_node(enode); ++ node->slot_len = mte_dead_leaves(enode, mt, slots); + if (free) + mt_free_bulk(node->slot_len, slots); + +-start_slots_free: +- mas_unlock(&mas); +- + free_leaf: + if (free) + mt_free_rcu(&node->rcu); + else +- mas_clear_meta(&mas, node, node->type); ++ mt_clear_meta(mt, node, node->type); + } + + /* +@@ -5677,10 +5681,10 @@ static inline void mte_destroy_walk(stru + struct maple_node *node = mte_to_node(enode); + + if (mt_in_rcu(mt)) { +- mt_destroy_walk(enode, mt->ma_flags, false); ++ mt_destroy_walk(enode, mt, false); + call_rcu(&node->rcu, mt_free_walk); + } else { +- mt_destroy_walk(enode, mt->ma_flags, true); ++ mt_destroy_walk(enode, mt, true); + } + } + diff --git a/queue-6.1/maple_tree-add-smp_rmb-to-dead-node-detection.patch b/queue-6.1/maple_tree-add-smp_rmb-to-dead-node-detection.patch new file mode 100644 index 00000000000..c6f8be63275 --- /dev/null +++ b/queue-6.1/maple_tree-add-smp_rmb-to-dead-node-detection.patch @@ -0,0 +1,52 @@ +From stable-owner@vger.kernel.org Tue Apr 11 17:15:03 2023 +From: "Liam R. Howlett" +Date: Tue, 11 Apr 2023 11:10:53 -0400 +Subject: maple_tree: add smp_rmb() to dead node detection +To: Greg Kroah-Hartman , stable@vger.kernel.org +Cc: maple-tree@lists.infradead.org, linux-mm@kvack.org, linux-kernel@vger.kernel.org, "Liam R. Howlett" , "Liam R . Howlett" +Message-ID: <20230411151055.2910579-13-Liam.Howlett@oracle.com> + +From: "Liam R. Howlett" + +commit 0a2b18d948838e16912b3b627b504ab062b7d02a upstream. + +Add an smp_rmb() before reading the parent pointer to ensure that anything +read from the node prior to the parent pointer hasn't been reordered ahead +of this check. + +This is necessary for RCU mode. + +Link: https://lkml.kernel.org/r/20230227173632.3292573-7-surenb@google.com +Fixes: 54a611b60590 ("Maple Tree: add new data structure") +Cc: stable@vger.kernel.org +Signed-off-by: Liam R.
Howlett +Signed-off-by: Greg Kroah-Hartman +--- + lib/maple_tree.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/lib/maple_tree.c ++++ b/lib/maple_tree.c +@@ -529,9 +529,11 @@ static inline struct maple_node *mte_par + */ + static inline bool ma_dead_node(const struct maple_node *node) + { +- struct maple_node *parent = (void *)((unsigned long) +- node->parent & ~MAPLE_NODE_MASK); ++ struct maple_node *parent; + ++ /* Do not reorder reads from the node prior to the parent check */ ++ smp_rmb(); ++ parent = (void *)((unsigned long) node->parent & ~MAPLE_NODE_MASK); + return (parent == node); + } + +@@ -546,6 +548,8 @@ static inline bool mte_dead_node(const s + struct maple_node *parent, *node; + + node = mte_to_node(enode); ++ /* Do not reorder reads from the node prior to the parent check */ ++ smp_rmb(); + parent = mte_parent(enode); + return (parent == node); + } diff --git a/queue-6.1/maple_tree-be-more-cautious-about-dead-nodes.patch b/queue-6.1/maple_tree-be-more-cautious-about-dead-nodes.patch new file mode 100644 index 00000000000..5f808bb6d62 --- /dev/null +++ b/queue-6.1/maple_tree-be-more-cautious-about-dead-nodes.patch @@ -0,0 +1,190 @@ +From stable-owner@vger.kernel.org Tue Apr 11 17:13:00 2023 +From: "Liam R. Howlett" +Date: Tue, 11 Apr 2023 11:10:48 -0400 +Subject: maple_tree: be more cautious about dead nodes +To: Greg Kroah-Hartman , stable@vger.kernel.org +Cc: maple-tree@lists.infradead.org, linux-mm@kvack.org, linux-kernel@vger.kernel.org, "Liam R. Howlett" , Stable@vger.kernel.org, Liam Howlett +Message-ID: <20230411151055.2910579-8-Liam.Howlett@oracle.com> + +From: "Liam R. Howlett" + +commit 39d0bd86c499ecd6abae42a9b7112056c5560691 upstream. + +ma_pivots() and ma_data_end() may be called with a dead node. Ensure +that the node isn't dead before using the returned values. + +This is necessary for RCU mode of the maple tree. + +Link: https://lkml.kernel.org/r/20230227173632.3292573-1-surenb@google.com +Link: https://lkml.kernel.org/r/20230227173632.3292573-2-surenb@google.com +Fixes: 54a611b60590 ("Maple Tree: add new data structure") +Cc: +Signed-off-by: Liam Howlett +Signed-off-by: Greg Kroah-Hartman +--- + lib/maple_tree.c | 52 +++++++++++++++++++++++++++++++++++++++++++--------- + 1 file changed, 43 insertions(+), 9 deletions(-) + +--- a/lib/maple_tree.c ++++ b/lib/maple_tree.c +@@ -534,6 +534,7 @@ static inline bool ma_dead_node(const st + + return (parent == node); + } ++ + /* + * mte_dead_node() - check if the @enode is dead.
+ * @enode: The encoded maple node +@@ -615,6 +616,8 @@ static inline unsigned int mas_alloc_req + * @node - the maple node + * @type - the node type + * ++ * In the event of a dead node, this array may be %NULL ++ * + * Return: A pointer to the maple node pivots + */ + static inline unsigned long *ma_pivots(struct maple_node *node, +@@ -1086,8 +1089,11 @@ static int mas_ascend(struct ma_state *m + a_type = mas_parent_enum(mas, p_enode); + a_node = mte_parent(p_enode); + a_slot = mte_parent_slot(p_enode); +- pivots = ma_pivots(a_node, a_type); + a_enode = mt_mk_node(a_node, a_type); ++ pivots = ma_pivots(a_node, a_type); ++ ++ if (unlikely(ma_dead_node(a_node))) ++ return 1; + + if (!set_min && a_slot) { + set_min = true; +@@ -1393,6 +1399,9 @@ static inline unsigned char ma_data_end( + { + unsigned char offset; + ++ if (!pivots) ++ return 0; ++ + if (type == maple_arange_64) + return ma_meta_end(node, type); + +@@ -1428,6 +1437,9 @@ static inline unsigned char mas_data_end + return ma_meta_end(node, type); + + pivots = ma_pivots(node, type); ++ if (unlikely(ma_dead_node(node))) ++ return 0; ++ + offset = mt_pivots[type] - 1; + if (likely(!pivots[offset])) + return ma_meta_end(node, type); +@@ -4493,6 +4505,9 @@ static inline int mas_prev_node(struct m + node = mas_mn(mas); + slots = ma_slots(node, mt); + pivots = ma_pivots(node, mt); ++ if (unlikely(ma_dead_node(node))) ++ return 1; ++ + mas->max = pivots[offset]; + if (offset) + mas->min = pivots[offset - 1] + 1; +@@ -4514,6 +4529,9 @@ static inline int mas_prev_node(struct m + slots = ma_slots(node, mt); + pivots = ma_pivots(node, mt); + offset = ma_data_end(node, mt, pivots, mas->max); ++ if (unlikely(ma_dead_node(node))) ++ return 1; ++ + if (offset) + mas->min = pivots[offset - 1] + 1; + +@@ -4562,6 +4580,7 @@ static inline int mas_next_node(struct m + struct maple_enode *enode; + int level = 0; + unsigned char offset; ++ unsigned char node_end; + enum maple_type mt; + void __rcu **slots; + +@@ -4585,7 +4604,11 @@ static inline int mas_next_node(struct m + node = mas_mn(mas); + mt = mte_node_type(mas->node); + pivots = ma_pivots(node, mt); +- } while (unlikely(offset == ma_data_end(node, mt, pivots, mas->max))); ++ node_end = ma_data_end(node, mt, pivots, mas->max); ++ if (unlikely(ma_dead_node(node))) ++ return 1; ++ ++ } while (unlikely(offset == node_end)); + + slots = ma_slots(node, mt); + pivot = mas_safe_pivot(mas, pivots, ++offset, mt); +@@ -4601,6 +4624,9 @@ static inline int mas_next_node(struct m + mt = mte_node_type(mas->node); + slots = ma_slots(node, mt); + pivots = ma_pivots(node, mt); ++ if (unlikely(ma_dead_node(node))) ++ return 1; ++ + offset = 0; + pivot = pivots[0]; + } +@@ -4647,11 +4673,14 @@ static inline void *mas_next_nentry(stru + return NULL; + } + +- pivots = ma_pivots(node, type); + slots = ma_slots(node, type); +- mas->index = mas_safe_min(mas, pivots, mas->offset); ++ pivots = ma_pivots(node, type); + count = ma_data_end(node, type, pivots, mas->max); +- if (ma_dead_node(node)) ++ if (unlikely(ma_dead_node(node))) ++ return NULL; ++ ++ mas->index = mas_safe_min(mas, pivots, mas->offset); ++ if (unlikely(ma_dead_node(node))) + return NULL; + + if (mas->index > max) +@@ -4809,6 +4838,11 @@ retry: + + slots = ma_slots(mn, mt); + pivots = ma_pivots(mn, mt); ++ if (unlikely(ma_dead_node(mn))) { ++ mas_rewalk(mas, index); ++ goto retry; ++ } ++ + if (offset == mt_pivots[mt]) + pivot = mas->max; + else +@@ -6611,11 +6645,11 @@ static inline void *mas_first_entry(stru + while (likely(!ma_is_leaf(mt))) { + 
MT_BUG_ON(mas->tree, mte_dead_node(mas->node)); + slots = ma_slots(mn, mt); +- pivots = ma_pivots(mn, mt); +- max = pivots[0]; + entry = mas_slot(mas, slots, 0); ++ pivots = ma_pivots(mn, mt); + if (unlikely(ma_dead_node(mn))) + return NULL; ++ max = pivots[0]; + mas->node = entry; + mn = mas_mn(mas); + mt = mte_node_type(mas->node); +@@ -6635,13 +6669,13 @@ static inline void *mas_first_entry(stru + if (likely(entry)) + return entry; + +- pivots = ma_pivots(mn, mt); +- mas->index = pivots[0] + 1; + mas->offset = 1; + entry = mas_slot(mas, slots, 1); ++ pivots = ma_pivots(mn, mt); + if (unlikely(ma_dead_node(mn))) + return NULL; + ++ mas->index = pivots[0] + 1; + if (mas->index > limit) + goto none; + diff --git a/queue-6.1/maple_tree-detect-dead-nodes-in-mas_start.patch b/queue-6.1/maple_tree-detect-dead-nodes-in-mas_start.patch new file mode 100644 index 00000000000..fa2cc2ff24b --- /dev/null +++ b/queue-6.1/maple_tree-detect-dead-nodes-in-mas_start.patch @@ -0,0 +1,44 @@ +From stable-owner@vger.kernel.org Tue Apr 11 17:13:23 2023 +From: "Liam R. Howlett" +Date: Tue, 11 Apr 2023 11:10:50 -0400 +Subject: maple_tree: detect dead nodes in mas_start() +To: Greg Kroah-Hartman , stable@vger.kernel.org +Cc: maple-tree@lists.infradead.org, linux-mm@kvack.org, linux-kernel@vger.kernel.org, "Liam R. Howlett" , Stable@vger.kernel.org, Liam Howlett +Message-ID: <20230411151055.2910579-10-Liam.Howlett@oracle.com> + +From: "Liam R. Howlett" + +commit a7b92d59c885018cb7bb88539892278e4fd64b29 upstream. + +When initially starting a search, the root node may already be in the +process of being replaced in RCU mode. Detect and restart the walk if +this is the case. This is necessary for RCU mode of the maple tree. + +Link: https://lkml.kernel.org/r/20230227173632.3292573-3-surenb@google.com +Cc: +Fixes: 54a611b60590 ("Maple Tree: add new data structure") +Signed-off-by: Liam Howlett +Signed-off-by: Greg Kroah-Hartman +--- + lib/maple_tree.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/lib/maple_tree.c ++++ b/lib/maple_tree.c +@@ -1352,12 +1352,16 @@ static inline struct maple_enode *mas_st + mas->max = ULONG_MAX; + mas->depth = 0; + ++retry: + root = mas_root(mas); + /* Tree with nodes */ + if (likely(xa_is_node(root))) { + mas->depth = 1; + mas->node = mte_safe_root(root); + mas->offset = 0; ++ if (mte_dead_node(mas->node)) ++ goto retry; ++ + return NULL; + } + diff --git a/queue-6.1/maple_tree-fix-freeing-of-nodes-in-rcu-mode.patch b/queue-6.1/maple_tree-fix-freeing-of-nodes-in-rcu-mode.patch new file mode 100644 index 00000000000..5051e8168fd --- /dev/null +++ b/queue-6.1/maple_tree-fix-freeing-of-nodes-in-rcu-mode.patch @@ -0,0 +1,182 @@ +From stable-owner@vger.kernel.org Tue Apr 11 17:13:39 2023 +From: "Liam R. Howlett" +Date: Tue, 11 Apr 2023 11:10:51 -0400 +Subject: maple_tree: fix freeing of nodes in rcu mode +To: Greg Kroah-Hartman , stable@vger.kernel.org +Cc: maple-tree@lists.infradead.org, linux-mm@kvack.org, linux-kernel@vger.kernel.org, "Liam R. Howlett" , Stable@vger.kernel.org, Liam Howlett +Message-ID: <20230411151055.2910579-11-Liam.Howlett@oracle.com> + +From: "Liam R. Howlett" + +commit 2e5b4921f8efc9e845f4f04741797d16f36847eb upstream. + +The walk to destroy the nodes was not always setting the node type and +would result in a destroy method potentially using the values as nodes. +Avoid this by setting the correct node types. This is necessary for the +RCU mode of the maple tree. 
+ +Link: https://lkml.kernel.org/r/20230227173632.3292573-4-surenb@google.com +Cc: +Fixes: 54a611b60590 ("Maple Tree: add new data structure") +Signed-off-by: Liam Howlett +Signed-off-by: Greg Kroah-Hartman +--- + lib/maple_tree.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++--------- + 1 file changed, 62 insertions(+), 11 deletions(-) + +--- a/lib/maple_tree.c ++++ b/lib/maple_tree.c +@@ -893,6 +893,44 @@ static inline void ma_set_meta(struct ma + } + + /* ++ * mas_clear_meta() - clear the metadata information of a node, if it exists ++ * @mas: The maple state ++ * @mn: The maple node ++ * @mt: The maple node type ++ * @offset: The offset of the highest sub-gap in this node. ++ * @end: The end of the data in this node. ++ */ ++static inline void mas_clear_meta(struct ma_state *mas, struct maple_node *mn, ++ enum maple_type mt) ++{ ++ struct maple_metadata *meta; ++ unsigned long *pivots; ++ void __rcu **slots; ++ void *next; ++ ++ switch (mt) { ++ case maple_range_64: ++ pivots = mn->mr64.pivot; ++ if (unlikely(pivots[MAPLE_RANGE64_SLOTS - 2])) { ++ slots = mn->mr64.slot; ++ next = mas_slot_locked(mas, slots, ++ MAPLE_RANGE64_SLOTS - 1); ++ if (unlikely((mte_to_node(next) && mte_node_type(next)))) ++ return; /* The last slot is a node, no metadata */ ++ } ++ fallthrough; ++ case maple_arange_64: ++ meta = ma_meta(mn, mt); ++ break; ++ default: ++ return; ++ } ++ ++ meta->gap = 0; ++ meta->end = 0; ++} ++ ++/* + * ma_meta_end() - Get the data end of a node from the metadata + * @mn: The maple node + * @mt: The maple node type +@@ -5433,20 +5471,22 @@ no_gap: + * mas_dead_leaves() - Mark all leaves of a node as dead. + * @mas: The maple state + * @slots: Pointer to the slot array ++ * @type: The maple node type + * + * Must hold the write lock. + * + * Return: The number of leaves marked as dead. 
+ */ + static inline +-unsigned char mas_dead_leaves(struct ma_state *mas, void __rcu **slots) ++unsigned char mas_dead_leaves(struct ma_state *mas, void __rcu **slots, ++ enum maple_type mt) + { + struct maple_node *node; + enum maple_type type; + void *entry; + int offset; + +- for (offset = 0; offset < mt_slot_count(mas->node); offset++) { ++ for (offset = 0; offset < mt_slots[mt]; offset++) { + entry = mas_slot_locked(mas, slots, offset); + type = mte_node_type(entry); + node = mte_to_node(entry); +@@ -5465,14 +5505,13 @@ unsigned char mas_dead_leaves(struct ma_ + + static void __rcu **mas_dead_walk(struct ma_state *mas, unsigned char offset) + { +- struct maple_node *node, *next; ++ struct maple_node *next; + void __rcu **slots = NULL; + + next = mas_mn(mas); + do { +- mas->node = ma_enode_ptr(next); +- node = mas_mn(mas); +- slots = ma_slots(node, node->type); ++ mas->node = mt_mk_node(next, next->type); ++ slots = ma_slots(next, next->type); + next = mas_slot_locked(mas, slots, offset); + offset = 0; + } while (!ma_is_leaf(next->type)); +@@ -5536,11 +5575,14 @@ static inline void __rcu **mas_destroy_d + node = mas_mn(mas); + slots = ma_slots(node, mte_node_type(mas->node)); + next = mas_slot_locked(mas, slots, 0); +- if ((mte_dead_node(next))) ++ if ((mte_dead_node(next))) { ++ mte_to_node(next)->type = mte_node_type(next); + next = mas_slot_locked(mas, slots, 1); ++ } + + mte_set_node_dead(mas->node); + node->type = mte_node_type(mas->node); ++ mas_clear_meta(mas, node, node->type); + node->piv_parent = prev; + node->parent_slot = offset; + offset = 0; +@@ -5560,13 +5602,18 @@ static void mt_destroy_walk(struct maple + + MA_STATE(mas, &mt, 0, 0); + +- if (mte_is_leaf(enode)) ++ mas.node = enode; ++ if (mte_is_leaf(enode)) { ++ node->type = mte_node_type(enode); + goto free_leaf; ++ } + ++ ma_flags &= ~MT_FLAGS_LOCK_MASK; + mt_init_flags(&mt, ma_flags); + mas_lock(&mas); + +- mas.node = start = enode; ++ mte_to_node(enode)->ma_flags = ma_flags; ++ start = enode; + slots = mas_destroy_descend(&mas, start, 0); + node = mas_mn(&mas); + do { +@@ -5574,7 +5621,8 @@ static void mt_destroy_walk(struct maple + unsigned char offset; + struct maple_enode *parent, *tmp; + +- node->slot_len = mas_dead_leaves(&mas, slots); ++ node->type = mte_node_type(mas.node); ++ node->slot_len = mas_dead_leaves(&mas, slots, node->type); + if (free) + mt_free_bulk(node->slot_len, slots); + offset = node->parent_slot + 1; +@@ -5598,7 +5646,8 @@ next: + } while (start != mas.node); + + node = mas_mn(&mas); +- node->slot_len = mas_dead_leaves(&mas, slots); ++ node->type = mte_node_type(mas.node); ++ node->slot_len = mas_dead_leaves(&mas, slots, node->type); + if (free) + mt_free_bulk(node->slot_len, slots); + +@@ -5608,6 +5657,8 @@ start_slots_free: + free_leaf: + if (free) + mt_free_rcu(&node->rcu); ++ else ++ mas_clear_meta(&mas, node, node->type); + } + + /* diff --git a/queue-6.1/maple_tree-fix-handle-of-invalidated-state-in-mas_wr_store_setup.patch b/queue-6.1/maple_tree-fix-handle-of-invalidated-state-in-mas_wr_store_setup.patch new file mode 100644 index 00000000000..976f48f7cff --- /dev/null +++ b/queue-6.1/maple_tree-fix-handle-of-invalidated-state-in-mas_wr_store_setup.patch @@ -0,0 +1,39 @@ +From stable-owner@vger.kernel.org Tue Apr 11 17:13:01 2023 +From: "Liam R. 
Howlett" +Date: Tue, 11 Apr 2023 11:10:45 -0400 +Subject: maple_tree: fix handle of invalidated state in mas_wr_store_setup() +To: Greg Kroah-Hartman , stable@vger.kernel.org +Cc: maple-tree@lists.infradead.org, linux-mm@kvack.org, linux-kernel@vger.kernel.org, "Liam R. Howlett" , Stable@vger.kernel.org, "Liam R . Howlett" , SeongJae Park +Message-ID: <20230411151055.2910579-5-Liam.Howlett@oracle.com> + +From: "Liam R. Howlett" + +commit 1202700c3f8cc5f7e4646c3cf05ee6f7c8bc6ccf upstream. + +If an invalidated maple state is encountered during write, reset the maple +state to MAS_START. This will result in a re-walk of the tree to the +correct location for the write. + +Link: https://lore.kernel.org/all/20230107020126.1627-1-sj@kernel.org/ +Link: https://lkml.kernel.org/r/20230120162650.984577-6-Liam.Howlett@oracle.com +Cc: +Fixes: 54a611b60590 ("Maple Tree: add new data structure") +Signed-off-by: Liam R. Howlett +Reported-by: SeongJae Park +Signed-off-by: Greg Kroah-Hartman +--- + lib/maple_tree.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/lib/maple_tree.c ++++ b/lib/maple_tree.c +@@ -5594,6 +5594,9 @@ static inline void mte_destroy_walk(stru + + static void mas_wr_store_setup(struct ma_wr_state *wr_mas) + { ++ if (unlikely(mas_is_paused(wr_mas->mas))) ++ mas_reset(wr_mas->mas); ++ + if (!mas_is_start(wr_mas->mas)) { + if (mas_is_none(wr_mas->mas)) { + mas_reset(wr_mas->mas); diff --git a/queue-6.1/maple_tree-fix-mas_prev-and-mas_find-state-handling.patch b/queue-6.1/maple_tree-fix-mas_prev-and-mas_find-state-handling.patch new file mode 100644 index 00000000000..f3d07f8861d --- /dev/null +++ b/queue-6.1/maple_tree-fix-mas_prev-and-mas_find-state-handling.patch @@ -0,0 +1,55 @@ +From stable-owner@vger.kernel.org Tue Apr 11 17:13:01 2023 +From: "Liam R. Howlett" +Date: Tue, 11 Apr 2023 11:10:46 -0400 +Subject: maple_tree: fix mas_prev() and mas_find() state handling +To: Greg Kroah-Hartman , stable@vger.kernel.org +Cc: maple-tree@lists.infradead.org, linux-mm@kvack.org, linux-kernel@vger.kernel.org, "Liam R. Howlett" , Stable@vger.kernel.org, "Liam R . Howlett" , syzbot+502859d610c661e56545@syzkaller.appspotmail.com +Message-ID: <20230411151055.2910579-6-Liam.Howlett@oracle.com> + +From: "Liam R. Howlett" + +commit 17dc622c7b0f94e49bed030726df4db12ecaa6b5 upstream. + +When mas_prev() does not find anything, set the state to MAS_NONE. + +Handle the MAS_NONE in mas_find() like a MAS_START. + +Link: https://lkml.kernel.org/r/20230120162650.984577-7-Liam.Howlett@oracle.com +Cc: +Fixes: 54a611b60590 ("Maple Tree: add new data structure") +Signed-off-by: Liam R. 
Howlett +Reported-by: +Signed-off-by: Greg Kroah-Hartman +--- + lib/maple_tree.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/lib/maple_tree.c ++++ b/lib/maple_tree.c +@@ -4844,7 +4844,7 @@ static inline void *mas_prev_entry(struc + + if (mas->index < min) { + mas->index = mas->last = min; +- mas_pause(mas); ++ mas->node = MAS_NONE; + return NULL; + } + retry: +@@ -5906,6 +5906,7 @@ void *mas_prev(struct ma_state *mas, uns + if (!mas->index) { + /* Nothing comes before 0 */ + mas->last = 0; ++ mas->node = MAS_NONE; + return NULL; + } + +@@ -5996,6 +5997,9 @@ void *mas_find(struct ma_state *mas, uns + mas->index = ++mas->last; + } + ++ if (unlikely(mas_is_none(mas))) ++ mas->node = MAS_START; ++ + if (unlikely(mas_is_start(mas))) { + /* First run or continue */ + void *entry; diff --git a/queue-6.1/maple_tree-fix-potential-rcu-issue.patch b/queue-6.1/maple_tree-fix-potential-rcu-issue.patch new file mode 100644 index 00000000000..943455b76cd --- /dev/null +++ b/queue-6.1/maple_tree-fix-potential-rcu-issue.patch @@ -0,0 +1,40 @@ +From stable-owner@vger.kernel.org Tue Apr 11 17:12:28 2023 +From: "Liam R. Howlett" +Date: Tue, 11 Apr 2023 11:10:43 -0400 +Subject: maple_tree: fix potential rcu issue +To: Greg Kroah-Hartman , stable@vger.kernel.org +Cc: maple-tree@lists.infradead.org, linux-mm@kvack.org, linux-kernel@vger.kernel.org, "Liam R. Howlett" , Stable@vger.kernel.org, "Liam R . Howlett" +Message-ID: <20230411151055.2910579-3-Liam.Howlett@oracle.com> + +From: "Liam R. Howlett" + +commit 65be6f058b0eba98dc6c6f197ea9f62c9b6a519f upstream. + +Ensure the node isn't dead after reading the node end. + +Link: https://lkml.kernel.org/r/20230120162650.984577-3-Liam.Howlett@oracle.com +Cc: +Fixes: 54a611b60590 ("Maple Tree: add new data structure") +Signed-off-by: Liam R. Howlett +Signed-off-by: Greg Kroah-Hartman +--- + lib/maple_tree.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/lib/maple_tree.c ++++ b/lib/maple_tree.c +@@ -4650,13 +4650,13 @@ static inline void *mas_next_nentry(stru + pivots = ma_pivots(node, type); + slots = ma_slots(node, type); + mas->index = mas_safe_min(mas, pivots, mas->offset); ++ count = ma_data_end(node, type, pivots, mas->max); + if (ma_dead_node(node)) + return NULL; + + if (mas->index > max) + return NULL; + +- count = ma_data_end(node, type, pivots, mas->max); + if (mas->offset > count) + return NULL; + diff --git a/queue-6.1/maple_tree-reduce-user-error-potential.patch b/queue-6.1/maple_tree-reduce-user-error-potential.patch new file mode 100644 index 00000000000..c1e6716cdf8 --- /dev/null +++ b/queue-6.1/maple_tree-reduce-user-error-potential.patch @@ -0,0 +1,51 @@ +From stable-owner@vger.kernel.org Tue Apr 11 17:12:40 2023 +From: "Liam R. Howlett" +Date: Tue, 11 Apr 2023 11:10:44 -0400 +Subject: maple_tree: reduce user error potential +To: Greg Kroah-Hartman , stable@vger.kernel.org +Cc: maple-tree@lists.infradead.org, linux-mm@kvack.org, linux-kernel@vger.kernel.org, "Liam R. Howlett" , Stable@vger.kernel.org, "Liam R . Howlett" +Message-ID: <20230411151055.2910579-4-Liam.Howlett@oracle.com> + +From: "Liam R. Howlett" + +commit 50e81c82ad947045c7ed26ddc9acb17276b653b6 upstream. + +When iterating, a user may operate on the tree and cause the maple state +to be altered and left in an unintuitive state. Detect this scenario and +correct it by setting the index to the limit and invalidating the state.
+ +Link: https://lkml.kernel.org/r/20230120162650.984577-4-Liam.Howlett@oracle.com +Cc: +Fixes: 54a611b60590 ("Maple Tree: add new data structure") +Signed-off-by: Liam R. Howlett +Signed-off-by: Greg Kroah-Hartman +--- + lib/maple_tree.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +--- a/lib/maple_tree.c ++++ b/lib/maple_tree.c +@@ -4731,6 +4731,11 @@ static inline void *mas_next_entry(struc + unsigned long last; + enum maple_type mt; + ++ if (mas->index > limit) { ++ mas->index = mas->last = limit; ++ mas_pause(mas); ++ return NULL; ++ } + last = mas->last; + retry: + offset = mas->offset; +@@ -4837,6 +4842,11 @@ static inline void *mas_prev_entry(struc + { + void *entry; + ++ if (mas->index < min) { ++ mas->index = mas->last = min; ++ mas_pause(mas); ++ return NULL; ++ } + retry: + while (likely(!mas_is_none(mas))) { + entry = mas_prev_nentry(mas, min, mas->index); diff --git a/queue-6.1/maple_tree-refine-ma_state-init-from-mas_start.patch b/queue-6.1/maple_tree-refine-ma_state-init-from-mas_start.patch new file mode 100644 index 00000000000..37479227ea9 --- /dev/null +++ b/queue-6.1/maple_tree-refine-ma_state-init-from-mas_start.patch @@ -0,0 +1,65 @@ +From stable-owner@vger.kernel.org Tue Apr 11 17:13:00 2023 +From: "Liam R. Howlett" +Date: Tue, 11 Apr 2023 11:10:49 -0400 +Subject: maple_tree: refine ma_state init from mas_start() +To: Greg Kroah-Hartman , stable@vger.kernel.org +Cc: maple-tree@lists.infradead.org, linux-mm@kvack.org, linux-kernel@vger.kernel.org, "Liam R. Howlett" , Stable@vger.kernel.org, Vernon Yang , "Liam R . Howlett" +Message-ID: <20230411151055.2910579-9-Liam.Howlett@oracle.com> + +From: "Liam R. Howlett" + +commit 46b345848261009477552d654cb2f65000c30e4d upstream. + +If mas->node is an MAS_START, there are three cases, and they all assign +different values to mas->node and mas->offset. So there is no need to set +them to a default value before updating. + +Update them directly to make them easier to understand and for better +readability. + +Link: https://lkml.kernel.org/r/20221221060058.609003-7-vernon2gm@gmail.com +Cc: +Fixes: 54a611b60590 ("Maple Tree: add new data structure") +Signed-off-by: Vernon Yang +Signed-off-by: Liam R. Howlett +Signed-off-by: Greg Kroah-Hartman +--- + lib/maple_tree.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/lib/maple_tree.c ++++ b/lib/maple_tree.c +@@ -1334,7 +1334,7 @@ static void mas_node_count(struct ma_sta + * mas_start() - Sets up maple state for operations. + * @mas: The maple state. + * +- * If mas->node == MAS_START, then set the min, max, depth, and offset to ++ * If mas->node == MAS_START, then set the min, max and depth to + * defaults. 
+ * + * Return: +@@ -1348,22 +1348,22 @@ static inline struct maple_enode *mas_st + if (likely(mas_is_start(mas))) { + struct maple_enode *root; + +- mas->node = MAS_NONE; + mas->min = 0; + mas->max = ULONG_MAX; + mas->depth = 0; +- mas->offset = 0; + + root = mas_root(mas); + /* Tree with nodes */ + if (likely(xa_is_node(root))) { + mas->depth = 1; + mas->node = mte_safe_root(root); ++ mas->offset = 0; + return NULL; + } + + /* empty tree */ + if (unlikely(!root)) { ++ mas->node = MAS_NONE; + mas->offset = MAPLE_NODE_SLOTS; + return NULL; + } diff --git a/queue-6.1/maple_tree-remove-extra-smp_wmb-from-mas_dead_leaves.patch b/queue-6.1/maple_tree-remove-extra-smp_wmb-from-mas_dead_leaves.patch new file mode 100644 index 00000000000..54fa0ba68b5 --- /dev/null +++ b/queue-6.1/maple_tree-remove-extra-smp_wmb-from-mas_dead_leaves.patch @@ -0,0 +1,35 @@ +From stable-owner@vger.kernel.org Tue Apr 11 17:13:37 2023 +From: "Liam R. Howlett" +Date: Tue, 11 Apr 2023 11:10:52 -0400 +Subject: maple_tree: remove extra smp_wmb() from mas_dead_leaves() +To: Greg Kroah-Hartman , stable@vger.kernel.org +Cc: maple-tree@lists.infradead.org, linux-mm@kvack.org, linux-kernel@vger.kernel.org, "Liam R. Howlett" , Liam Howlett +Message-ID: <20230411151055.2910579-12-Liam.Howlett@oracle.com> + +From: "Liam R. Howlett" + +commit 8372f4d83f96f35915106093cde4565836587123 upstream. + +The call to mte_set_dead_node() before the smp_wmb() already calls +smp_wmb() so this is not needed. This is an optimization for the RCU mode +of the maple tree. + +Link: https://lkml.kernel.org/r/20230227173632.3292573-5-surenb@google.com +Fixes: 54a611b60590 ("Maple Tree: add new data structure") +Cc: stable@vger.kernel.org +Signed-off-by: Liam Howlett +Signed-off-by: Greg Kroah-Hartman +--- + lib/maple_tree.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/lib/maple_tree.c ++++ b/lib/maple_tree.c +@@ -5495,7 +5495,6 @@ unsigned char mas_dead_leaves(struct ma_ + break; + + mte_set_node_dead(entry); +- smp_wmb(); /* Needed for RCU */ + node->type = type; + rcu_assign_pointer(slots[offset], node); + } diff --git a/queue-6.1/maple_tree-remove-gfp_zero-from-kmem_cache_alloc-and-kmem_cache_alloc_bulk.patch b/queue-6.1/maple_tree-remove-gfp_zero-from-kmem_cache_alloc-and-kmem_cache_alloc_bulk.patch new file mode 100644 index 00000000000..c6a17758845 --- /dev/null +++ b/queue-6.1/maple_tree-remove-gfp_zero-from-kmem_cache_alloc-and-kmem_cache_alloc_bulk.patch @@ -0,0 +1,294 @@ +From stable-owner@vger.kernel.org Tue Apr 11 17:13:02 2023 +From: "Liam R. Howlett" +Date: Tue, 11 Apr 2023 11:10:42 -0400 +Subject: maple_tree: remove GFP_ZERO from kmem_cache_alloc() and kmem_cache_alloc_bulk() +To: Greg Kroah-Hartman , stable@vger.kernel.org +Cc: maple-tree@lists.infradead.org, linux-mm@kvack.org, linux-kernel@vger.kernel.org, "Liam R. Howlett" , Liam Howlett , Jirka Hladky , Matthew Wilcox +Message-ID: <20230411151055.2910579-2-Liam.Howlett@oracle.com> + +From: "Liam R. Howlett" + +commit 541e06b772c1aaffb3b6a245ccface36d7107af2 upstream. + +Preallocations are common in the VMA code to avoid allocating under +certain locking conditions. The preallocations must also cover the +worst-case scenario. Removing the GFP_ZERO flag from the +kmem_cache_alloc() (and bulk variant) calls will reduce the amount of time +spent zeroing memory that may not be used. Only zero out the necessary +area to keep track of the allocations in the maple state. Zero the entire +node prior to using it in the tree. 
+ +This required internal changes to node counting on allocation, so the test +code is also updated. + +This restores some micro-benchmark performance: up to +9% in mmtests mmap1 +by my testing +10% to +20% in mmap, mmapaddr, mmapmany tests reported by +Red Hat + +Link: https://bugzilla.redhat.com/show_bug.cgi?id=2149636 +Link: https://lkml.kernel.org/r/20230105160427.2988454-1-Liam.Howlett@oracle.com +Cc: stable@vger.kernel.org +Fixes: 54a611b60590 ("Maple Tree: add new data structure") +Signed-off-by: Liam Howlett +Reported-by: Jirka Hladky +Suggested-by: Matthew Wilcox (Oracle) +Signed-off-by: Greg Kroah-Hartman +--- + lib/maple_tree.c | 80 ++++++++++++++++++++------------------- + tools/testing/radix-tree/maple.c | 18 ++++---- + 2 files changed, 52 insertions(+), 46 deletions(-) + +--- a/lib/maple_tree.c ++++ b/lib/maple_tree.c +@@ -149,13 +149,12 @@ struct maple_subtree_state { + /* Functions */ + static inline struct maple_node *mt_alloc_one(gfp_t gfp) + { +- return kmem_cache_alloc(maple_node_cache, gfp | __GFP_ZERO); ++ return kmem_cache_alloc(maple_node_cache, gfp); + } + + static inline int mt_alloc_bulk(gfp_t gfp, size_t size, void **nodes) + { +- return kmem_cache_alloc_bulk(maple_node_cache, gfp | __GFP_ZERO, size, +- nodes); ++ return kmem_cache_alloc_bulk(maple_node_cache, gfp, size, nodes); + } + + static inline void mt_free_bulk(size_t size, void __rcu **nodes) +@@ -1123,9 +1122,10 @@ static inline struct maple_node *mas_pop + { + struct maple_alloc *ret, *node = mas->alloc; + unsigned long total = mas_allocated(mas); ++ unsigned int req = mas_alloc_req(mas); + + /* nothing or a request pending. */ +- if (unlikely(!total)) ++ if (WARN_ON(!total)) + return NULL; + + if (total == 1) { +@@ -1135,27 +1135,25 @@ static inline struct maple_node *mas_pop + goto single_node; + } + +- if (!node->node_count) { ++ if (node->node_count == 1) { + /* Single allocation in this node. 
*/ + mas->alloc = node->slot[0]; +- node->slot[0] = NULL; + mas->alloc->total = node->total - 1; + ret = node; + goto new_head; + } +- + node->total--; +- ret = node->slot[node->node_count]; +- node->slot[node->node_count--] = NULL; ++ ret = node->slot[--node->node_count]; ++ node->slot[node->node_count] = NULL; + + single_node: + new_head: +- ret->total = 0; +- ret->node_count = 0; +- if (ret->request_count) { +- mas_set_alloc_req(mas, ret->request_count + 1); +- ret->request_count = 0; ++ if (req) { ++ req++; ++ mas_set_alloc_req(mas, req); + } ++ ++ memset(ret, 0, sizeof(*ret)); + return (struct maple_node *)ret; + } + +@@ -1174,21 +1172,20 @@ static inline void mas_push_node(struct + unsigned long count; + unsigned int requested = mas_alloc_req(mas); + +- memset(reuse, 0, sizeof(*reuse)); + count = mas_allocated(mas); + +- if (count && (head->node_count < MAPLE_ALLOC_SLOTS - 1)) { +- if (head->slot[0]) +- head->node_count++; +- head->slot[head->node_count] = reuse; ++ reuse->request_count = 0; ++ reuse->node_count = 0; ++ if (count && (head->node_count < MAPLE_ALLOC_SLOTS)) { ++ head->slot[head->node_count++] = reuse; + head->total++; + goto done; + } + + reuse->total = 1; + if ((head) && !((unsigned long)head & 0x1)) { +- head->request_count = 0; + reuse->slot[0] = head; ++ reuse->node_count = 1; + reuse->total += head->total; + } + +@@ -1207,7 +1204,6 @@ static inline void mas_alloc_nodes(struc + { + struct maple_alloc *node; + unsigned long allocated = mas_allocated(mas); +- unsigned long success = allocated; + unsigned int requested = mas_alloc_req(mas); + unsigned int count; + void **slots = NULL; +@@ -1223,24 +1219,29 @@ static inline void mas_alloc_nodes(struc + WARN_ON(!allocated); + } + +- if (!allocated || mas->alloc->node_count == MAPLE_ALLOC_SLOTS - 1) { ++ if (!allocated || mas->alloc->node_count == MAPLE_ALLOC_SLOTS) { + node = (struct maple_alloc *)mt_alloc_one(gfp); + if (!node) + goto nomem_one; + +- if (allocated) ++ if (allocated) { + node->slot[0] = mas->alloc; ++ node->node_count = 1; ++ } else { ++ node->node_count = 0; ++ } + +- success++; + mas->alloc = node; ++ node->total = ++allocated; + requested--; + } + + node = mas->alloc; ++ node->request_count = 0; + while (requested) { + max_req = MAPLE_ALLOC_SLOTS; +- if (node->slot[0]) { +- unsigned int offset = node->node_count + 1; ++ if (node->node_count) { ++ unsigned int offset = node->node_count; + + slots = (void **)&node->slot[offset]; + max_req -= offset; +@@ -1254,15 +1255,13 @@ static inline void mas_alloc_nodes(struc + goto nomem_bulk; + + node->node_count += count; +- /* zero indexed. 
*/ +- if (slots == (void **)&node->slot) +- node->node_count--; +- +- success += count; ++ allocated += count; + node = node->slot[0]; ++ node->node_count = 0; ++ node->request_count = 0; + requested -= count; + } +- mas->alloc->total = success; ++ mas->alloc->total = allocated; + return; + + nomem_bulk: +@@ -1271,7 +1270,7 @@ nomem_bulk: + nomem_one: + mas_set_alloc_req(mas, requested); + if (mas->alloc && !(((unsigned long)mas->alloc & 0x1))) +- mas->alloc->total = success; ++ mas->alloc->total = allocated; + mas_set_err(mas, -ENOMEM); + return; + +@@ -5720,6 +5719,7 @@ int mas_preallocate(struct ma_state *mas + void mas_destroy(struct ma_state *mas) + { + struct maple_alloc *node; ++ unsigned long total; + + /* + * When using mas_for_each() to insert an expected number of elements, +@@ -5742,14 +5742,20 @@ void mas_destroy(struct ma_state *mas) + } + mas->mas_flags &= ~(MA_STATE_BULK|MA_STATE_PREALLOC); + +- while (mas->alloc && !((unsigned long)mas->alloc & 0x1)) { ++ total = mas_allocated(mas); ++ while (total) { + node = mas->alloc; + mas->alloc = node->slot[0]; +- if (node->node_count > 0) +- mt_free_bulk(node->node_count, +- (void __rcu **)&node->slot[1]); ++ if (node->node_count > 1) { ++ size_t count = node->node_count - 1; ++ ++ mt_free_bulk(count, (void __rcu **)&node->slot[1]); ++ total -= count; ++ } + kmem_cache_free(maple_node_cache, node); ++ total--; + } ++ + mas->alloc = NULL; + } + EXPORT_SYMBOL_GPL(mas_destroy); +--- a/tools/testing/radix-tree/maple.c ++++ b/tools/testing/radix-tree/maple.c +@@ -172,11 +172,11 @@ static noinline void check_new_node(stru + + if (!MAPLE_32BIT) { + if (i >= 35) +- e = i - 35; ++ e = i - 34; + else if (i >= 5) +- e = i - 5; ++ e = i - 4; + else if (i >= 2) +- e = i - 2; ++ e = i - 1; + } else { + if (i >= 4) + e = i - 4; +@@ -304,17 +304,17 @@ static noinline void check_new_node(stru + MT_BUG_ON(mt, mas.node != MA_ERROR(-ENOMEM)); + MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL)); + MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1); +- MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 1); ++ MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS); + + mn = mas_pop_node(&mas); /* get the next node. 
*/ + MT_BUG_ON(mt, mn == NULL); + MT_BUG_ON(mt, not_empty(mn)); + MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS); +- MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 2); ++ MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 1); + + mas_push_node(&mas, mn); + MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1); +- MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 1); ++ MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS); + + /* Check the limit of pop/push/pop */ + mas_node_count(&mas, MAPLE_ALLOC_SLOTS + 2); /* Request */ +@@ -322,14 +322,14 @@ static noinline void check_new_node(stru + MT_BUG_ON(mt, mas.node != MA_ERROR(-ENOMEM)); + MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL)); + MT_BUG_ON(mt, mas_alloc_req(&mas)); +- MT_BUG_ON(mt, mas.alloc->node_count); ++ MT_BUG_ON(mt, mas.alloc->node_count != 1); + MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 2); + mn = mas_pop_node(&mas); + MT_BUG_ON(mt, not_empty(mn)); + MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1); +- MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 1); ++ MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS); + mas_push_node(&mas, mn); +- MT_BUG_ON(mt, mas.alloc->node_count); ++ MT_BUG_ON(mt, mas.alloc->node_count != 1); + MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 2); + mn = mas_pop_node(&mas); + MT_BUG_ON(mt, not_empty(mn)); diff --git a/queue-6.1/mm-enable-maple-tree-rcu-mode-by-default.patch b/queue-6.1/mm-enable-maple-tree-rcu-mode-by-default.patch new file mode 100644 index 00000000000..9c69bf5d9d4 --- /dev/null +++ b/queue-6.1/mm-enable-maple-tree-rcu-mode-by-default.patch @@ -0,0 +1,92 @@ +From stable-owner@vger.kernel.org Tue Apr 11 17:14:26 2023 +From: "Liam R. Howlett" +Date: Tue, 11 Apr 2023 11:10:55 -0400 +Subject: mm: enable maple tree RCU mode by default. +To: Greg Kroah-Hartman , stable@vger.kernel.org +Cc: maple-tree@lists.infradead.org, linux-mm@kvack.org, linux-kernel@vger.kernel.org, "Liam R. Howlett" , "Liam R . Howlett" , syzbot+8d95422d3537159ca390@syzkaller.appspotmail.com +Message-ID: <20230411151055.2910579-15-Liam.Howlett@oracle.com> + +From: "Liam R. Howlett" + +commit 3dd4432549415f3c65dd52d5c687629efbf4ece1 upstream. + +Use the maple tree in RCU mode for VMA tracking. + +The maple tree tracks the stack and is able to update the pivot +(lower/upper boundary) in-place to allow the page fault handler to write +to the tree while holding just the mmap read lock. This is safe as the +writes to the stack have a guard VMA which ensures there will always be +a NULL in the direction of the growth and thus will only update a pivot. + +It is possible, but not recommended, to have VMAs that grow up/down +without guard VMAs. syzbot has constructed a testcase which sets up a +VMA to grow and consume the empty space. Overwriting the entire NULL +entry causes the tree to be altered in a way that is not safe for +concurrent readers; the readers may see a node being rewritten or one +that does not match the maple state they are using. + +Enabling RCU mode allows the concurrent readers to see a stable node and +will return the expected result. + +Link: https://lkml.kernel.org/r/20230227173632.3292573-9-surenb@google.com +Cc: stable@vger.kernel.org +Fixes: d4af56c5c7c6 ("mm: start tracking VMAs with maple tree") +Signed-off-by: Liam R. 
Howlett +Reported-by: syzbot+8d95422d3537159ca390@syzkaller.appspotmail.com +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/mm_types.h | 3 ++- + kernel/fork.c | 3 +++ + mm/mmap.c | 3 ++- + 3 files changed, 7 insertions(+), 2 deletions(-) + +--- a/include/linux/mm_types.h ++++ b/include/linux/mm_types.h +@@ -725,7 +725,8 @@ struct mm_struct { + unsigned long cpu_bitmap[]; + }; + +-#define MM_MT_FLAGS (MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN) ++#define MM_MT_FLAGS (MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN | \ ++ MT_FLAGS_USE_RCU) + extern struct mm_struct init_mm; + + /* Pointer magic because the dynamic array size confuses some compilers. */ +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -617,6 +617,7 @@ static __latent_entropy int dup_mmap(str + if (retval) + goto out; + ++ mt_clear_in_rcu(mas.tree); + mas_for_each(&old_mas, mpnt, ULONG_MAX) { + struct file *file; + +@@ -703,6 +704,8 @@ static __latent_entropy int dup_mmap(str + retval = arch_dup_mmap(oldmm, mm); + loop_out: + mas_destroy(&mas); ++ if (!retval) ++ mt_set_in_rcu(mas.tree); + out: + mmap_write_unlock(mm); + flush_tlb_mm(oldmm); +--- a/mm/mmap.c ++++ b/mm/mmap.c +@@ -2308,7 +2308,7 @@ do_mas_align_munmap(struct ma_state *mas + int count = 0; + int error = -ENOMEM; + MA_STATE(mas_detach, &mt_detach, 0, 0); +- mt_init_flags(&mt_detach, MT_FLAGS_LOCK_EXTERN); ++ mt_init_flags(&mt_detach, mas->tree->ma_flags & MT_FLAGS_LOCK_MASK); + mt_set_external_lock(&mt_detach, &mm->mmap_lock); + + if (mas_preallocate(mas, vma, GFP_KERNEL)) +@@ -3095,6 +3095,7 @@ void exit_mmap(struct mm_struct *mm) + */ + set_bit(MMF_OOM_SKIP, &mm->flags); + mmap_write_lock(mm); ++ mt_clear_in_rcu(&mm->mm_mt); + free_pgtables(&tlb, &mm->mm_mt, vma, FIRST_USER_ADDRESS, + USER_PGTABLES_CEILING); + tlb_finish_mmu(&tlb); diff --git a/queue-6.1/series b/queue-6.1/series index d955fb8b55c..c784d3acd1a 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -149,3 +149,16 @@ drm-bridge-lt9611-fix-pll-being-unable-to-lock.patch drm-i915-use-_mmio_pipe-for-skl_bottom_color.patch drm-i915-split-icl_color_commit_noarm-from-skl_color_commit_noarm.patch mm-take-a-page-reference-when-removing-device-exclusive-entries.patch +maple_tree-remove-gfp_zero-from-kmem_cache_alloc-and-kmem_cache_alloc_bulk.patch +maple_tree-fix-potential-rcu-issue.patch +maple_tree-reduce-user-error-potential.patch +maple_tree-fix-handle-of-invalidated-state-in-mas_wr_store_setup.patch +maple_tree-fix-mas_prev-and-mas_find-state-handling.patch +maple_tree-be-more-cautious-about-dead-nodes.patch +maple_tree-refine-ma_state-init-from-mas_start.patch +maple_tree-detect-dead-nodes-in-mas_start.patch +maple_tree-fix-freeing-of-nodes-in-rcu-mode.patch +maple_tree-remove-extra-smp_wmb-from-mas_dead_leaves.patch +maple_tree-add-smp_rmb-to-dead-node-detection.patch +maple_tree-add-rcu-lock-checking-to-rcu-callback-functions.patch +mm-enable-maple-tree-rcu-mode-by-default.patch -- 2.47.3