From: Greg Kroah-Hartman Date: Tue, 13 Aug 2024 10:48:59 +0000 (+0200) Subject: 5.10-stable patches X-Git-Tag: v6.1.105~20 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=7f7f61605fafbf037ae8807c5595a8431b88423b;p=thirdparty%2Fkernel%2Fstable-queue.git 5.10-stable patches added patches: fix-gcc-4.9-build-issue-in-5.10.y.patch netfilter-nf_tables-allow-clone-callbacks-to-sleep.patch netfilter-nf_tables-prefer-nft_chain_validate.patch netfilter-nf_tables-set-element-extended-ack-reporting-support.patch netfilter-nf_tables-use-timestamp-to-check-for-set-element-timeout.patch pci-dpc-fix-use-after-free-on-concurrent-dpc-and-hot-removal.patch --- diff --git a/queue-5.10/fix-gcc-4.9-build-issue-in-5.10.y.patch b/queue-5.10/fix-gcc-4.9-build-issue-in-5.10.y.patch new file mode 100644 index 00000000000..a3086082f81 --- /dev/null +++ b/queue-5.10/fix-gcc-4.9-build-issue-in-5.10.y.patch @@ -0,0 +1,35 @@ +From jariruusu@protonmail.com Tue Aug 13 12:30:55 2024 +From: Jari Ruusu +Date: Fri, 26 Jul 2024 09:53:18 +0000 +Subject: Fix gcc 4.9 build issue in 5.10.y +To: Greg Kroah-Hartman +Cc: "linux-kernel@vger.kernel.org" , "stable@vger.kernel.org" +Message-ID: <93RnVgeI76u-tf0ZRdROl_JVVqqx-rtQnV4mOqGR_Rb5OmiWCMXC6MSYfnkTPp_615nKq8H-5nfzNt4I9MXPjUPzXBLp625jtGUJSGPsGBo=@protonmail.com> + +From: Jari Ruusu + +Some older systems still compile kernels with old gcc version. +These warnings and errors show up when compiling with gcc 4.9.2 + + error: "__GCC4_has_attribute___uninitialized__" is not defined [-Werror=undef] + +Upstream won't need this because newer kernels are not compilable with gcc 4.9. 
+ +Subject: gcc-4.9 warning/error fix for 5.10.223-rc1 +Fixes: fd7eea27a3ae ("Compiler Attributes: Add __uninitialized macro") +Signed-off-by: Jari Ruusu +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/compiler_attributes.h | 1 + + 1 file changed, 1 insertion(+) + +--- a/include/linux/compiler_attributes.h ++++ b/include/linux/compiler_attributes.h +@@ -37,6 +37,7 @@ + # define __GCC4_has_attribute___nonstring__ 0 + # define __GCC4_has_attribute___no_sanitize_address__ (__GNUC_MINOR__ >= 8) + # define __GCC4_has_attribute___no_sanitize_undefined__ (__GNUC_MINOR__ >= 9) ++# define __GCC4_has_attribute___uninitialized__ 0 + # define __GCC4_has_attribute___fallthrough__ 0 + # define __GCC4_has_attribute___warning__ 1 + #endif diff --git a/queue-5.10/netfilter-nf_tables-allow-clone-callbacks-to-sleep.patch b/queue-5.10/netfilter-nf_tables-allow-clone-callbacks-to-sleep.patch new file mode 100644 index 00000000000..69dad031366 --- /dev/null +++ b/queue-5.10/netfilter-nf_tables-allow-clone-callbacks-to-sleep.patch @@ -0,0 +1,142 @@ +From stable+bounces-66452-greg=kroah.com@vger.kernel.org Mon Aug 12 12:28:36 2024 +From: Pablo Neira Ayuso +Date: Mon, 12 Aug 2024 12:27:41 +0200 +Subject: netfilter: nf_tables: allow clone callbacks to sleep +To: netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: <20240812102742.388214-4-pablo@netfilter.org> + +From: Pablo Neira Ayuso + +commit fa23e0d4b756d25829e124d6b670a4c6bbd4bf7e upstream. + +Sven Auhagen reports transaction failures with following error: + ./main.nft:13:1-26: Error: Could not process rule: Cannot allocate memory + percpu: allocation failed, size=16 align=8 atomic=1, atomic alloc failed, no space left + +This points to failing pcpu allocation with GFP_ATOMIC flag. +However, transactions happen from user context and are allowed to sleep. + +One case where we can call into percpu allocator with GFP_ATOMIC is +nft_counter expression. 
+ +Normally this happens from control plane, so this could use GFP_KERNEL +instead. But one use case, element insertion from packet path, +needs to use GFP_ATOMIC allocations (nft_dynset expression). + +At this time, .clone callbacks always use GFP_ATOMIC for this reason. + +Add gfp_t argument to the .clone function and pass GFP_KERNEL or +GFP_ATOMIC flag depending on context, this allows all clone memory +allocations to sleep for the normal (transaction) case. + +Cc: Sven Auhagen +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + include/net/netfilter/nf_tables.h | 4 ++-- + net/netfilter/nf_tables_api.c | 8 ++++---- + net/netfilter/nft_connlimit.c | 2 +- + net/netfilter/nft_counter.c | 4 ++-- + net/netfilter/nft_dynset.c | 2 +- + 5 files changed, 10 insertions(+), 10 deletions(-) + +--- a/include/net/netfilter/nf_tables.h ++++ b/include/net/netfilter/nf_tables.h +@@ -786,7 +786,7 @@ struct nft_expr_ops { + struct nft_regs *regs, + const struct nft_pktinfo *pkt); + int (*clone)(struct nft_expr *dst, +- const struct nft_expr *src); ++ const struct nft_expr *src, gfp_t gfp); + unsigned int size; + + int (*init)(const struct nft_ctx *ctx, +@@ -837,7 +837,7 @@ static inline void *nft_expr_priv(const + return (void *)expr->data; + } + +-int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src); ++int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src, gfp_t gfp); + void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr); + int nft_expr_dump(struct sk_buff *skb, unsigned int attr, + const struct nft_expr *expr); +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -2968,13 +2968,13 @@ err_expr_parse: + return ERR_PTR(err); + } + +-int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src) ++int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src, gfp_t gfp) + { + int err; + + if (src->ops->clone) { + dst->ops = src->ops; +- err = src->ops->clone(dst, 
src); ++ err = src->ops->clone(dst, src, gfp); + if (err < 0) + return err; + } else { +@@ -5524,7 +5524,7 @@ static int nft_set_elem_expr_setup(struc + if (expr == NULL) + return 0; + +- err = nft_expr_clone(elem_expr, expr); ++ err = nft_expr_clone(elem_expr, expr, GFP_KERNEL); + if (err < 0) + return -ENOMEM; + +@@ -5632,7 +5632,7 @@ static int nft_add_set_elem(struct nft_c + if (!expr) + return -ENOMEM; + +- err = nft_expr_clone(expr, set->expr); ++ err = nft_expr_clone(expr, set->expr, GFP_KERNEL); + if (err < 0) + goto err_set_elem_expr; + } +--- a/net/netfilter/nft_connlimit.c ++++ b/net/netfilter/nft_connlimit.c +@@ -195,7 +195,7 @@ static void nft_connlimit_destroy(const + nft_connlimit_do_destroy(ctx, priv); + } + +-static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src) ++static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src, gfp_t gfp) + { + struct nft_connlimit *priv_dst = nft_expr_priv(dst); + struct nft_connlimit *priv_src = nft_expr_priv(src); +--- a/net/netfilter/nft_counter.c ++++ b/net/netfilter/nft_counter.c +@@ -224,7 +224,7 @@ static void nft_counter_destroy(const st + nft_counter_do_destroy(priv); + } + +-static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src) ++static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src, gfp_t gfp) + { + struct nft_counter_percpu_priv *priv = nft_expr_priv(src); + struct nft_counter_percpu_priv *priv_clone = nft_expr_priv(dst); +@@ -234,7 +234,7 @@ static int nft_counter_clone(struct nft_ + + nft_counter_fetch(priv, &total); + +- cpu_stats = alloc_percpu_gfp(struct nft_counter, GFP_ATOMIC); ++ cpu_stats = alloc_percpu_gfp(struct nft_counter, gfp); + if (cpu_stats == NULL) + return -ENOMEM; + +--- a/net/netfilter/nft_dynset.c ++++ b/net/netfilter/nft_dynset.c +@@ -48,7 +48,7 @@ static void *nft_dynset_new(struct nft_s + + ext = nft_set_elem_ext(set, elem); + if (priv->expr != NULL && +- 
nft_expr_clone(nft_set_ext_expr(ext), priv->expr) < 0) ++ nft_expr_clone(nft_set_ext_expr(ext), priv->expr, GFP_ATOMIC) < 0) + goto err2; + + return elem; diff --git a/queue-5.10/netfilter-nf_tables-prefer-nft_chain_validate.patch b/queue-5.10/netfilter-nf_tables-prefer-nft_chain_validate.patch new file mode 100644 index 00000000000..585d084819c --- /dev/null +++ b/queue-5.10/netfilter-nf_tables-prefer-nft_chain_validate.patch @@ -0,0 +1,204 @@ +From stable+bounces-66454-greg=kroah.com@vger.kernel.org Mon Aug 12 12:28:45 2024 +From: Pablo Neira Ayuso +Date: Mon, 12 Aug 2024 12:27:42 +0200 +Subject: netfilter: nf_tables: prefer nft_chain_validate +To: netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: <20240812102742.388214-5-pablo@netfilter.org> + +From: Florian Westphal + +commit cff3bd012a9512ac5ed858d38e6ed65f6391008c upstream. + +nft_chain_validate already performs loop detection because a cycle will +result in a call stack overflow (ctx->level >= NFT_JUMP_STACK_SIZE). + +It also follows maps via ->validate callback in nft_lookup, so there +appears no reason to iterate the maps again. + +nf_tables_check_loops() and all its helper functions can be removed. +This improves ruleset load time significantly, from 23s down to 12s. + +This also fixes a crash bug. Old loop detection code can result in +unbounded recursion: + +BUG: TASK stack guard page was hit at .... +Oops: stack guard page: 0000 [#1] PREEMPT SMP KASAN +CPU: 4 PID: 1539 Comm: nft Not tainted 6.10.0-rc5+ #1 +[..] + +with a suitable ruleset during validation of register stores. + +I can't see any actual reason to attempt to check for this from +nft_validate_register_store(), at this point the transaction is still in +progress, so we don't have a full picture of the rule graph. 
+ +For nf-next it might make sense to either remove it or make this depend +on table->validate_state in case we could catch an error earlier +(for improved error reporting to userspace). + +Fixes: 20a69341f2d0 ("netfilter: nf_tables: add netlink set API") +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nf_tables_api.c | 127 ++++-------------------------------------- + 1 file changed, 13 insertions(+), 114 deletions(-) + +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -3349,6 +3349,15 @@ static void nf_tables_rule_release(const + nf_tables_rule_destroy(ctx, rule); + } + ++/** nft_chain_validate - loop detection and hook validation ++ * ++ * @ctx: context containing call depth and base chain ++ * @chain: chain to validate ++ * ++ * Walk through the rules of the given chain and chase all jumps/gotos ++ * and set lookups until either the jump limit is hit or all reachable ++ * chains have been validated. ++ */ + int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain) + { + struct nft_expr *expr, *last; +@@ -3367,6 +3376,9 @@ int nft_chain_validate(const struct nft_ + if (!expr->ops->validate) + continue; + ++ /* This may call nft_chain_validate() recursively, ++ * callers that do so must increment ctx->level. ++ */ + err = expr->ops->validate(ctx, expr, &data); + if (err < 0) + return err; +@@ -9087,119 +9099,6 @@ int nft_chain_validate_hooks(const struc + } + EXPORT_SYMBOL_GPL(nft_chain_validate_hooks); + +-/* +- * Loop detection - walk through the ruleset beginning at the destination chain +- * of a new jump until either the source chain is reached (loop) or all +- * reachable chains have been traversed. +- * +- * The loop check is performed whenever a new jump verdict is added to an +- * expression or verdict map or a verdict map is bound to a new chain. 
+- */ +- +-static int nf_tables_check_loops(const struct nft_ctx *ctx, +- const struct nft_chain *chain); +- +-static int nft_check_loops(const struct nft_ctx *ctx, +- const struct nft_set_ext *ext) +-{ +- const struct nft_data *data; +- int ret; +- +- data = nft_set_ext_data(ext); +- switch (data->verdict.code) { +- case NFT_JUMP: +- case NFT_GOTO: +- ret = nf_tables_check_loops(ctx, data->verdict.chain); +- break; +- default: +- ret = 0; +- break; +- } +- +- return ret; +-} +- +-static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx, +- struct nft_set *set, +- const struct nft_set_iter *iter, +- struct nft_set_elem *elem) +-{ +- const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); +- +- if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && +- *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END) +- return 0; +- +- return nft_check_loops(ctx, ext); +-} +- +-static int nf_tables_check_loops(const struct nft_ctx *ctx, +- const struct nft_chain *chain) +-{ +- const struct nft_rule *rule; +- const struct nft_expr *expr, *last; +- struct nft_set *set; +- struct nft_set_binding *binding; +- struct nft_set_iter iter; +- +- if (ctx->chain == chain) +- return -ELOOP; +- +- list_for_each_entry(rule, &chain->rules, list) { +- nft_rule_for_each_expr(expr, last, rule) { +- struct nft_immediate_expr *priv; +- const struct nft_data *data; +- int err; +- +- if (strcmp(expr->ops->type->name, "immediate")) +- continue; +- +- priv = nft_expr_priv(expr); +- if (priv->dreg != NFT_REG_VERDICT) +- continue; +- +- data = &priv->data; +- switch (data->verdict.code) { +- case NFT_JUMP: +- case NFT_GOTO: +- err = nf_tables_check_loops(ctx, +- data->verdict.chain); +- if (err < 0) +- return err; +- break; +- default: +- break; +- } +- } +- } +- +- list_for_each_entry(set, &ctx->table->sets, list) { +- if (!nft_is_active_next(ctx->net, set)) +- continue; +- if (!(set->flags & NFT_SET_MAP) || +- set->dtype != NFT_DATA_VERDICT) +- continue; +- +- list_for_each_entry(binding, 
&set->bindings, list) { +- if (!(binding->flags & NFT_SET_MAP) || +- binding->chain != chain) +- continue; +- +- iter.genmask = nft_genmask_next(ctx->net); +- iter.skip = 0; +- iter.count = 0; +- iter.err = 0; +- iter.fn = nf_tables_loop_check_setelem; +- +- set->ops->walk(ctx, set, &iter); +- if (iter.err < 0) +- return iter.err; +- } +- } +- +- return 0; +-} +- + /** + * nft_parse_u32_check - fetch u32 attribute and check for maximum value + * +@@ -9335,7 +9234,7 @@ static int nft_validate_register_store(c + if (data != NULL && + (data->verdict.code == NFT_GOTO || + data->verdict.code == NFT_JUMP)) { +- err = nf_tables_check_loops(ctx, data->verdict.chain); ++ err = nft_chain_validate(ctx, data->verdict.chain); + if (err < 0) + return err; + } diff --git a/queue-5.10/netfilter-nf_tables-set-element-extended-ack-reporting-support.patch b/queue-5.10/netfilter-nf_tables-set-element-extended-ack-reporting-support.patch new file mode 100644 index 00000000000..f7d0bd0038b --- /dev/null +++ b/queue-5.10/netfilter-nf_tables-set-element-extended-ack-reporting-support.patch @@ -0,0 +1,60 @@ +From stable+bounces-66451-greg=kroah.com@vger.kernel.org Mon Aug 12 12:28:27 2024 +From: Pablo Neira Ayuso +Date: Mon, 12 Aug 2024 12:27:39 +0200 +Subject: netfilter: nf_tables: set element extended ACK reporting support +To: netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: <20240812102742.388214-2-pablo@netfilter.org> + +From: Pablo Neira Ayuso + +commit b53c116642502b0c85ecef78bff4f826a7dd4145 upstream. + +Report the element that causes problems via netlink extended ACK for set +element commands. 
+ +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nf_tables_api.c | 13 +++++++++---- + 1 file changed, 9 insertions(+), 4 deletions(-) + +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -5354,8 +5354,10 @@ static int nf_tables_getsetelem(struct n + + nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { + err = nft_get_set_elem(&ctx, set, attr); +- if (err < 0) ++ if (err < 0) { ++ NL_SET_BAD_ATTR(extack, attr); + break; ++ } + } + + return err; +@@ -5848,8 +5850,10 @@ static int nf_tables_newsetelem(struct n + + nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { + err = nft_add_set_elem(&ctx, set, attr, nlh->nlmsg_flags); +- if (err < 0) ++ if (err < 0) { ++ NL_SET_BAD_ATTR(extack, attr); + return err; ++ } + } + + if (nft_net->validate_state == NFT_VALIDATE_DO) +@@ -6058,9 +6062,10 @@ static int nf_tables_delsetelem(struct n + + nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { + err = nft_del_setelem(&ctx, set, attr); +- if (err < 0) ++ if (err < 0) { ++ NL_SET_BAD_ATTR(extack, attr); + break; +- ++ } + set->ndeact++; + } + return err; diff --git a/queue-5.10/netfilter-nf_tables-use-timestamp-to-check-for-set-element-timeout.patch b/queue-5.10/netfilter-nf_tables-use-timestamp-to-check-for-set-element-timeout.patch new file mode 100644 index 00000000000..3fff9efadfd --- /dev/null +++ b/queue-5.10/netfilter-nf_tables-use-timestamp-to-check-for-set-element-timeout.patch @@ -0,0 +1,282 @@ +From stable+bounces-66453-greg=kroah.com@vger.kernel.org Mon Aug 12 12:28:39 2024 +From: Pablo Neira Ayuso +Date: Mon, 12 Aug 2024 12:27:40 +0200 +Subject: netfilter: nf_tables: use timestamp to check for set element timeout +To: netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: <20240812102742.388214-3-pablo@netfilter.org> + +From: Pablo Neira Ayuso + +commit 
7395dfacfff65e9938ac0889dafa1ab01e987d15 upstream + +Add a timestamp field at the beginning of the transaction, store it +in the nftables per-netns area. + +Update set backend .insert, .deactivate and sync gc path to use the +timestamp, this avoids that an element expires while control plane +transaction is still unfinished. + +.lookup and .update, which are used from packet path, still use the +current time to check if the element has expired. And .get path and dump +also since this runs lockless under rcu read side lock. Then, there is +async gc which also needs to check the current time since it runs +asynchronously from a workqueue. + +[ NB: rbtree GC updates have been excluded because GC is asynchronous. ] + +Fixes: c3e1b005ed1c ("netfilter: nf_tables: add set element timeout support") +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + include/net/netfilter/nf_tables.h | 21 +++++++++++++++++++-- + net/netfilter/nf_tables_api.c | 1 + + net/netfilter/nft_set_hash.c | 8 +++++++- + net/netfilter/nft_set_pipapo.c | 18 +++++++++++------- + net/netfilter/nft_set_rbtree.c | 6 ++++-- + 5 files changed, 42 insertions(+), 12 deletions(-) + +--- a/include/net/netfilter/nf_tables.h ++++ b/include/net/netfilter/nf_tables.h +@@ -13,6 +13,7 @@ + #include + #include + #include ++#include + + #define NFT_MAX_HOOKS (NF_INET_INGRESS + 1) + +@@ -686,10 +687,16 @@ static inline struct nft_expr *nft_set_e + return nft_set_ext(ext, NFT_SET_EXT_EXPR); + } + +-static inline bool nft_set_elem_expired(const struct nft_set_ext *ext) ++static inline bool __nft_set_elem_expired(const struct nft_set_ext *ext, ++ u64 tstamp) + { + return nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION) && +- time_is_before_eq_jiffies64(*nft_set_ext_expiration(ext)); ++ time_after_eq64(tstamp, *nft_set_ext_expiration(ext)); ++} ++ ++static inline bool nft_set_elem_expired(const struct nft_set_ext *ext) ++{ ++ return __nft_set_elem_expired(ext, get_jiffies_64()); + } + +static inline 
struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set, +@@ -1580,9 +1587,19 @@ struct nftables_pernet { + struct list_head module_list; + struct list_head notify_list; + struct mutex commit_mutex; ++ u64 tstamp; + unsigned int base_seq; + u8 validate_state; + unsigned int gc_seq; + }; + ++extern unsigned int nf_tables_net_id; ++ ++static inline u64 nft_net_tstamp(const struct net *net) ++{ ++ struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); ++ ++ return nft_net->tstamp; ++} ++ + #endif /* _NET_NF_TABLES_H */ +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -9034,6 +9034,7 @@ static bool nf_tables_valid_genid(struct + bool genid_ok; + + mutex_lock(&nft_net->commit_mutex); ++ nft_net->tstamp = get_jiffies_64(); + + genid_ok = genid == 0 || nft_net->base_seq == genid; + if (!genid_ok) +--- a/net/netfilter/nft_set_hash.c ++++ b/net/netfilter/nft_set_hash.c +@@ -38,6 +38,7 @@ struct nft_rhash_cmp_arg { + const struct nft_set *set; + const u32 *key; + u8 genmask; ++ u64 tstamp; + }; + + static inline u32 nft_rhash_key(const void *data, u32 len, u32 seed) +@@ -64,7 +65,7 @@ static inline int nft_rhash_cmp(struct r + return 1; + if (nft_set_elem_is_dead(&he->ext)) + return 1; +- if (nft_set_elem_expired(&he->ext)) ++ if (__nft_set_elem_expired(&he->ext, x->tstamp)) + return 1; + if (!nft_set_elem_active(&he->ext, x->genmask)) + return 1; +@@ -88,6 +89,7 @@ static bool nft_rhash_lookup(const struc + .genmask = nft_genmask_cur(net), + .set = set, + .key = key, ++ .tstamp = get_jiffies_64(), + }; + + he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params); +@@ -106,6 +108,7 @@ static void *nft_rhash_get(const struct + .genmask = nft_genmask_cur(net), + .set = set, + .key = elem->key.val.data, ++ .tstamp = get_jiffies_64(), + }; + + he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params); +@@ -129,6 +132,7 @@ static bool nft_rhash_update(struct nft_ + .genmask = NFT_GENMASK_ANY, + .set = set, + .key = key, ++ 
.tstamp = get_jiffies_64(), + }; + + he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params); +@@ -172,6 +176,7 @@ static int nft_rhash_insert(const struct + .genmask = nft_genmask_next(net), + .set = set, + .key = elem->key.val.data, ++ .tstamp = nft_net_tstamp(net), + }; + struct nft_rhash_elem *prev; + +@@ -214,6 +219,7 @@ static void *nft_rhash_deactivate(const + .genmask = nft_genmask_next(net), + .set = set, + .key = elem->key.val.data, ++ .tstamp = nft_net_tstamp(net), + }; + + rcu_read_lock(); +--- a/net/netfilter/nft_set_pipapo.c ++++ b/net/netfilter/nft_set_pipapo.c +@@ -504,6 +504,7 @@ out: + * @set: nftables API set representation + * @data: Key data to be matched against existing elements + * @genmask: If set, check that element is active in given genmask ++ * @tstamp: timestamp to check for expired elements + * + * This is essentially the same as the lookup function, except that it matches + * key data against the uncommitted copy and doesn't use preallocated maps for +@@ -513,7 +514,8 @@ out: + */ + static struct nft_pipapo_elem *pipapo_get(const struct net *net, + const struct nft_set *set, +- const u8 *data, u8 genmask) ++ const u8 *data, u8 genmask, ++ u64 tstamp) + { + struct nft_pipapo_elem *ret = ERR_PTR(-ENOENT); + struct nft_pipapo *priv = nft_set_priv(set); +@@ -566,7 +568,7 @@ next_match: + goto out; + + if (last) { +- if (nft_set_elem_expired(&f->mt[b].e->ext)) ++ if (__nft_set_elem_expired(&f->mt[b].e->ext, tstamp)) + goto next_match; + if ((genmask && + !nft_set_elem_active(&f->mt[b].e->ext, genmask))) +@@ -603,7 +605,7 @@ static void *nft_pipapo_get(const struct + const struct nft_set_elem *elem, unsigned int flags) + { + return pipapo_get(net, set, (const u8 *)elem->key.val.data, +- nft_genmask_cur(net)); ++ nft_genmask_cur(net), get_jiffies_64()); + } + + /** +@@ -1197,6 +1199,7 @@ static int nft_pipapo_insert(const struc + struct nft_pipapo *priv = nft_set_priv(set); + struct nft_pipapo_match *m = priv->clone; + u8 genmask = 
nft_genmask_next(net); ++ u64 tstamp = nft_net_tstamp(net); + struct nft_pipapo_field *f; + const u8 *start_p, *end_p; + int i, bsize_max, err = 0; +@@ -1206,7 +1209,7 @@ static int nft_pipapo_insert(const struc + else + end = start; + +- dup = pipapo_get(net, set, start, genmask); ++ dup = pipapo_get(net, set, start, genmask, tstamp); + if (!IS_ERR(dup)) { + /* Check if we already have the same exact entry */ + const struct nft_data *dup_key, *dup_end; +@@ -1228,7 +1231,7 @@ static int nft_pipapo_insert(const struc + + if (PTR_ERR(dup) == -ENOENT) { + /* Look for partially overlapping entries */ +- dup = pipapo_get(net, set, end, nft_genmask_next(net)); ++ dup = pipapo_get(net, set, end, nft_genmask_next(net), tstamp); + } + + if (PTR_ERR(dup) != -ENOENT) { +@@ -1580,6 +1583,7 @@ static void pipapo_gc(const struct nft_s + struct nft_set *set = (struct nft_set *) _set; + struct nft_pipapo *priv = nft_set_priv(set); + struct net *net = read_pnet(&set->net); ++ u64 tstamp = nft_net_tstamp(net); + int rules_f0, first_rule = 0; + struct nft_trans_gc *gc; + +@@ -1613,7 +1617,7 @@ static void pipapo_gc(const struct nft_s + /* synchronous gc never fails, there is no need to set on + * NFT_SET_ELEM_DEAD_BIT. 
+ */ +- if (nft_set_elem_expired(&e->ext)) { ++ if (__nft_set_elem_expired(&e->ext, tstamp)) { + priv->dirty = true; + + gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC); +@@ -1772,7 +1776,7 @@ static void *pipapo_deactivate(const str + { + struct nft_pipapo_elem *e; + +- e = pipapo_get(net, set, data, nft_genmask_next(net)); ++ e = pipapo_get(net, set, data, nft_genmask_next(net), nft_net_tstamp(net)); + if (IS_ERR(e)) + return NULL; + +--- a/net/netfilter/nft_set_rbtree.c ++++ b/net/netfilter/nft_set_rbtree.c +@@ -316,6 +316,7 @@ static int __nft_rbtree_insert(const str + struct nft_rbtree *priv = nft_set_priv(set); + u8 cur_genmask = nft_genmask_cur(net); + u8 genmask = nft_genmask_next(net); ++ u64 tstamp = nft_net_tstamp(net); + int d; + + /* Descend the tree to search for an existing element greater than the +@@ -363,7 +364,7 @@ static int __nft_rbtree_insert(const str + /* perform garbage collection to avoid bogus overlap reports + * but skip new elements in this transaction. + */ +- if (nft_set_elem_expired(&rbe->ext) && ++ if (__nft_set_elem_expired(&rbe->ext, tstamp) && + nft_set_elem_active(&rbe->ext, cur_genmask)) { + const struct nft_rbtree_elem *removed_end; + +@@ -550,6 +551,7 @@ static void *nft_rbtree_deactivate(const + const struct rb_node *parent = priv->root.rb_node; + struct nft_rbtree_elem *rbe, *this = elem->priv; + u8 genmask = nft_genmask_next(net); ++ u64 tstamp = nft_net_tstamp(net); + int d; + + while (parent != NULL) { +@@ -570,7 +572,7 @@ static void *nft_rbtree_deactivate(const + nft_rbtree_interval_end(this)) { + parent = parent->rb_right; + continue; +- } else if (nft_set_elem_expired(&rbe->ext)) { ++ } else if (__nft_set_elem_expired(&rbe->ext, tstamp)) { + break; + } else if (!nft_set_elem_active(&rbe->ext, genmask)) { + parent = parent->rb_left; diff --git a/queue-5.10/pci-dpc-fix-use-after-free-on-concurrent-dpc-and-hot-removal.patch b/queue-5.10/pci-dpc-fix-use-after-free-on-concurrent-dpc-and-hot-removal.patch new file mode 100644 
index 00000000000..9446e346862 --- /dev/null +++ b/queue-5.10/pci-dpc-fix-use-after-free-on-concurrent-dpc-and-hot-removal.patch @@ -0,0 +1,124 @@ +From stable+bounces-62754-greg=kroah.com@vger.kernel.org Tue Jul 30 12:30:10 2024 +From: Lukas Wunner +Date: Tue, 30 Jul 2024 12:26:55 +0200 +Subject: PCI/DPC: Fix use-after-free on concurrent DPC and hot-removal +To: Greg Kroah-Hartman , Sasha Levin +Cc: stable@vger.kernel.org, linux-pci@vger.kernel.org, Keith Busch , Mika Westerberg , Bjorn Helgaas , Krzysztof Wilczynski +Message-ID: <4d4ac43578f9aad9d7eee85a51833ae5e4b7cdb3.1722335040.git.lukas@wunner.de> + +From: Lukas Wunner + +commit 11a1f4bc47362700fcbde717292158873fb847ed upstream. + +Keith reports a use-after-free when a DPC event occurs concurrently to +hot-removal of the same portion of the hierarchy: + +The dpc_handler() awaits readiness of the secondary bus below the +Downstream Port where the DPC event occurred. To do so, it polls the +config space of the first child device on the secondary bus. If that +child device is concurrently removed, accesses to its struct pci_dev +cause the kernel to oops. + +That's because pci_bridge_wait_for_secondary_bus() neglects to hold a +reference on the child device. Before v6.3, the function was only +called on resume from system sleep or on runtime resume. Holding a +reference wasn't necessary back then because the pciehp IRQ thread +could never run concurrently. (On resume from system sleep, IRQs are +not enabled until after the resume_noirq phase. And runtime resume is +always awaited before a PCI device is removed.) + +However starting with v6.3, pci_bridge_wait_for_secondary_bus() is also +called on a DPC event. Commit 53b54ad074de ("PCI/DPC: Await readiness +of secondary bus after reset"), which introduced that, failed to +appreciate that pci_bridge_wait_for_secondary_bus() now needs to hold a +reference on the child device because dpc_handler() and pciehp may +indeed run concurrently. 
The commit was backported to v5.10+ stable +kernels, so that's the oldest one affected. + +Add the missing reference acquisition. + +Abridged stack trace: + + BUG: unable to handle page fault for address: 00000000091400c0 + CPU: 15 PID: 2464 Comm: irq/53-pcie-dpc 6.9.0 + RIP: pci_bus_read_config_dword+0x17/0x50 + pci_dev_wait() + pci_bridge_wait_for_secondary_bus() + dpc_reset_link() + pcie_do_recovery() + dpc_handler() + +Fixes: 53b54ad074de ("PCI/DPC: Await readiness of secondary bus after reset") +Closes: https://lore.kernel.org/r/20240612181625.3604512-3-kbusch@meta.com/ +Link: https://lore.kernel.org/linux-pci/8e4bcd4116fd94f592f2bf2749f168099c480ddf.1718707743.git.lukas@wunner.de +Reported-by: Keith Busch +Tested-by: Keith Busch +Signed-off-by: Lukas Wunner +Signed-off-by: Krzysztof Wilczyński +Reviewed-by: Keith Busch +Reviewed-by: Mika Westerberg +Cc: stable@vger.kernel.org # v5.10+ +Signed-off-by: Greg Kroah-Hartman +--- + drivers/pci/pci.c | 19 ++++++++++++------- + 1 file changed, 12 insertions(+), 7 deletions(-) + +--- a/drivers/pci/pci.c ++++ b/drivers/pci/pci.c +@@ -4817,7 +4817,7 @@ int pci_bridge_wait_for_secondary_bus(st + int timeout) + { + struct pci_dev *child; +- int delay; ++ int delay, ret = 0; + + if (pci_dev_is_disconnected(dev)) + return 0; +@@ -4845,8 +4845,8 @@ int pci_bridge_wait_for_secondary_bus(st + return 0; + } + +- child = list_first_entry(&dev->subordinate->devices, struct pci_dev, +- bus_list); ++ child = pci_dev_get(list_first_entry(&dev->subordinate->devices, ++ struct pci_dev, bus_list)); + up_read(&pci_bus_sem); + + /* +@@ -4856,7 +4856,7 @@ int pci_bridge_wait_for_secondary_bus(st + if (!pci_is_pcie(dev)) { + pci_dbg(dev, "waiting %d ms for secondary bus\n", 1000 + delay); + msleep(1000 + delay); +- return 0; ++ goto put_child; + } + + /* +@@ -4877,7 +4877,7 @@ int pci_bridge_wait_for_secondary_bus(st + * until the timeout expires. 
+ */ + if (!pcie_downstream_port(dev)) +- return 0; ++ goto put_child; + + if (pcie_get_speed_cap(dev) <= PCIE_SPEED_5_0GT) { + pci_dbg(dev, "waiting %d ms for downstream link\n", delay); +@@ -4888,11 +4888,16 @@ int pci_bridge_wait_for_secondary_bus(st + if (!pcie_wait_for_link_delay(dev, true, delay)) { + /* Did not train, no need to wait any further */ + pci_info(dev, "Data Link Layer Link Active not set in 1000 msec\n"); +- return -ENOTTY; ++ ret = -ENOTTY; ++ goto put_child; + } + } + +- return pci_dev_wait(child, reset_type, timeout - delay); ++ ret = pci_dev_wait(child, reset_type, timeout - delay); ++ ++put_child: ++ pci_dev_put(child); ++ return ret; + } + + void pci_reset_secondary_bus(struct pci_dev *dev) diff --git a/queue-5.10/series b/queue-5.10/series index 023e2691363..2a684876e46 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -345,3 +345,9 @@ ipv6-fix-source-address-selection-with-route-leak.patch samples-add-fs-error-monitoring-example.patch samples-make-fs-monitor-depend-on-libc-and-headers.patch add-gitignore-file-for-samples-fanotify-subdirectory.patch +fix-gcc-4.9-build-issue-in-5.10.y.patch +pci-dpc-fix-use-after-free-on-concurrent-dpc-and-hot-removal.patch +netfilter-nf_tables-set-element-extended-ack-reporting-support.patch +netfilter-nf_tables-use-timestamp-to-check-for-set-element-timeout.patch +netfilter-nf_tables-allow-clone-callbacks-to-sleep.patch +netfilter-nf_tables-prefer-nft_chain_validate.patch