From: Greg Kroah-Hartman Date: Thu, 13 Jun 2024 07:08:52 +0000 (+0200) Subject: 4.19-stable patches X-Git-Tag: v4.19.316~82 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=847bcbab60d3fe23fa9cba6852fad44d6b1823b1;p=thirdparty%2Fkernel%2Fstable-queue.git 4.19-stable patches added patches: netfilter-nf_tables-adapt-set-backend-to-use-gc-transaction-api.patch netfilter-nf_tables-allow-nfproto_inet-in-nft_-match-target-_validate.patch netfilter-nf_tables-bogus-ebusy-when-deleting-flowtable-after-flush-for-4.19.patch netfilter-nf_tables-defer-gc-run-if-previous-batch-is-still-pending.patch netfilter-nf_tables-disable-toggling-dormant-table-state-more-than-once.patch netfilter-nf_tables-discard-table-flag-update-with-pending-basechain-deletion.patch netfilter-nf_tables-do-not-compare-internal-table-flags-on-updates.patch netfilter-nf_tables-don-t-skip-expired-elements-during-walk.patch netfilter-nf_tables-double-hook-unregistration-in-netns-path.patch netfilter-nf_tables-drop-map-element-references-from-preparation-phase.patch netfilter-nf_tables-fix-gc-transaction-races-with-netns-and-netlink-event-exit-path.patch netfilter-nf_tables-fix-memleak-when-more-than-255-elements-expired.patch netfilter-nf_tables-fix-table-flag-updates.patch netfilter-nf_tables-gc-transaction-api-to-avoid-race-with-control-plane.patch netfilter-nf_tables-gc-transaction-race-with-abort-path.patch netfilter-nf_tables-gc-transaction-race-with-netns-dismantle.patch netfilter-nf_tables-mark-newset-as-dead-on-transaction-abort.patch netfilter-nf_tables-mark-set-as-dead-when-unbinding-anonymous-set-with-timeout.patch netfilter-nf_tables-pass-context-to-nft_set_destroy.patch netfilter-nf_tables-reject-new-basechain-after-table-flag-update.patch netfilter-nf_tables-remove-busy-mark-and-gc-batch-api.patch netfilter-nf_tables-set-dormant-flag-on-hook-register-failure.patch netfilter-nf_tables-skip-dead-set-elements-in-netlink-dump.patch netfilter-nf_tables-unregister-flowtable-hooks-on-netns-exit.patch netfilter-nf_tables-validate-nfproto_-family.patch netfilter-nft_dynset-fix-timeouts-later-than-23-days.patch netfilter-nft_dynset-relax-superfluous-check-on-set-updates.patch netfilter-nft_dynset-report-eopnotsupp-on-missing-set-feature.patch netfilter-nft_set_hash-try-later-when-gc-hits-eagain-on-iteration.patch netfilter-nft_set_rbtree-add-missing-expired-checks.patch netfilter-nft_set_rbtree-allow-loose-matching-of-closing-element-in-interval.patch netfilter-nft_set_rbtree-fix-null-deref-on-element-insertion.patch netfilter-nft_set_rbtree-fix-overlap-expiration-walk.patch netfilter-nft_set_rbtree-skip-end-interval-element-from-gc.patch netfilter-nft_set_rbtree-skip-sync-gc-for-new-elements-in-this-transaction.patch netfilter-nft_set_rbtree-switch-to-node-list-walk-for-overlap-detection.patch netfilter-nft_set_rbtree-use-read-spinlock-to-avoid-datapath-contention.patch netfilter-nftables-exthdr-fix-4-byte-stack-oob-write.patch netfilter-nftables-rename-set-element-data-activation-deactivation-functions.patch netfilter-nftables-update-table-flags-from-the-commit-phase.patch --- diff --git a/queue-4.19/netfilter-nf_tables-adapt-set-backend-to-use-gc-transaction-api.patch b/queue-4.19/netfilter-nf_tables-adapt-set-backend-to-use-gc-transaction-api.patch new file mode 100644 index 00000000000..05a7adfb5cb --- /dev/null +++ b/queue-4.19/netfilter-nf_tables-adapt-set-backend-to-use-gc-transaction-api.patch @@ -0,0 +1,450 @@ +From stable+bounces-50350-greg=kroah.com@vger.kernel.org Thu Jun 13 03:02:56 2024 +From: Pablo Neira 
Ayuso +Date: Thu, 13 Jun 2024 03:01:40 +0200 +Subject: netfilter: nf_tables: adapt set backend to use GC transaction API +To: netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: <20240613010209.104423-12-pablo@netfilter.org> + +From: Pablo Neira Ayuso + +commit f6c383b8c31a93752a52697f8430a71dcbc46adf upstream. + +Use the GC transaction API to replace the old and buggy gc API and the +busy mark approach. + +No set elements are removed from async garbage collection anymore, +instead the _DEAD bit is set on so the set element is not visible from +lookup path anymore. Async GC enqueues transaction work that might be +aborted and retried later. + +rbtree and pipapo set backends does not set on the _DEAD bit from the +sync GC path since this runs in control plane path where mutex is held. +In this case, set elements are deactivated, removed and then released +via RCU callback, sync GC never fails. + +Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges") +Fixes: 8d8540c4f5e0 ("netfilter: nft_set_rbtree: add timeout support") +Fixes: 9d0982927e79 ("netfilter: nft_hash: add support for timeouts") +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nft_set_hash.c | 82 +++++++++++++++-------- + net/netfilter/nft_set_rbtree.c | 142 +++++++++++++++++++++++++---------------- + 2 files changed, 143 insertions(+), 81 deletions(-) + +--- a/net/netfilter/nft_set_hash.c ++++ b/net/netfilter/nft_set_hash.c +@@ -20,6 +20,9 @@ + #include + #include + #include ++#include ++ ++extern unsigned int nf_tables_net_id; + + /* We target a hash table size of 4, element hint is 75% of final size */ + #define NFT_RHASH_ELEMENT_HINT 3 +@@ -62,6 +65,8 @@ static inline int nft_rhash_cmp(struct r + + if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen)) + return 1; ++ if (nft_set_elem_is_dead(&he->ext)) ++ return 1; + if (nft_set_elem_expired(&he->ext)) + return 1; + if (!nft_set_elem_active(&he->ext, x->genmask)) +@@ -190,7 +195,6 @@ static void nft_rhash_activate(const str + struct nft_rhash_elem *he = elem->priv; + + nft_set_elem_change_active(net, set, &he->ext); +- nft_set_elem_clear_busy(&he->ext); + } + + static bool nft_rhash_flush(const struct net *net, +@@ -198,12 +202,9 @@ static bool nft_rhash_flush(const struct + { + struct nft_rhash_elem *he = priv; + +- if (!nft_set_elem_mark_busy(&he->ext) || +- !nft_is_active(net, &he->ext)) { +- nft_set_elem_change_active(net, set, &he->ext); +- return true; +- } +- return false; ++ nft_set_elem_change_active(net, set, &he->ext); ++ ++ return true; + } + + static void *nft_rhash_deactivate(const struct net *net, +@@ -220,9 +221,8 @@ static void *nft_rhash_deactivate(const + + rcu_read_lock(); + he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params); +- if (he != NULL && +- !nft_rhash_flush(net, set, he)) +- he = NULL; ++ if (he) ++ nft_set_elem_change_active(net, set, &he->ext); + + rcu_read_unlock(); + +@@ -288,55 +288,80 @@ out: + + static void nft_rhash_gc(struct work_struct *work) + { ++ struct nftables_pernet *nft_net; + struct nft_set *set; + struct nft_rhash_elem *he; + struct nft_rhash *priv; +- struct nft_set_gc_batch *gcb = NULL; + struct rhashtable_iter hti; ++ struct nft_trans_gc *gc; ++ struct net *net; ++ u32 gc_seq; + int err; + + priv = container_of(work, struct nft_rhash, gc_work.work); + set = nft_set_container_of(priv); ++ net = read_pnet(&set->net); ++ nft_net = net_generic(net, nf_tables_net_id); ++ 
gc_seq = READ_ONCE(nft_net->gc_seq); ++ ++ gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL); ++ if (!gc) ++ goto done; + + err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL); +- if (err) +- goto schedule; ++ if (err) { ++ nft_trans_gc_destroy(gc); ++ goto done; ++ } + + rhashtable_walk_start(&hti); + + while ((he = rhashtable_walk_next(&hti))) { + if (IS_ERR(he)) { +- if (PTR_ERR(he) != -EAGAIN) +- goto out; ++ if (PTR_ERR(he) != -EAGAIN) { ++ nft_trans_gc_destroy(gc); ++ gc = NULL; ++ goto try_later; ++ } + continue; + } + ++ /* Ruleset has been updated, try later. */ ++ if (READ_ONCE(nft_net->gc_seq) != gc_seq) { ++ nft_trans_gc_destroy(gc); ++ gc = NULL; ++ goto try_later; ++ } ++ ++ if (nft_set_elem_is_dead(&he->ext)) ++ goto dead_elem; ++ + if (nft_set_ext_exists(&he->ext, NFT_SET_EXT_EXPR)) { + struct nft_expr *expr = nft_set_ext_expr(&he->ext); + + if (expr->ops->gc && + expr->ops->gc(read_pnet(&set->net), expr)) +- goto gc; ++ goto needs_gc_run; + } + if (!nft_set_elem_expired(&he->ext)) + continue; +-gc: +- if (nft_set_elem_mark_busy(&he->ext)) +- continue; ++needs_gc_run: ++ nft_set_elem_dead(&he->ext); ++dead_elem: ++ gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC); ++ if (!gc) ++ goto try_later; + +- gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC); +- if (gcb == NULL) +- goto out; +- rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params); +- atomic_dec(&set->nelems); +- nft_set_gc_batch_add(gcb, he); ++ nft_trans_gc_elem_add(gc, he); + } +-out: ++try_later: + rhashtable_walk_stop(&hti); + rhashtable_walk_exit(&hti); + +- nft_set_gc_batch_complete(gcb); +-schedule: ++ if (gc) ++ nft_trans_gc_queue_async_done(gc); ++ ++done: + queue_delayed_work(system_power_efficient_wq, &priv->gc_work, + nft_set_gc_interval(set)); + } +@@ -399,7 +424,6 @@ static void nft_rhash_destroy(const stru + }; + + cancel_delayed_work_sync(&priv->gc_work); +- rcu_barrier(); + rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy, + (void *)&rhash_ctx); + } +--- a/net/netfilter/nft_set_rbtree.c ++++ b/net/netfilter/nft_set_rbtree.c +@@ -17,6 +17,9 @@ + #include + #include + #include ++#include ++ ++extern unsigned int nf_tables_net_id; + + struct nft_rbtree { + struct rb_root root; +@@ -49,6 +52,12 @@ static int nft_rbtree_cmp(const struct n + set->klen); + } + ++static bool nft_rbtree_elem_expired(const struct nft_rbtree_elem *rbe) ++{ ++ return nft_set_elem_expired(&rbe->ext) || ++ nft_set_elem_is_dead(&rbe->ext); ++} ++ + static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set, + const u32 *key, const struct nft_set_ext **ext, + unsigned int seq) +@@ -83,7 +92,7 @@ static bool __nft_rbtree_lookup(const st + continue; + } + +- if (nft_set_elem_expired(&rbe->ext)) ++ if (nft_rbtree_elem_expired(rbe)) + return false; + + if (nft_rbtree_interval_end(rbe)) { +@@ -101,7 +110,7 @@ static bool __nft_rbtree_lookup(const st + + if (set->flags & NFT_SET_INTERVAL && interval != NULL && + nft_set_elem_active(&interval->ext, genmask) && +- !nft_set_elem_expired(&interval->ext) && ++ !nft_rbtree_elem_expired(interval) && + nft_rbtree_interval_start(interval)) { + *ext = &interval->ext; + return true; +@@ -217,6 +226,18 @@ static void *nft_rbtree_get(const struct + return rbe; + } + ++static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set, ++ struct nft_rbtree *priv, ++ struct nft_rbtree_elem *rbe) ++{ ++ struct nft_set_elem elem = { ++ .priv = rbe, ++ }; ++ ++ nft_setelem_data_deactivate(net, set, &elem); ++ rb_erase(&rbe->node, &priv->root); ++} ++ 
+ static int nft_rbtree_gc_elem(const struct nft_set *__set, + struct nft_rbtree *priv, + struct nft_rbtree_elem *rbe, +@@ -224,11 +245,12 @@ static int nft_rbtree_gc_elem(const stru + { + struct nft_set *set = (struct nft_set *)__set; + struct rb_node *prev = rb_prev(&rbe->node); ++ struct net *net = read_pnet(&set->net); + struct nft_rbtree_elem *rbe_prev; +- struct nft_set_gc_batch *gcb; ++ struct nft_trans_gc *gc; + +- gcb = nft_set_gc_batch_check(set, NULL, GFP_ATOMIC); +- if (!gcb) ++ gc = nft_trans_gc_alloc(set, 0, GFP_ATOMIC); ++ if (!gc) + return -ENOMEM; + + /* search for end interval coming before this element. +@@ -246,17 +268,28 @@ static int nft_rbtree_gc_elem(const stru + + if (prev) { + rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node); ++ nft_rbtree_gc_remove(net, set, priv, rbe_prev); + +- rb_erase(&rbe_prev->node, &priv->root); +- atomic_dec(&set->nelems); +- nft_set_gc_batch_add(gcb, rbe_prev); ++ /* There is always room in this trans gc for this element, ++ * memory allocation never actually happens, hence, the warning ++ * splat in such case. No need to set NFT_SET_ELEM_DEAD_BIT, ++ * this is synchronous gc which never fails. ++ */ ++ gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC); ++ if (WARN_ON_ONCE(!gc)) ++ return -ENOMEM; ++ ++ nft_trans_gc_elem_add(gc, rbe_prev); + } + +- rb_erase(&rbe->node, &priv->root); +- atomic_dec(&set->nelems); ++ nft_rbtree_gc_remove(net, set, priv, rbe); ++ gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC); ++ if (WARN_ON_ONCE(!gc)) ++ return -ENOMEM; ++ ++ nft_trans_gc_elem_add(gc, rbe); + +- nft_set_gc_batch_add(gcb, rbe); +- nft_set_gc_batch_complete(gcb); ++ nft_trans_gc_queue_sync_done(gc); + + return 0; + } +@@ -484,7 +517,6 @@ static void nft_rbtree_activate(const st + struct nft_rbtree_elem *rbe = elem->priv; + + nft_set_elem_change_active(net, set, &rbe->ext); +- nft_set_elem_clear_busy(&rbe->ext); + } + + static bool nft_rbtree_flush(const struct net *net, +@@ -492,12 +524,9 @@ static bool nft_rbtree_flush(const struc + { + struct nft_rbtree_elem *rbe = priv; + +- if (!nft_set_elem_mark_busy(&rbe->ext) || +- !nft_is_active(net, &rbe->ext)) { +- nft_set_elem_change_active(net, set, &rbe->ext); +- return true; +- } +- return false; ++ nft_set_elem_change_active(net, set, &rbe->ext); ++ ++ return true; + } + + static void *nft_rbtree_deactivate(const struct net *net, +@@ -574,26 +603,40 @@ cont: + + static void nft_rbtree_gc(struct work_struct *work) + { +- struct nft_rbtree_elem *rbe, *rbe_end = NULL, *rbe_prev = NULL; +- struct nft_set_gc_batch *gcb = NULL; ++ struct nft_rbtree_elem *rbe, *rbe_end = NULL; ++ struct nftables_pernet *nft_net; + struct nft_rbtree *priv; ++ struct nft_trans_gc *gc; + struct rb_node *node; + struct nft_set *set; ++ unsigned int gc_seq; + struct net *net; +- u8 genmask; + + priv = container_of(work, struct nft_rbtree, gc_work.work); + set = nft_set_container_of(priv); + net = read_pnet(&set->net); +- genmask = nft_genmask_cur(net); ++ nft_net = net_generic(net, nf_tables_net_id); ++ gc_seq = READ_ONCE(nft_net->gc_seq); ++ ++ gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL); ++ if (!gc) ++ goto done; + + write_lock_bh(&priv->lock); + write_seqcount_begin(&priv->count); + for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) { ++ ++ /* Ruleset has been updated, try later. 
*/ ++ if (READ_ONCE(nft_net->gc_seq) != gc_seq) { ++ nft_trans_gc_destroy(gc); ++ gc = NULL; ++ goto try_later; ++ } ++ + rbe = rb_entry(node, struct nft_rbtree_elem, node); + +- if (!nft_set_elem_active(&rbe->ext, genmask)) +- continue; ++ if (nft_set_elem_is_dead(&rbe->ext)) ++ goto dead_elem; + + /* elements are reversed in the rbtree for historical reasons, + * from highest to lowest value, that is why end element is +@@ -603,43 +646,38 @@ static void nft_rbtree_gc(struct work_st + rbe_end = rbe; + continue; + } ++ + if (!nft_set_elem_expired(&rbe->ext)) + continue; + +- if (nft_set_elem_mark_busy(&rbe->ext)) { +- rbe_end = NULL; ++ nft_set_elem_dead(&rbe->ext); ++ ++ if (!rbe_end) + continue; +- } + +- if (rbe_prev) { +- rb_erase(&rbe_prev->node, &priv->root); +- rbe_prev = NULL; +- } +- gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC); +- if (!gcb) +- break; ++ nft_set_elem_dead(&rbe_end->ext); + +- atomic_dec(&set->nelems); +- nft_set_gc_batch_add(gcb, rbe); +- rbe_prev = rbe; +- +- if (rbe_end) { +- atomic_dec(&set->nelems); +- nft_set_gc_batch_add(gcb, rbe_end); +- rb_erase(&rbe_end->node, &priv->root); +- rbe_end = NULL; +- } +- node = rb_next(node); +- if (!node) +- break; ++ gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC); ++ if (!gc) ++ goto try_later; ++ ++ nft_trans_gc_elem_add(gc, rbe_end); ++ rbe_end = NULL; ++dead_elem: ++ gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC); ++ if (!gc) ++ goto try_later; ++ ++ nft_trans_gc_elem_add(gc, rbe); + } +- if (rbe_prev) +- rb_erase(&rbe_prev->node, &priv->root); ++ ++try_later: + write_seqcount_end(&priv->count); + write_unlock_bh(&priv->lock); + +- nft_set_gc_batch_complete(gcb); +- ++ if (gc) ++ nft_trans_gc_queue_async_done(gc); ++done: + queue_delayed_work(system_power_efficient_wq, &priv->gc_work, + nft_set_gc_interval(set)); + } diff --git a/queue-4.19/netfilter-nf_tables-allow-nfproto_inet-in-nft_-match-target-_validate.patch b/queue-4.19/netfilter-nf_tables-allow-nfproto_inet-in-nft_-match-target-_validate.patch new file mode 100644 index 00000000000..d66a665fd45 --- /dev/null +++ b/queue-4.19/netfilter-nf_tables-allow-nfproto_inet-in-nft_-match-target-_validate.patch @@ -0,0 +1,106 @@ +From stable+bounces-50376-greg=kroah.com@vger.kernel.org Thu Jun 13 03:04:11 2024 +From: Pablo Neira Ayuso +Date: Thu, 13 Jun 2024 03:02:05 +0200 +Subject: netfilter: nf_tables: allow NFPROTO_INET in nft_(match/target)_validate() +To: netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: <20240613010209.104423-37-pablo@netfilter.org> + +From: Ignat Korchagin + +[ Upstream commit 7e0f122c65912740327e4c54472acaa5f85868cb ] + +Commit d0009effa886 ("netfilter: nf_tables: validate NFPROTO_* family") added +some validation of NFPROTO_* families in the nft_compat module, but it broke +the ability to use legacy iptables modules in dual-stack nftables. + +While with legacy iptables one had to independently manage IPv4 and IPv6 +tables, with nftables it is possible to have dual-stack tables sharing the +rules. Moreover, it was possible to use rules based on legacy iptables +match/target modules in dual-stack nftables. 
+ +As an example, the program from [2] creates an INET dual-stack family table +using an xt_bpf based rule, which looks like the following (the actual output +was generated with a patched nft tool as the current nft tool does not parse +dual stack tables with legacy match rules, so consider it for illustrative +purposes only): + +table inet testfw { + chain input { + type filter hook prerouting priority filter; policy accept; + bytecode counter packets 0 bytes 0 accept + } +} + +After d0009effa886 ("netfilter: nf_tables: validate NFPROTO_* family") we get +EOPNOTSUPP for the above program. + +Fix this by allowing NFPROTO_INET for nft_(match/target)_validate(), but also +restrict the functions to classic iptables hooks. + +Changes in v3: + * clarify that upstream nft will not display such configuration properly and + that the output was generated with a patched nft tool + * remove example program from commit description and link to it instead + * no code changes otherwise + +Changes in v2: + * restrict nft_(match/target)_validate() to classic iptables hooks + * rewrite example program to use unmodified libnftnl + +Fixes: d0009effa886 ("netfilter: nf_tables: validate NFPROTO_* family") +Link: https://lore.kernel.org/all/Zc1PfoWN38UuFJRI@calendula/T/#mc947262582c90fec044c7a3398cc92fac7afea72 [1] +Link: https://lore.kernel.org/all/20240220145509.53357-1-ignat@cloudflare.com/ [2] +Reported-by: Jordan Griege +Signed-off-by: Ignat Korchagin +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nft_compat.c | 20 ++++++++++++++++++++ + 1 file changed, 20 insertions(+) + +--- a/net/netfilter/nft_compat.c ++++ b/net/netfilter/nft_compat.c +@@ -321,10 +321,20 @@ static int nft_target_validate(const str + + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && ++ ctx->family != NFPROTO_INET && + ctx->family != NFPROTO_BRIDGE && + ctx->family != NFPROTO_ARP) + return -EOPNOTSUPP; + ++ ret = nft_chain_validate_hooks(ctx->chain, ++ (1 << NF_INET_PRE_ROUTING) | ++ (1 << NF_INET_LOCAL_IN) | ++ (1 << NF_INET_FORWARD) | ++ (1 << NF_INET_LOCAL_OUT) | ++ (1 << NF_INET_POST_ROUTING)); ++ if (ret) ++ return ret; ++ + if (nft_is_base_chain(ctx->chain)) { + const struct nft_base_chain *basechain = + nft_base_chain(ctx->chain); +@@ -568,10 +578,20 @@ static int nft_match_validate(const stru + + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && ++ ctx->family != NFPROTO_INET && + ctx->family != NFPROTO_BRIDGE && + ctx->family != NFPROTO_ARP) + return -EOPNOTSUPP; + ++ ret = nft_chain_validate_hooks(ctx->chain, ++ (1 << NF_INET_PRE_ROUTING) | ++ (1 << NF_INET_LOCAL_IN) | ++ (1 << NF_INET_FORWARD) | ++ (1 << NF_INET_LOCAL_OUT) | ++ (1 << NF_INET_POST_ROUTING)); ++ if (ret) ++ return ret; ++ + if (nft_is_base_chain(ctx->chain)) { + const struct nft_base_chain *basechain = + nft_base_chain(ctx->chain); diff --git a/queue-4.19/netfilter-nf_tables-bogus-ebusy-when-deleting-flowtable-after-flush-for-4.19.patch b/queue-4.19/netfilter-nf_tables-bogus-ebusy-when-deleting-flowtable-after-flush-for-4.19.patch new file mode 100644 index 00000000000..75df797b048 --- /dev/null +++ b/queue-4.19/netfilter-nf_tables-bogus-ebusy-when-deleting-flowtable-after-flush-for-4.19.patch @@ -0,0 +1,39 @@ +From stable+bounces-50366-greg=kroah.com@vger.kernel.org Thu Jun 13 03:03:37 2024 +From: Pablo Neira Ayuso +Date: Thu, 13 Jun 2024 03:01:55 +0200 +Subject: netfilter: nf_tables: bogus EBUSY when deleting flowtable after flush (for 4.19) +To: 
netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: <20240613010209.104423-27-pablo@netfilter.org> + +From: Pablo Neira Ayuso + +3f0465a9ef02 ("netfilter: nf_tables: dynamically allocate hooks per +net_device in flowtables") reworks flowtable support to allow for +dynamic allocation of hooks, which implicitly fixes the following +bogus EBUSY in transaction: + + delete flowtable + add flowtable # same flowtable with same devices, it hits EBUSY + +This patch does not exist in any tree, but it fixes this issue for +-stable Linux kernel 4.19 + +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nf_tables_api.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -5956,6 +5956,9 @@ static int nf_tables_newflowtable(struct + continue; + + list_for_each_entry(ft, &table->flowtables, list) { ++ if (!nft_is_active_next(net, ft)) ++ continue; ++ + for (k = 0; k < ft->ops_len; k++) { + if (!ft->ops[k].dev) + continue; diff --git a/queue-4.19/netfilter-nf_tables-defer-gc-run-if-previous-batch-is-still-pending.patch b/queue-4.19/netfilter-nf_tables-defer-gc-run-if-previous-batch-is-still-pending.patch new file mode 100644 index 00000000000..231cddba977 --- /dev/null +++ b/queue-4.19/netfilter-nf_tables-defer-gc-run-if-previous-batch-is-still-pending.patch @@ -0,0 +1,74 @@ +From stable+bounces-50355-greg=kroah.com@vger.kernel.org Thu Jun 13 03:03:06 2024 +From: Pablo Neira Ayuso +Date: Thu, 13 Jun 2024 03:01:45 +0200 +Subject: netfilter: nf_tables: defer gc run if previous batch is still pending +To: netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: <20240613010209.104423-17-pablo@netfilter.org> + +From: Florian Westphal + +commit 8e51830e29e12670b4c10df070a4ea4c9593e961 upstream. + +Don't queue more gc work, else we may queue the same elements multiple +times. + +If an element is flagged as dead, this can mean that either the previous +gc request was invalidated/discarded by a transaction or that the previous +request is still pending in the system work queue. + +The latter will happen if the gc interval is set to a very low value, +e.g. 1ms, and system work queue is backlogged. + +The sets refcount is 1 if no previous gc requeusts are queued, so add +a helper for this and skip gc run if old requests are pending. + +Add a helper for this and skip the gc run in this case. 
+ +Fixes: f6c383b8c31a ("netfilter: nf_tables: adapt set backend to use GC transaction API") +Signed-off-by: Florian Westphal +Signed-off-by: Greg Kroah-Hartman +--- + include/net/netfilter/nf_tables.h | 5 +++++ + net/netfilter/nft_set_hash.c | 3 +++ + net/netfilter/nft_set_rbtree.c | 3 +++ + 3 files changed, 11 insertions(+) + +--- a/include/net/netfilter/nf_tables.h ++++ b/include/net/netfilter/nf_tables.h +@@ -445,6 +445,11 @@ static inline void *nft_set_priv(const s + return (void *)set->data; + } + ++static inline bool nft_set_gc_is_pending(const struct nft_set *s) ++{ ++ return refcount_read(&s->refs) != 1; ++} ++ + static inline struct nft_set *nft_set_container_of(const void *priv) + { + return (void *)priv - offsetof(struct nft_set, data); +--- a/net/netfilter/nft_set_hash.c ++++ b/net/netfilter/nft_set_hash.c +@@ -304,6 +304,9 @@ static void nft_rhash_gc(struct work_str + nft_net = net_generic(net, nf_tables_net_id); + gc_seq = READ_ONCE(nft_net->gc_seq); + ++ if (nft_set_gc_is_pending(set)) ++ goto done; ++ + gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL); + if (!gc) + goto done; +--- a/net/netfilter/nft_set_rbtree.c ++++ b/net/netfilter/nft_set_rbtree.c +@@ -618,6 +618,9 @@ static void nft_rbtree_gc(struct work_st + nft_net = net_generic(net, nf_tables_net_id); + gc_seq = READ_ONCE(nft_net->gc_seq); + ++ if (nft_set_gc_is_pending(set)) ++ goto done; ++ + gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL); + if (!gc) + goto done; diff --git a/queue-4.19/netfilter-nf_tables-disable-toggling-dormant-table-state-more-than-once.patch b/queue-4.19/netfilter-nf_tables-disable-toggling-dormant-table-state-more-than-once.patch new file mode 100644 index 00000000000..71886af8c1e --- /dev/null +++ b/queue-4.19/netfilter-nf_tables-disable-toggling-dormant-table-state-more-than-once.patch @@ -0,0 +1,56 @@ +From stable+bounces-50365-greg=kroah.com@vger.kernel.org Thu Jun 13 03:03:36 2024 +From: Pablo Neira Ayuso +Date: Thu, 13 Jun 2024 03:01:54 +0200 +Subject: netfilter: nf_tables: disable toggling dormant table state more than once +To: netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: <20240613010209.104423-26-pablo@netfilter.org> + +From: Pablo Neira Ayuso + +commit c9bd26513b3a11b3adb3c2ed8a31a01a87173ff1 upstream. 
+ +nft -f -< +Cc: Bing-Jhong Billy Jheng +Cc: info@starlabs.sg +Signed-off-by: Florian Westphal +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nf_tables_api.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -893,6 +893,10 @@ static int nf_tables_updtable(struct nft + if (flags == ctx->table->flags) + return 0; + ++ /* No dormant off/on/off/on games in single transaction */ ++ if (ctx->table->flags & __NFT_TABLE_F_UPDATE) ++ return -EINVAL; ++ + trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE, + sizeof(struct nft_trans_table)); + if (trans == NULL) diff --git a/queue-4.19/netfilter-nf_tables-discard-table-flag-update-with-pending-basechain-deletion.patch b/queue-4.19/netfilter-nf_tables-discard-table-flag-update-with-pending-basechain-deletion.patch new file mode 100644 index 00000000000..46962fe6048 --- /dev/null +++ b/queue-4.19/netfilter-nf_tables-discard-table-flag-update-with-pending-basechain-deletion.patch @@ -0,0 +1,61 @@ +From stable+bounces-50380-greg=kroah.com@vger.kernel.org Thu Jun 13 03:04:22 2024 +From: Pablo Neira Ayuso +Date: Thu, 13 Jun 2024 03:02:09 +0200 +Subject: netfilter: nf_tables: discard table flag update with pending basechain deletion +To: netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: <20240613010209.104423-41-pablo@netfilter.org> + +From: Pablo Neira Ayuso + +commit 1bc83a019bbe268be3526406245ec28c2458a518 upstream. + +Hook unregistration is deferred to the commit phase, same occurs with +hook updates triggered by the table dormant flag. When both commands are +combined, this results in deleting a basechain while leaving its hook +still registered in the core. + +Fixes: 179d9ba5559a ("netfilter: nf_tables: fix table flag updates") +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nf_tables_api.c | 20 +++++++++++++++++++- + 1 file changed, 19 insertions(+), 1 deletion(-) + +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -877,6 +877,24 @@ static void nf_tables_table_disable(stru + #define __NFT_TABLE_F_UPDATE (__NFT_TABLE_F_WAS_DORMANT | \ + __NFT_TABLE_F_WAS_AWAKEN) + ++static bool nft_table_pending_update(const struct nft_ctx *ctx) ++{ ++ struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id); ++ struct nft_trans *trans; ++ ++ if (ctx->table->flags & __NFT_TABLE_F_UPDATE) ++ return true; ++ ++ list_for_each_entry(trans, &nft_net->commit_list, list) { ++ if (trans->ctx.table == ctx->table && ++ trans->msg_type == NFT_MSG_DELCHAIN && ++ nft_is_base_chain(trans->ctx.chain)) ++ return true; ++ } ++ ++ return false; ++} ++ + static int nf_tables_updtable(struct nft_ctx *ctx) + { + struct nft_trans *trans; +@@ -894,7 +912,7 @@ static int nf_tables_updtable(struct nft + return 0; + + /* No dormant off/on/off/on games in single transaction */ +- if (ctx->table->flags & __NFT_TABLE_F_UPDATE) ++ if (nft_table_pending_update(ctx)) + return -EINVAL; + + trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE, diff --git a/queue-4.19/netfilter-nf_tables-do-not-compare-internal-table-flags-on-updates.patch b/queue-4.19/netfilter-nf_tables-do-not-compare-internal-table-flags-on-updates.patch new file mode 100644 index 00000000000..d8fce0a7e6b --- /dev/null +++ b/queue-4.19/netfilter-nf_tables-do-not-compare-internal-table-flags-on-updates.patch @@ -0,0 +1,33 @@ +From stable+bounces-50377-greg=kroah.com@vger.kernel.org 
Thu Jun 13 03:04:15 2024 +From: Pablo Neira Ayuso +Date: Thu, 13 Jun 2024 03:02:06 +0200 +Subject: netfilter: nf_tables: do not compare internal table flags on updates +To: netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: <20240613010209.104423-38-pablo@netfilter.org> + +From: Pablo Neira Ayuso + +[ Upstream commit 4a0e7f2decbf9bd72461226f1f5f7dcc4b08f139 ] + +Restore skipping transaction if table update does not modify flags. + +Fixes: 179d9ba5559a ("netfilter: nf_tables: fix table flag updates") +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nf_tables_api.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -890,7 +890,7 @@ static int nf_tables_updtable(struct nft + if (flags & ~NFT_TABLE_F_DORMANT) + return -EINVAL; + +- if (flags == ctx->table->flags) ++ if (flags == (ctx->table->flags & NFT_TABLE_F_MASK)) + return 0; + + /* No dormant off/on/off/on games in single transaction */ diff --git a/queue-4.19/netfilter-nf_tables-don-t-skip-expired-elements-during-walk.patch b/queue-4.19/netfilter-nf_tables-don-t-skip-expired-elements-during-walk.patch new file mode 100644 index 00000000000..a36c7f748f1 --- /dev/null +++ b/queue-4.19/netfilter-nf_tables-don-t-skip-expired-elements-during-walk.patch @@ -0,0 +1,90 @@ +From stable+bounces-50349-greg=kroah.com@vger.kernel.org Thu Jun 13 03:02:50 2024 +From: Pablo Neira Ayuso +Date: Thu, 13 Jun 2024 03:01:38 +0200 +Subject: netfilter: nf_tables: don't skip expired elements during walk +To: netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: <20240613010209.104423-10-pablo@netfilter.org> + +From: Pablo Neira Ayuso + +commit 24138933b97b055d486e8064b4a1721702442a9b upstream. + +There is an asymmetry between commit/abort and preparation phase if the +following conditions are met: + +1. set is a verdict map ("1.2.3.4 : jump foo") +2. timeouts are enabled + +In this case, following sequence is problematic: + +1. element E in set S refers to chain C +2. userspace requests removal of set S +3. kernel does a set walk to decrement chain->use count for all elements + from preparation phase +4. kernel does another set walk to remove elements from the commit phase + (or another walk to do a chain->use increment for all elements from + abort phase) + +If E has already expired in 1), it will be ignored during list walk, so its use count +won't have been changed. + +Then, when set is culled, ->destroy callback will zap the element via +nf_tables_set_elem_destroy(), but this function is only safe for +elements that have been deactivated earlier from the preparation phase: +lack of earlier deactivate removes the element but leaks the chain use +count, which results in a WARN splat when the chain gets removed later, +plus a leak of the nft_chain structure. + +Update pipapo_get() not to skip expired elements, otherwise flush +command reports bogus ENOENT errors. 
+ +Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges") +Fixes: 8d8540c4f5e0 ("netfilter: nft_set_rbtree: add timeout support") +Fixes: 9d0982927e79 ("netfilter: nft_hash: add support for timeouts") +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nf_tables_api.c | 4 ++++ + net/netfilter/nft_set_hash.c | 2 -- + net/netfilter/nft_set_rbtree.c | 2 -- + 3 files changed, 4 insertions(+), 4 deletions(-) + +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -4159,8 +4159,12 @@ static int nf_tables_dump_setelem(const + const struct nft_set_iter *iter, + struct nft_set_elem *elem) + { ++ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); + struct nft_set_dump_args *args; + ++ if (nft_set_elem_expired(ext)) ++ return 0; ++ + args = container_of(iter, struct nft_set_dump_args, iter); + return nf_tables_fill_setelem(args->skb, set, elem); + } +--- a/net/netfilter/nft_set_hash.c ++++ b/net/netfilter/nft_set_hash.c +@@ -268,8 +268,6 @@ static void nft_rhash_walk(const struct + + if (iter->count < iter->skip) + goto cont; +- if (nft_set_elem_expired(&he->ext)) +- goto cont; + if (!nft_set_elem_active(&he->ext, iter->genmask)) + goto cont; + +--- a/net/netfilter/nft_set_rbtree.c ++++ b/net/netfilter/nft_set_rbtree.c +@@ -556,8 +556,6 @@ static void nft_rbtree_walk(const struct + + if (iter->count < iter->skip) + goto cont; +- if (nft_set_elem_expired(&rbe->ext)) +- goto cont; + if (!nft_set_elem_active(&rbe->ext, iter->genmask)) + goto cont; + diff --git a/queue-4.19/netfilter-nf_tables-double-hook-unregistration-in-netns-path.patch b/queue-4.19/netfilter-nf_tables-double-hook-unregistration-in-netns-path.patch new file mode 100644 index 00000000000..16a5b975c90 --- /dev/null +++ b/queue-4.19/netfilter-nf_tables-double-hook-unregistration-in-netns-path.patch @@ -0,0 +1,126 @@ +From stable+bounces-50362-greg=kroah.com@vger.kernel.org Thu Jun 13 03:03:28 2024 +From: Pablo Neira Ayuso +Date: Thu, 13 Jun 2024 03:01:51 +0200 +Subject: netfilter: nf_tables: double hook unregistration in netns path +To: netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: <20240613010209.104423-23-pablo@netfilter.org> + +From: Pablo Neira Ayuso + +commit f9a43007d3f7ba76d5e7f9421094f00f2ef202f8 upstream. + +__nft_release_hooks() is called from pre_netns exit path which +unregisters the hooks, then the NETDEV_UNREGISTER event is triggered +which unregisters the hooks again. + +[ 565.221461] WARNING: CPU: 18 PID: 193 at net/netfilter/core.c:495 __nf_unregister_net_hook+0x247/0x270 +[...] +[ 565.246890] CPU: 18 PID: 193 Comm: kworker/u64:1 Tainted: G E 5.18.0-rc7+ #27 +[ 565.253682] Workqueue: netns cleanup_net +[ 565.257059] RIP: 0010:__nf_unregister_net_hook+0x247/0x270 +[...] +[ 565.297120] Call Trace: +[ 565.300900] +[ 565.304683] nf_tables_flowtable_event+0x16a/0x220 [nf_tables] +[ 565.308518] raw_notifier_call_chain+0x63/0x80 +[ 565.312386] unregister_netdevice_many+0x54f/0xb50 + +Unregister and destroy netdev hook from netns pre_exit via kfree_rcu +so the NETDEV_UNREGISTER path see unregistered hooks. 
+ +Fixes: 767d1216bff8 ("netfilter: nftables: fix possible UAF over chains from packet path in netns") +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nf_tables_api.c | 34 +++++++++++++++++++++++++++------- + net/netfilter/nft_chain_filter.c | 3 +++ + 2 files changed, 30 insertions(+), 7 deletions(-) + +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -204,9 +204,10 @@ static int nf_tables_register_hook(struc + return nf_register_net_hook(net, ops); + } + +-static void nf_tables_unregister_hook(struct net *net, +- const struct nft_table *table, +- struct nft_chain *chain) ++static void __nf_tables_unregister_hook(struct net *net, ++ const struct nft_table *table, ++ struct nft_chain *chain, ++ bool release_netdev) + { + const struct nft_base_chain *basechain; + const struct nf_hook_ops *ops; +@@ -221,6 +222,16 @@ static void nf_tables_unregister_hook(st + return basechain->type->ops_unregister(net, ops); + + nf_unregister_net_hook(net, ops); ++ if (release_netdev && ++ table->family == NFPROTO_NETDEV) ++ nft_base_chain(chain)->ops.dev = NULL; ++} ++ ++static void nf_tables_unregister_hook(struct net *net, ++ const struct nft_table *table, ++ struct nft_chain *chain) ++{ ++ __nf_tables_unregister_hook(net, table, chain, false); + } + + static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type) +@@ -5821,8 +5832,9 @@ nft_flowtable_type_get(struct net *net, + return ERR_PTR(-ENOENT); + } + +-static void nft_unregister_flowtable_net_hooks(struct net *net, +- struct nft_flowtable *flowtable) ++static void __nft_unregister_flowtable_net_hooks(struct net *net, ++ struct nft_flowtable *flowtable, ++ bool release_netdev) + { + int i; + +@@ -5831,9 +5843,17 @@ static void nft_unregister_flowtable_net + continue; + + nf_unregister_net_hook(net, &flowtable->ops[i]); ++ if (release_netdev) ++ flowtable->ops[i].dev = NULL; + } + } + ++static void nft_unregister_flowtable_net_hooks(struct net *net, ++ struct nft_flowtable *flowtable) ++{ ++ __nft_unregister_flowtable_net_hooks(net, flowtable, false); ++} ++ + static int nf_tables_newflowtable(struct net *net, struct sock *nlsk, + struct sk_buff *skb, + const struct nlmsghdr *nlh, +@@ -7862,9 +7882,9 @@ static void __nft_release_table(struct n + ctx.family = table->family; + + list_for_each_entry(chain, &table->chains, list) +- nf_tables_unregister_hook(net, table, chain); ++ __nf_tables_unregister_hook(net, table, chain, true); + list_for_each_entry(flowtable, &table->flowtables, list) +- nft_unregister_flowtable_net_hooks(net, flowtable); ++ __nft_unregister_flowtable_net_hooks(net, flowtable, true); + /* No packets are walking on these chains anymore. */ + ctx.table = table; + list_for_each_entry(chain, &table->chains, list) { +--- a/net/netfilter/nft_chain_filter.c ++++ b/net/netfilter/nft_chain_filter.c +@@ -296,6 +296,9 @@ static void nft_netdev_event(unsigned lo + if (strcmp(basechain->dev_name, dev->name) != 0) + return; + ++ if (!basechain->ops.dev) ++ return; ++ + /* UNREGISTER events are also happpening on netns exit. 
+ * + * Altough nf_tables core releases all tables/chains, only diff --git a/queue-4.19/netfilter-nf_tables-drop-map-element-references-from-preparation-phase.patch b/queue-4.19/netfilter-nf_tables-drop-map-element-references-from-preparation-phase.patch new file mode 100644 index 00000000000..22057eb9e12 --- /dev/null +++ b/queue-4.19/netfilter-nf_tables-drop-map-element-references-from-preparation-phase.patch @@ -0,0 +1,328 @@ +From stable+bounces-50344-greg=kroah.com@vger.kernel.org Thu Jun 13 03:02:39 2024 +From: Pablo Neira Ayuso +Date: Thu, 13 Jun 2024 03:01:32 +0200 +Subject: netfilter: nf_tables: drop map element references from preparation phase +To: netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: <20240613010209.104423-4-pablo@netfilter.org> + +From: Pablo Neira Ayuso + +[ Upstream commit 628bd3e49cba1c066228e23d71a852c23e26da73 ] + +set .destroy callback releases the references to other objects in maps. +This is very late and it results in spurious EBUSY errors. Drop refcount +from the preparation phase instead, update set backend not to drop +reference counter from set .destroy path. + +Exceptions: NFT_TRANS_PREPARE_ERROR does not require to drop the +reference counter because the transaction abort path releases the map +references for each element since the set is unbound. The abort path +also deals with releasing reference counter for new elements added to +unbound sets. + +Fixes: 591054469b3e ("netfilter: nf_tables: revisit chain/object refcounting from elements") +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + include/net/netfilter/nf_tables.h | 5 +- + net/netfilter/nf_tables_api.c | 89 ++++++++++++++++++++++++++++++++++---- + net/netfilter/nft_set_bitmap.c | 5 +- + net/netfilter/nft_set_hash.c | 23 +++++++-- + net/netfilter/nft_set_rbtree.c | 5 +- + 5 files changed, 108 insertions(+), 19 deletions(-) + +--- a/include/net/netfilter/nf_tables.h ++++ b/include/net/netfilter/nf_tables.h +@@ -349,7 +349,8 @@ struct nft_set_ops { + int (*init)(const struct nft_set *set, + const struct nft_set_desc *desc, + const struct nlattr * const nla[]); +- void (*destroy)(const struct nft_set *set); ++ void (*destroy)(const struct nft_ctx *ctx, ++ const struct nft_set *set); + void (*gc_init)(const struct nft_set *set); + + unsigned int elemsize; +@@ -645,6 +646,8 @@ void *nft_set_elem_init(const struct nft + u64 timeout, gfp_t gfp); + void nft_set_elem_destroy(const struct nft_set *set, void *elem, + bool destroy_expr); ++void nf_tables_set_elem_destroy(const struct nft_ctx *ctx, ++ const struct nft_set *set, void *elem); + + /** + * struct nft_set_gc_batch_head - nf_tables set garbage collection batch +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -388,6 +388,31 @@ static int nft_trans_set_add(const struc + return 0; + } + ++static void nft_setelem_data_deactivate(const struct net *net, ++ const struct nft_set *set, ++ struct nft_set_elem *elem); ++ ++static int nft_mapelem_deactivate(const struct nft_ctx *ctx, ++ struct nft_set *set, ++ const struct nft_set_iter *iter, ++ struct nft_set_elem *elem) ++{ ++ nft_setelem_data_deactivate(ctx->net, set, elem); ++ ++ return 0; ++} ++ ++static void nft_map_deactivate(const struct nft_ctx *ctx, struct nft_set *set) ++{ ++ struct nft_set_iter iter = { ++ .genmask = nft_genmask_next(ctx->net), ++ .fn = nft_mapelem_deactivate, ++ }; ++ ++ set->ops->walk(ctx, set, &iter); ++ WARN_ON_ONCE(iter.err); ++} ++ + static int 
nft_delset(const struct nft_ctx *ctx, struct nft_set *set) + { + int err; +@@ -396,6 +421,9 @@ static int nft_delset(const struct nft_c + if (err < 0) + return err; + ++ if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) ++ nft_map_deactivate(ctx, set); ++ + nft_deactivate_next(ctx->net, set); + nft_use_dec(&ctx->table->use); + +@@ -3741,7 +3769,7 @@ static int nf_tables_newset(struct net * + return 0; + + err4: +- ops->destroy(set); ++ ops->destroy(&ctx, set); + err3: + kfree(set->name); + err2: +@@ -3758,7 +3786,7 @@ static void nft_set_destroy(const struct + if (WARN_ON(set->use > 0)) + return; + +- set->ops->destroy(set); ++ set->ops->destroy(ctx, set); + module_put(to_set_type(set->ops)->owner); + kfree(set->name); + kvfree(set); +@@ -3883,10 +3911,39 @@ void nf_tables_unbind_set(const struct n + } + EXPORT_SYMBOL_GPL(nf_tables_unbind_set); + ++static void nft_setelem_data_activate(const struct net *net, ++ const struct nft_set *set, ++ struct nft_set_elem *elem); ++ ++static int nft_mapelem_activate(const struct nft_ctx *ctx, ++ struct nft_set *set, ++ const struct nft_set_iter *iter, ++ struct nft_set_elem *elem) ++{ ++ nft_setelem_data_activate(ctx->net, set, elem); ++ ++ return 0; ++} ++ ++static void nft_map_activate(const struct nft_ctx *ctx, struct nft_set *set) ++{ ++ struct nft_set_iter iter = { ++ .genmask = nft_genmask_next(ctx->net), ++ .fn = nft_mapelem_activate, ++ }; ++ ++ set->ops->walk(ctx, set, &iter); ++ WARN_ON_ONCE(iter.err); ++} ++ + void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set) + { +- if (nft_set_is_anonymous(set)) ++ if (nft_set_is_anonymous(set)) { ++ if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) ++ nft_map_activate(ctx, set); ++ + nft_clear(ctx->net, set); ++ } + + nft_use_inc_restore(&set->use); + } +@@ -3907,13 +3964,20 @@ void nf_tables_deactivate_set(const stru + nft_use_dec(&set->use); + break; + case NFT_TRANS_PREPARE: +- if (nft_set_is_anonymous(set)) +- nft_deactivate_next(ctx->net, set); ++ if (nft_set_is_anonymous(set)) { ++ if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) ++ nft_map_deactivate(ctx, set); + ++ nft_deactivate_next(ctx->net, set); ++ } + nft_use_dec(&set->use); + return; + case NFT_TRANS_ABORT: + case NFT_TRANS_RELEASE: ++ if (nft_set_is_anonymous(set) && ++ set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) ++ nft_map_deactivate(ctx, set); ++ + nft_use_dec(&set->use); + /* fall through */ + default: +@@ -4473,6 +4537,7 @@ void *nft_set_elem_init(const struct nft + return elem; + } + ++/* Drop references and destroy. Called from gc, dynset and abort path. */ + void nft_set_elem_destroy(const struct nft_set *set, void *elem, + bool destroy_expr) + { +@@ -4501,11 +4566,11 @@ void nft_set_elem_destroy(const struct n + } + EXPORT_SYMBOL_GPL(nft_set_elem_destroy); + +-/* Only called from commit path, nft_setelem_data_deactivate() already deals +- * with the refcounting from the preparation phase. ++/* Destroy element. References have been already dropped in the preparation ++ * path via nft_setelem_data_deactivate(). 
+ */ +-static void nf_tables_set_elem_destroy(const struct nft_ctx *ctx, +- const struct nft_set *set, void *elem) ++void nf_tables_set_elem_destroy(const struct nft_ctx *ctx, ++ const struct nft_set *set, void *elem) + { + struct nft_set_ext *ext = nft_set_elem_ext(set, elem); + +@@ -4513,6 +4578,7 @@ static void nf_tables_set_elem_destroy(c + nf_tables_expr_destroy(ctx, nft_set_ext_expr(ext)); + kfree(elem); + } ++EXPORT_SYMBOL_GPL(nf_tables_set_elem_destroy); + + static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, + const struct nlattr *attr, u32 nlmsg_flags) +@@ -6940,6 +7006,8 @@ static int __nf_tables_abort(struct net + case NFT_MSG_DELSET: + nft_use_inc_restore(&trans->ctx.table->use); + nft_clear(trans->ctx.net, nft_trans_set(trans)); ++ if (nft_trans_set(trans)->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) ++ nft_map_activate(&trans->ctx, nft_trans_set(trans)); + nft_trans_destroy(trans); + break; + case NFT_MSG_NEWSETELEM: +@@ -7604,6 +7672,9 @@ static void __nft_release_table(struct n + list_for_each_entry_safe(set, ns, &table->sets, list) { + list_del(&set->list); + nft_use_dec(&table->use); ++ if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) ++ nft_map_deactivate(&ctx, set); ++ + nft_set_destroy(&ctx, set); + } + list_for_each_entry_safe(obj, ne, &table->objects, list) { +--- a/net/netfilter/nft_set_bitmap.c ++++ b/net/netfilter/nft_set_bitmap.c +@@ -273,13 +273,14 @@ static int nft_bitmap_init(const struct + return 0; + } + +-static void nft_bitmap_destroy(const struct nft_set *set) ++static void nft_bitmap_destroy(const struct nft_ctx *ctx, ++ const struct nft_set *set) + { + struct nft_bitmap *priv = nft_set_priv(set); + struct nft_bitmap_elem *be, *n; + + list_for_each_entry_safe(be, n, &priv->list, head) +- nft_set_elem_destroy(set, be, true); ++ nf_tables_set_elem_destroy(ctx, set, be); + } + + static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features, +--- a/net/netfilter/nft_set_hash.c ++++ b/net/netfilter/nft_set_hash.c +@@ -379,19 +379,31 @@ static int nft_rhash_init(const struct n + return 0; + } + ++struct nft_rhash_ctx { ++ const struct nft_ctx ctx; ++ const struct nft_set *set; ++}; ++ + static void nft_rhash_elem_destroy(void *ptr, void *arg) + { +- nft_set_elem_destroy(arg, ptr, true); ++ struct nft_rhash_ctx *rhash_ctx = arg; ++ ++ nf_tables_set_elem_destroy(&rhash_ctx->ctx, rhash_ctx->set, ptr); + } + +-static void nft_rhash_destroy(const struct nft_set *set) ++static void nft_rhash_destroy(const struct nft_ctx *ctx, ++ const struct nft_set *set) + { + struct nft_rhash *priv = nft_set_priv(set); ++ struct nft_rhash_ctx rhash_ctx = { ++ .ctx = *ctx, ++ .set = set, ++ }; + + cancel_delayed_work_sync(&priv->gc_work); + rcu_barrier(); + rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy, +- (void *)set); ++ (void *)&rhash_ctx); + } + + /* Number of buckets is stored in u32, so cap our result to 1U<<31 */ +@@ -629,7 +641,8 @@ static int nft_hash_init(const struct nf + return 0; + } + +-static void nft_hash_destroy(const struct nft_set *set) ++static void nft_hash_destroy(const struct nft_ctx *ctx, ++ const struct nft_set *set) + { + struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_elem *he; +@@ -639,7 +652,7 @@ static void nft_hash_destroy(const struc + for (i = 0; i < priv->buckets; i++) { + hlist_for_each_entry_safe(he, next, &priv->table[i], node) { + hlist_del_rcu(&he->node); +- nft_set_elem_destroy(set, he, true); ++ nf_tables_set_elem_destroy(ctx, set, he); + } + } + } +--- 
a/net/netfilter/nft_set_rbtree.c ++++ b/net/netfilter/nft_set_rbtree.c +@@ -466,7 +466,8 @@ static int nft_rbtree_init(const struct + return 0; + } + +-static void nft_rbtree_destroy(const struct nft_set *set) ++static void nft_rbtree_destroy(const struct nft_ctx *ctx, ++ const struct nft_set *set) + { + struct nft_rbtree *priv = nft_set_priv(set); + struct nft_rbtree_elem *rbe; +@@ -477,7 +478,7 @@ static void nft_rbtree_destroy(const str + while ((node = priv->root.rb_node) != NULL) { + rb_erase(node, &priv->root); + rbe = rb_entry(node, struct nft_rbtree_elem, node); +- nft_set_elem_destroy(set, rbe, true); ++ nf_tables_set_elem_destroy(ctx, set, rbe); + } + } + diff --git a/queue-4.19/netfilter-nf_tables-fix-gc-transaction-races-with-netns-and-netlink-event-exit-path.patch b/queue-4.19/netfilter-nf_tables-fix-gc-transaction-races-with-netns-and-netlink-event-exit-path.patch new file mode 100644 index 00000000000..1709ef9f073 --- /dev/null +++ b/queue-4.19/netfilter-nf_tables-fix-gc-transaction-races-with-netns-and-netlink-event-exit-path.patch @@ -0,0 +1,89 @@ +From stable+bounces-50352-greg=kroah.com@vger.kernel.org Thu Jun 13 03:02:58 2024 +From: Pablo Neira Ayuso +Date: Thu, 13 Jun 2024 03:01:42 +0200 +Subject: netfilter: nf_tables: fix GC transaction races with netns and netlink event exit path +To: netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: <20240613010209.104423-14-pablo@netfilter.org> + +From: Pablo Neira Ayuso + +commit 6a33d8b73dfac0a41f3877894b38082bd0c9a5bc upstream. + +Netlink event path is missing a synchronization point with GC +transactions. Add GC sequence number update to netns release path and +netlink event path, any GC transaction losing race will be discarded. + +Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Florian Westphal +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nf_tables_api.c | 30 ++++++++++++++++++++++++++---- + 1 file changed, 26 insertions(+), 4 deletions(-) + +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -6899,6 +6899,22 @@ void nft_trans_gc_queue_sync_done(struct + } + EXPORT_SYMBOL_GPL(nft_trans_gc_queue_sync_done); + ++static unsigned int nft_gc_seq_begin(struct nftables_pernet *nft_net) ++{ ++ unsigned int gc_seq; ++ ++ /* Bump gc counter, it becomes odd, this is the busy mark. */ ++ gc_seq = READ_ONCE(nft_net->gc_seq); ++ WRITE_ONCE(nft_net->gc_seq, ++gc_seq); ++ ++ return gc_seq; ++} ++ ++static void nft_gc_seq_end(struct nftables_pernet *nft_net, unsigned int gc_seq) ++{ ++ WRITE_ONCE(nft_net->gc_seq, ++gc_seq); ++} ++ + static int nf_tables_commit(struct net *net, struct sk_buff *skb) + { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); +@@ -6953,9 +6969,7 @@ static int nf_tables_commit(struct net * + while (++nft_net->base_seq == 0) + ; + +- /* Bump gc counter, it becomes odd, this is the busy mark. */ +- gc_seq = READ_ONCE(nft_net->gc_seq); +- WRITE_ONCE(nft_net->gc_seq, ++gc_seq); ++ gc_seq = nft_gc_seq_begin(nft_net); + + /* step 3. Start new generation, rules_gen_X now in use. 
*/ + net->nft.gencursor = nft_gencursor_next(net); +@@ -7083,7 +7097,7 @@ static int nf_tables_commit(struct net * + nf_tables_commit_release(net); + nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN); + +- WRITE_ONCE(nft_net->gc_seq, ++gc_seq); ++ nft_gc_seq_end(nft_net, gc_seq); + mutex_unlock(&nft_net->commit_mutex); + + return 0; +@@ -7898,11 +7912,19 @@ static int __net_init nf_tables_init_net + static void __net_exit nf_tables_exit_net(struct net *net) + { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); ++ unsigned int gc_seq; + + mutex_lock(&nft_net->commit_mutex); ++ ++ gc_seq = nft_gc_seq_begin(nft_net); ++ + if (!list_empty(&nft_net->commit_list)) + __nf_tables_abort(net); ++ + __nft_release_tables(net); ++ ++ nft_gc_seq_end(nft_net, gc_seq); ++ + mutex_unlock(&nft_net->commit_mutex); + WARN_ON_ONCE(!list_empty(&nft_net->tables)); + } diff --git a/queue-4.19/netfilter-nf_tables-fix-memleak-when-more-than-255-elements-expired.patch b/queue-4.19/netfilter-nf_tables-fix-memleak-when-more-than-255-elements-expired.patch new file mode 100644 index 00000000000..cdd59ec4d03 --- /dev/null +++ b/queue-4.19/netfilter-nf_tables-fix-memleak-when-more-than-255-elements-expired.patch @@ -0,0 +1,83 @@ +From stable+bounces-50360-greg=kroah.com@vger.kernel.org Thu Jun 13 03:03:20 2024 +From: Pablo Neira Ayuso +Date: Thu, 13 Jun 2024 03:01:49 +0200 +Subject: netfilter: nf_tables: fix memleak when more than 255 elements expired +To: netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: <20240613010209.104423-21-pablo@netfilter.org> + +From: Pablo Neira Ayuso + +commit cf5000a7787cbc10341091d37245a42c119d26c5 upstream. + +When more than 255 elements expired we're supposed to switch to a new gc +container structure. + +This never happens: u8 type will wrap before reaching the boundary +and nft_trans_gc_space() always returns true. + +This means we recycle the initial gc container structure and +lose track of the elements that came before. + +While at it, don't deref 'gc' after we've passed it to call_rcu. 
+ +Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") +Reported-by: Pablo Neira Ayuso +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + include/net/netfilter/nf_tables.h | 2 +- + net/netfilter/nf_tables_api.c | 10 ++++++++-- + 2 files changed, 9 insertions(+), 3 deletions(-) + +--- a/include/net/netfilter/nf_tables.h ++++ b/include/net/netfilter/nf_tables.h +@@ -1389,7 +1389,7 @@ struct nft_trans_gc { + struct net *net; + struct nft_set *set; + u32 seq; +- u8 count; ++ u16 count; + void *priv[NFT_TRANS_GC_BATCHCOUNT]; + struct rcu_head rcu; + }; +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -6857,12 +6857,15 @@ static int nft_trans_gc_space(struct nft + struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc, + unsigned int gc_seq, gfp_t gfp) + { ++ struct nft_set *set; ++ + if (nft_trans_gc_space(gc)) + return gc; + ++ set = gc->set; + nft_trans_gc_queue_work(gc); + +- return nft_trans_gc_alloc(gc->set, gc_seq, gfp); ++ return nft_trans_gc_alloc(set, gc_seq, gfp); + } + EXPORT_SYMBOL_GPL(nft_trans_gc_queue_async); + +@@ -6879,15 +6882,18 @@ EXPORT_SYMBOL_GPL(nft_trans_gc_queue_asy + + struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp) + { ++ struct nft_set *set; ++ + if (WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net))) + return NULL; + + if (nft_trans_gc_space(gc)) + return gc; + ++ set = gc->set; + call_rcu(&gc->rcu, nft_trans_gc_trans_free); + +- return nft_trans_gc_alloc(gc->set, 0, gfp); ++ return nft_trans_gc_alloc(set, 0, gfp); + } + EXPORT_SYMBOL_GPL(nft_trans_gc_queue_sync); + diff --git a/queue-4.19/netfilter-nf_tables-fix-table-flag-updates.patch b/queue-4.19/netfilter-nf_tables-fix-table-flag-updates.patch new file mode 100644 index 00000000000..4caf0ad8742 --- /dev/null +++ b/queue-4.19/netfilter-nf_tables-fix-table-flag-updates.patch @@ -0,0 +1,194 @@ +From stable+bounces-50364-greg=kroah.com@vger.kernel.org Thu Jun 13 03:03:31 2024 +From: Pablo Neira Ayuso +Date: Thu, 13 Jun 2024 03:01:53 +0200 +Subject: netfilter: nf_tables: fix table flag updates +To: netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: <20240613010209.104423-25-pablo@netfilter.org> + +From: Pablo Neira Ayuso + +commit 179d9ba5559a756f4322583388b3213fe4e391b0 upstream. + +The dormant flag need to be updated from the preparation phase, +otherwise, two consecutive requests to dorm a table in the same batch +might try to remove the same hooks twice, resulting in the following +warning: + + hook not found, pf 3 num 0 + WARNING: CPU: 0 PID: 334 at net/netfilter/core.c:480 __nf_unregister_net_hook+0x1eb/0x610 net/netfilter/core.c:480 + Modules linked in: + CPU: 0 PID: 334 Comm: kworker/u4:5 Not tainted 5.12.0-syzkaller #0 + Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 + Workqueue: netns cleanup_net + RIP: 0010:__nf_unregister_net_hook+0x1eb/0x610 net/netfilter/core.c:480 + +This patch is a partial revert of 0ce7cf4127f1 ("netfilter: nftables: +update table flags from the commit phase") to restore the previous +behaviour. + +However, there is still another problem: A batch containing a series of +dorm-wakeup-dorm table and vice-versa also trigger the warning above +since hook unregistration happens from the preparation phase, while hook +registration occurs from the commit phase. 
+ +To fix this problem, this patch adds two internal flags to annotate the +original dormant flag status which are __NFT_TABLE_F_WAS_DORMANT and +__NFT_TABLE_F_WAS_AWAKEN, to restore it from the abort path. + +The __NFT_TABLE_F_UPDATE bitmask allows to handle the dormant flag update +with one single transaction. + +Reported-by: syzbot+7ad5cd1615f2d89c6e7e@syzkaller.appspotmail.com +Fixes: 0ce7cf4127f1 ("netfilter: nftables: update table flags from the commit phase") +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + include/net/netfilter/nf_tables.h | 6 --- + include/uapi/linux/netfilter/nf_tables.h | 1 + net/netfilter/nf_tables_api.c | 59 +++++++++++++++++++++---------- + 3 files changed, 41 insertions(+), 25 deletions(-) + +--- a/include/net/netfilter/nf_tables.h ++++ b/include/net/netfilter/nf_tables.h +@@ -1347,16 +1347,10 @@ struct nft_trans_chain { + + struct nft_trans_table { + bool update; +- u8 state; +- u32 flags; + }; + + #define nft_trans_table_update(trans) \ + (((struct nft_trans_table *)trans->data)->update) +-#define nft_trans_table_state(trans) \ +- (((struct nft_trans_table *)trans->data)->state) +-#define nft_trans_table_flags(trans) \ +- (((struct nft_trans_table *)trans->data)->flags) + + struct nft_trans_elem { + struct nft_set *set; +--- a/include/uapi/linux/netfilter/nf_tables.h ++++ b/include/uapi/linux/netfilter/nf_tables.h +@@ -162,6 +162,7 @@ enum nft_hook_attributes { + enum nft_table_flags { + NFT_TABLE_F_DORMANT = 0x1, + }; ++#define NFT_TABLE_F_MASK (NFT_TABLE_F_DORMANT) + + /** + * enum nft_table_attributes - nf_tables table netlink attributes +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -676,7 +676,8 @@ static int nf_tables_fill_table_info(str + goto nla_put_failure; + + if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) || +- nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)) || ++ nla_put_be32(skb, NFTA_TABLE_FLAGS, ++ htonl(table->flags & NFT_TABLE_F_MASK)) || + nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use)) || + nla_put_be64(skb, NFTA_TABLE_HANDLE, cpu_to_be64(table->handle), + NFTA_TABLE_PAD)) +@@ -865,20 +866,22 @@ err: + + static void nf_tables_table_disable(struct net *net, struct nft_table *table) + { ++ table->flags &= ~NFT_TABLE_F_DORMANT; + nft_table_disable(net, table, 0); ++ table->flags |= NFT_TABLE_F_DORMANT; + } + +-enum { +- NFT_TABLE_STATE_UNCHANGED = 0, +- NFT_TABLE_STATE_DORMANT, +- NFT_TABLE_STATE_WAKEUP +-}; ++#define __NFT_TABLE_F_INTERNAL (NFT_TABLE_F_MASK + 1) ++#define __NFT_TABLE_F_WAS_DORMANT (__NFT_TABLE_F_INTERNAL << 0) ++#define __NFT_TABLE_F_WAS_AWAKEN (__NFT_TABLE_F_INTERNAL << 1) ++#define __NFT_TABLE_F_UPDATE (__NFT_TABLE_F_WAS_DORMANT | \ ++ __NFT_TABLE_F_WAS_AWAKEN) + + static int nf_tables_updtable(struct nft_ctx *ctx) + { + struct nft_trans *trans; + u32 flags; +- int ret = 0; ++ int ret; + + if (!ctx->nla[NFTA_TABLE_FLAGS]) + return 0; +@@ -897,21 +900,27 @@ static int nf_tables_updtable(struct nft + + if ((flags & NFT_TABLE_F_DORMANT) && + !(ctx->table->flags & NFT_TABLE_F_DORMANT)) { +- nft_trans_table_state(trans) = NFT_TABLE_STATE_DORMANT; ++ ctx->table->flags |= NFT_TABLE_F_DORMANT; ++ if (!(ctx->table->flags & __NFT_TABLE_F_UPDATE)) ++ ctx->table->flags |= __NFT_TABLE_F_WAS_AWAKEN; + } else if (!(flags & NFT_TABLE_F_DORMANT) && + ctx->table->flags & NFT_TABLE_F_DORMANT) { +- ret = nf_tables_table_enable(ctx->net, ctx->table); +- if (ret >= 0) +- nft_trans_table_state(trans) = NFT_TABLE_STATE_WAKEUP; ++ ctx->table->flags &= 
~NFT_TABLE_F_DORMANT; ++ if (!(ctx->table->flags & __NFT_TABLE_F_UPDATE)) { ++ ret = nf_tables_table_enable(ctx->net, ctx->table); ++ if (ret < 0) ++ goto err_register_hooks; ++ ++ ctx->table->flags |= __NFT_TABLE_F_WAS_DORMANT; ++ } + } +- if (ret < 0) +- goto err; + +- nft_trans_table_flags(trans) = flags; + nft_trans_table_update(trans) = true; + nft_trans_commit_list_add_tail(ctx->net, trans); ++ + return 0; +-err: ++ ++err_register_hooks: + nft_trans_destroy(trans); + return ret; + } +@@ -7013,10 +7022,14 @@ static int nf_tables_commit(struct net * + switch (trans->msg_type) { + case NFT_MSG_NEWTABLE: + if (nft_trans_table_update(trans)) { +- if (nft_trans_table_state(trans) == NFT_TABLE_STATE_DORMANT) ++ if (!(trans->ctx.table->flags & __NFT_TABLE_F_UPDATE)) { ++ nft_trans_destroy(trans); ++ break; ++ } ++ if (trans->ctx.table->flags & NFT_TABLE_F_DORMANT) + nf_tables_table_disable(net, trans->ctx.table); + +- trans->ctx.table->flags = nft_trans_table_flags(trans); ++ trans->ctx.table->flags &= ~__NFT_TABLE_F_UPDATE; + } else { + nft_clear(net, trans->ctx.table); + } +@@ -7177,9 +7190,17 @@ static int __nf_tables_abort(struct net + switch (trans->msg_type) { + case NFT_MSG_NEWTABLE: + if (nft_trans_table_update(trans)) { +- if (nft_trans_table_state(trans) == NFT_TABLE_STATE_WAKEUP) ++ if (!(trans->ctx.table->flags & __NFT_TABLE_F_UPDATE)) { ++ nft_trans_destroy(trans); ++ break; ++ } ++ if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_DORMANT) { + nf_tables_table_disable(net, trans->ctx.table); +- ++ trans->ctx.table->flags |= NFT_TABLE_F_DORMANT; ++ } else if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_AWAKEN) { ++ trans->ctx.table->flags &= ~NFT_TABLE_F_DORMANT; ++ } ++ trans->ctx.table->flags &= ~__NFT_TABLE_F_UPDATE; + nft_trans_destroy(trans); + } else { + list_del_rcu(&trans->ctx.table->list); diff --git a/queue-4.19/netfilter-nf_tables-gc-transaction-api-to-avoid-race-with-control-plane.patch b/queue-4.19/netfilter-nf_tables-gc-transaction-api-to-avoid-race-with-control-plane.patch new file mode 100644 index 00000000000..a1c156bfe0a --- /dev/null +++ b/queue-4.19/netfilter-nf_tables-gc-transaction-api-to-avoid-race-with-control-plane.patch @@ -0,0 +1,544 @@ +From stable+bounces-50357-greg=kroah.com@vger.kernel.org Thu Jun 13 03:03:13 2024 +From: Pablo Neira Ayuso +Date: Thu, 13 Jun 2024 03:01:39 +0200 +Subject: netfilter: nf_tables: GC transaction API to avoid race with control plane +To: netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: <20240613010209.104423-11-pablo@netfilter.org> + +From: Pablo Neira Ayuso + +commit 5f68718b34a531a556f2f50300ead2862278da26 upstream. + +[ this includes + 8357bc946a2a ("netfilter: nf_tables: use correct lock to protect gc_list") ] + +The set types rhashtable and rbtree use a GC worker to reclaim memory. +>From system work queue, in periodic intervals, a scan of the table is +done. + +The major caveat here is that the nft transaction mutex is not held. +This causes a race between control plane and GC when they attempt to +delete the same element. 
+ +We cannot grab the netlink mutex from the work queue, because the +control plane has to wait for the GC work queue in case the set is to be +removed, so we get following deadlock: + + cpu 1 cpu2 + GC work transaction comes in , lock nft mutex + `acquire nft mutex // BLOCKS + transaction asks to remove the set + set destruction calls cancel_work_sync() + +cancel_work_sync will now block forever, because it is waiting for the +mutex the caller already owns. + +This patch adds a new API that deals with garbage collection in two +steps: + +1) Lockless GC of expired elements sets on the NFT_SET_ELEM_DEAD_BIT + so they are not visible via lookup. Annotate current GC sequence in + the GC transaction. Enqueue GC transaction work as soon as it is + full. If ruleset is updated, then GC transaction is aborted and + retried later. + +2) GC work grabs the mutex. If GC sequence has changed then this GC + transaction lost race with control plane, abort it as it contains + stale references to objects and let GC try again later. If the + ruleset is intact, then this GC transaction deactivates and removes + the elements and it uses call_rcu() to destroy elements. + +Note that no elements are removed from GC lockless path, the _DEAD bit +is set and pointers are collected. GC catchall does not remove the +elements anymore too. There is a new set->dead flag that is set on to +abort the GC transaction to deal with set->ops->destroy() path which +removes the remaining elements in the set from commit_release, where no +mutex is held. + +To deal with GC when mutex is held, which allows safe deactivate and +removal, add sync GC API which releases the set element object via +call_rcu(). This is used by rbtree and pipapo backends which also +perform garbage collection from control plane path. + +Since element removal from sets can happen from control plane and +element garbage collection/timeout, it is necessary to keep the set +structure alive until all elements have been deactivated and destroyed. + +We cannot do a cancel_work_sync or flush_work in nft_set_destroy because +its called with the transaction mutex held, but the aforementioned async +work queue might be blocked on the very mutex that nft_set_destroy() +callchain is sitting on. + +This gives us the choice of ABBA deadlock or UaF. + +To avoid both, add set->refs refcount_t member. The GC API can then +increment the set refcount and release it once the elements have been +free'd. + +Set backends are adapted to use the GC transaction API in a follow up +patch entitled: + + ("netfilter: nf_tables: use gc transaction API in set backends") + +This is joint work with Florian Westphal. 
+ +Fixes: cfed7e1b1f8e ("netfilter: nf_tables: add set garbage collection helpers") +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + include/net/netfilter/nf_tables.h | 61 ++++++++++ + net/netfilter/nf_tables_api.c | 225 ++++++++++++++++++++++++++++++++++++-- + 2 files changed, 276 insertions(+), 10 deletions(-) + +--- a/include/net/netfilter/nf_tables.h ++++ b/include/net/netfilter/nf_tables.h +@@ -380,6 +380,7 @@ void nft_unregister_set(struct nft_set_t + * + * @list: table set list node + * @bindings: list of set bindings ++ * @refs: internal refcounting for async set destruction + * @table: table this set belongs to + * @net: netnamespace this set belongs to + * @name: name of the set +@@ -406,6 +407,7 @@ void nft_unregister_set(struct nft_set_t + struct nft_set { + struct list_head list; + struct list_head bindings; ++ refcount_t refs; + struct nft_table *table; + possible_net_t net; + char *name; +@@ -424,7 +426,8 @@ struct nft_set { + unsigned char *udata; + /* runtime data below here */ + const struct nft_set_ops *ops ____cacheline_aligned; +- u16 flags:14, ++ u16 flags:13, ++ dead:1, + genmask:2; + u8 klen; + u8 dlen; +@@ -1346,6 +1349,32 @@ static inline void nft_set_elem_clear_bu + clear_bit(NFT_SET_ELEM_BUSY_BIT, word); + } + ++#define NFT_SET_ELEM_DEAD_MASK (1 << 3) ++ ++#if defined(__LITTLE_ENDIAN_BITFIELD) ++#define NFT_SET_ELEM_DEAD_BIT 3 ++#elif defined(__BIG_ENDIAN_BITFIELD) ++#define NFT_SET_ELEM_DEAD_BIT (BITS_PER_LONG - BITS_PER_BYTE + 3) ++#else ++#error ++#endif ++ ++static inline void nft_set_elem_dead(struct nft_set_ext *ext) ++{ ++ unsigned long *word = (unsigned long *)ext; ++ ++ BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0); ++ set_bit(NFT_SET_ELEM_DEAD_BIT, word); ++} ++ ++static inline int nft_set_elem_is_dead(const struct nft_set_ext *ext) ++{ ++ unsigned long *word = (unsigned long *)ext; ++ ++ BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0); ++ return test_bit(NFT_SET_ELEM_DEAD_BIT, word); ++} ++ + /** + * struct nft_trans - nf_tables object update in transaction + * +@@ -1439,6 +1468,35 @@ struct nft_trans_flowtable { + #define nft_trans_flowtable(trans) \ + (((struct nft_trans_flowtable *)trans->data)->flowtable) + ++#define NFT_TRANS_GC_BATCHCOUNT 256 ++ ++struct nft_trans_gc { ++ struct list_head list; ++ struct net *net; ++ struct nft_set *set; ++ u32 seq; ++ u8 count; ++ void *priv[NFT_TRANS_GC_BATCHCOUNT]; ++ struct rcu_head rcu; ++}; ++ ++struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set, ++ unsigned int gc_seq, gfp_t gfp); ++void nft_trans_gc_destroy(struct nft_trans_gc *trans); ++ ++struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc, ++ unsigned int gc_seq, gfp_t gfp); ++void nft_trans_gc_queue_async_done(struct nft_trans_gc *gc); ++ ++struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp); ++void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans); ++ ++void nft_trans_gc_elem_add(struct nft_trans_gc *gc, void *priv); ++ ++void nft_setelem_data_deactivate(const struct net *net, ++ const struct nft_set *set, ++ struct nft_set_elem *elem); ++ + int __init nft_chain_filter_init(void); + void nft_chain_filter_fini(void); + +@@ -1451,6 +1509,7 @@ struct nftables_pernet { + struct mutex commit_mutex; + unsigned int base_seq; + u8 validate_state; ++ unsigned int gc_seq; + }; + + #endif /* _NET_NF_TABLES_H */ +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -29,10 +29,13 @@ + #define NFT_SET_MAX_ANONLEN 16 + + unsigned 
int nf_tables_net_id __read_mostly; ++EXPORT_SYMBOL_GPL(nf_tables_net_id); + + static LIST_HEAD(nf_tables_expressions); + static LIST_HEAD(nf_tables_objects); + static LIST_HEAD(nf_tables_flowtables); ++static LIST_HEAD(nf_tables_gc_list); ++static DEFINE_SPINLOCK(nf_tables_gc_list_lock); + static u64 table_handle; + + enum { +@@ -73,6 +76,9 @@ static void nft_validate_state_update(st + nft_net->validate_state = new_validate_state; + } + ++static void nft_trans_gc_work(struct work_struct *work); ++static DECLARE_WORK(trans_gc_work, nft_trans_gc_work); ++ + static void nft_ctx_init(struct nft_ctx *ctx, + struct net *net, + const struct sk_buff *skb, +@@ -388,10 +394,6 @@ static int nft_trans_set_add(const struc + return 0; + } + +-static void nft_setelem_data_deactivate(const struct net *net, +- const struct nft_set *set, +- struct nft_set_elem *elem); +- + static int nft_mapelem_deactivate(const struct nft_ctx *ctx, + struct nft_set *set, + const struct nft_set_iter *iter, +@@ -3739,6 +3741,7 @@ static int nf_tables_newset(struct net * + } + + INIT_LIST_HEAD(&set->bindings); ++ refcount_set(&set->refs, 1); + set->table = table; + write_pnet(&set->net, net); + set->ops = ops; +@@ -3781,6 +3784,14 @@ err1: + return err; + } + ++static void nft_set_put(struct nft_set *set) ++{ ++ if (refcount_dec_and_test(&set->refs)) { ++ kfree(set->name); ++ kvfree(set); ++ } ++} ++ + static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set) + { + if (WARN_ON(set->use > 0)) +@@ -3788,8 +3799,7 @@ static void nft_set_destroy(const struct + + set->ops->destroy(ctx, set); + module_put(to_set_type(set->ops)->owner); +- kfree(set->name); +- kvfree(set); ++ nft_set_put(set); + } + + static int nf_tables_delset(struct net *net, struct sock *nlsk, +@@ -4888,9 +4898,9 @@ static void nft_setelem_data_activate(co + nft_use_inc_restore(&(*nft_set_ext_obj(ext))->use); + } + +-static void nft_setelem_data_deactivate(const struct net *net, +- const struct nft_set *set, +- struct nft_set_elem *elem) ++void nft_setelem_data_deactivate(const struct net *net, ++ const struct nft_set *set, ++ struct nft_set_elem *elem) + { + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); + +@@ -4899,6 +4909,7 @@ static void nft_setelem_data_deactivate( + if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) + nft_use_dec(&(*nft_set_ext_obj(ext))->use); + } ++EXPORT_SYMBOL_GPL(nft_setelem_data_deactivate); + + static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, + const struct nlattr *attr) +@@ -6732,6 +6743,186 @@ static void nft_chain_del(struct nft_cha + list_del_rcu(&chain->list); + } + ++static void nft_trans_gc_setelem_remove(struct nft_ctx *ctx, ++ struct nft_trans_gc *trans) ++{ ++ void **priv = trans->priv; ++ unsigned int i; ++ ++ for (i = 0; i < trans->count; i++) { ++ struct nft_set_elem elem = { ++ .priv = priv[i], ++ }; ++ ++ nft_setelem_data_deactivate(ctx->net, trans->set, &elem); ++ trans->set->ops->remove(trans->net, trans->set, &elem); ++ } ++} ++ ++void nft_trans_gc_destroy(struct nft_trans_gc *trans) ++{ ++ nft_set_put(trans->set); ++ put_net(trans->net); ++ kfree(trans); ++} ++EXPORT_SYMBOL_GPL(nft_trans_gc_destroy); ++ ++static void nft_trans_gc_trans_free(struct rcu_head *rcu) ++{ ++ struct nft_set_elem elem = {}; ++ struct nft_trans_gc *trans; ++ struct nft_ctx ctx = {}; ++ unsigned int i; ++ ++ trans = container_of(rcu, struct nft_trans_gc, rcu); ++ ctx.net = read_pnet(&trans->set->net); ++ ++ for (i = 0; i < trans->count; i++) { ++ elem.priv = trans->priv[i]; ++ 
atomic_dec(&trans->set->nelems); ++ ++ nf_tables_set_elem_destroy(&ctx, trans->set, elem.priv); ++ } ++ ++ nft_trans_gc_destroy(trans); ++} ++ ++static bool nft_trans_gc_work_done(struct nft_trans_gc *trans) ++{ ++ struct nftables_pernet *nft_net; ++ struct nft_ctx ctx = {}; ++ ++ nft_net = net_generic(trans->net, nf_tables_net_id); ++ ++ mutex_lock(&nft_net->commit_mutex); ++ ++ /* Check for race with transaction, otherwise this batch refers to ++ * stale objects that might not be there anymore. Skip transaction if ++ * set has been destroyed from control plane transaction in case gc ++ * worker loses race. ++ */ ++ if (READ_ONCE(nft_net->gc_seq) != trans->seq || trans->set->dead) { ++ mutex_unlock(&nft_net->commit_mutex); ++ return false; ++ } ++ ++ ctx.net = trans->net; ++ ctx.table = trans->set->table; ++ ++ nft_trans_gc_setelem_remove(&ctx, trans); ++ mutex_unlock(&nft_net->commit_mutex); ++ ++ return true; ++} ++ ++static void nft_trans_gc_work(struct work_struct *work) ++{ ++ struct nft_trans_gc *trans, *next; ++ LIST_HEAD(trans_gc_list); ++ ++ spin_lock(&nf_tables_gc_list_lock); ++ list_splice_init(&nf_tables_gc_list, &trans_gc_list); ++ spin_unlock(&nf_tables_gc_list_lock); ++ ++ list_for_each_entry_safe(trans, next, &trans_gc_list, list) { ++ list_del(&trans->list); ++ if (!nft_trans_gc_work_done(trans)) { ++ nft_trans_gc_destroy(trans); ++ continue; ++ } ++ call_rcu(&trans->rcu, nft_trans_gc_trans_free); ++ } ++} ++ ++struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set, ++ unsigned int gc_seq, gfp_t gfp) ++{ ++ struct net *net = read_pnet(&set->net); ++ struct nft_trans_gc *trans; ++ ++ trans = kzalloc(sizeof(*trans), gfp); ++ if (!trans) ++ return NULL; ++ ++ refcount_inc(&set->refs); ++ trans->set = set; ++ trans->net = get_net(net); ++ trans->seq = gc_seq; ++ ++ return trans; ++} ++EXPORT_SYMBOL_GPL(nft_trans_gc_alloc); ++ ++void nft_trans_gc_elem_add(struct nft_trans_gc *trans, void *priv) ++{ ++ trans->priv[trans->count++] = priv; ++} ++EXPORT_SYMBOL_GPL(nft_trans_gc_elem_add); ++ ++static void nft_trans_gc_queue_work(struct nft_trans_gc *trans) ++{ ++ spin_lock(&nf_tables_gc_list_lock); ++ list_add_tail(&trans->list, &nf_tables_gc_list); ++ spin_unlock(&nf_tables_gc_list_lock); ++ ++ schedule_work(&trans_gc_work); ++} ++ ++static int nft_trans_gc_space(struct nft_trans_gc *trans) ++{ ++ return NFT_TRANS_GC_BATCHCOUNT - trans->count; ++} ++ ++struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc, ++ unsigned int gc_seq, gfp_t gfp) ++{ ++ if (nft_trans_gc_space(gc)) ++ return gc; ++ ++ nft_trans_gc_queue_work(gc); ++ ++ return nft_trans_gc_alloc(gc->set, gc_seq, gfp); ++} ++EXPORT_SYMBOL_GPL(nft_trans_gc_queue_async); ++ ++void nft_trans_gc_queue_async_done(struct nft_trans_gc *trans) ++{ ++ if (trans->count == 0) { ++ nft_trans_gc_destroy(trans); ++ return; ++ } ++ ++ nft_trans_gc_queue_work(trans); ++} ++EXPORT_SYMBOL_GPL(nft_trans_gc_queue_async_done); ++ ++struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp) ++{ ++ if (WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net))) ++ return NULL; ++ ++ if (nft_trans_gc_space(gc)) ++ return gc; ++ ++ call_rcu(&gc->rcu, nft_trans_gc_trans_free); ++ ++ return nft_trans_gc_alloc(gc->set, 0, gfp); ++} ++EXPORT_SYMBOL_GPL(nft_trans_gc_queue_sync); ++ ++void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans) ++{ ++ WARN_ON_ONCE(!lockdep_commit_lock_is_held(trans->net)); ++ ++ if (trans->count == 0) { ++ nft_trans_gc_destroy(trans); ++ return; ++ } ++ ++ call_rcu(&trans->rcu, 
nft_trans_gc_trans_free); ++} ++EXPORT_SYMBOL_GPL(nft_trans_gc_queue_sync_done); ++ + static int nf_tables_commit(struct net *net, struct sk_buff *skb) + { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); +@@ -6739,6 +6930,7 @@ static int nf_tables_commit(struct net * + struct nft_trans_elem *te; + struct nft_chain *chain; + struct nft_table *table; ++ unsigned int gc_seq; + + list_for_each_entry(trans, &nft_net->binding_list, binding_list) { + switch (trans->msg_type) { +@@ -6785,6 +6977,10 @@ static int nf_tables_commit(struct net * + while (++nft_net->base_seq == 0) + ; + ++ /* Bump gc counter, it becomes odd, this is the busy mark. */ ++ gc_seq = READ_ONCE(nft_net->gc_seq); ++ WRITE_ONCE(nft_net->gc_seq, ++gc_seq); ++ + /* step 3. Start new generation, rules_gen_X now in use. */ + net->nft.gencursor = nft_gencursor_next(net); + +@@ -6855,6 +7051,7 @@ static int nf_tables_commit(struct net * + nft_trans_destroy(trans); + break; + case NFT_MSG_DELSET: ++ nft_trans_set(trans)->dead = 1; + list_del_rcu(&nft_trans_set(trans)->list); + nf_tables_set_notify(&trans->ctx, nft_trans_set(trans), + NFT_MSG_DELSET, GFP_KERNEL); +@@ -6909,6 +7106,8 @@ static int nf_tables_commit(struct net * + + nf_tables_commit_release(net); + nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN); ++ ++ WRITE_ONCE(nft_net->gc_seq, ++gc_seq); + mutex_unlock(&nft_net->commit_mutex); + + return 0; +@@ -7715,6 +7914,7 @@ static int __net_init nf_tables_init_net + mutex_init(&nft_net->commit_mutex); + nft_net->base_seq = 1; + nft_net->validate_state = NFT_VALIDATE_SKIP; ++ nft_net->gc_seq = 0; + + return 0; + } +@@ -7731,9 +7931,15 @@ static void __net_exit nf_tables_exit_ne + WARN_ON_ONCE(!list_empty(&nft_net->tables)); + } + ++static void nf_tables_exit_batch(struct list_head *net_exit_list) ++{ ++ flush_work(&trans_gc_work); ++} ++ + static struct pernet_operations nf_tables_net_ops = { + .init = nf_tables_init_net, + .exit = nf_tables_exit_net, ++ .exit_batch = nf_tables_exit_batch, + .id = &nf_tables_net_id, + .size = sizeof(struct nftables_pernet), + }; +@@ -7781,6 +7987,7 @@ static void __exit nf_tables_module_exit + unregister_netdevice_notifier(&nf_tables_flowtable_notifier); + nft_chain_filter_fini(); + unregister_pernet_subsys(&nf_tables_net_ops); ++ cancel_work_sync(&trans_gc_work); + rcu_barrier(); + nf_tables_core_module_exit(); + } diff --git a/queue-4.19/netfilter-nf_tables-gc-transaction-race-with-abort-path.patch b/queue-4.19/netfilter-nf_tables-gc-transaction-race-with-abort-path.patch new file mode 100644 index 00000000000..6fad4b3f06e --- /dev/null +++ b/queue-4.19/netfilter-nf_tables-gc-transaction-race-with-abort-path.patch @@ -0,0 +1,39 @@ +From stable+bounces-50354-greg=kroah.com@vger.kernel.org Thu Jun 13 03:03:05 2024 +From: Pablo Neira Ayuso +Date: Thu, 13 Jun 2024 03:01:44 +0200 +Subject: netfilter: nf_tables: GC transaction race with abort path +To: netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: <20240613010209.104423-16-pablo@netfilter.org> + +From: Pablo Neira Ayuso + +commit 720344340fb9be2765bbaab7b292ece0a4570eae upstream. + +Abort path is missing a synchronization point with GC transactions. Add +GC sequence number hence any GC transaction losing race will be +discarded. 
+ +Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nf_tables_api.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -7263,7 +7263,12 @@ static int __nf_tables_abort(struct net + static int nf_tables_abort(struct net *net, struct sk_buff *skb) + { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); +- int ret = __nf_tables_abort(net); ++ unsigned int gc_seq; ++ int ret; ++ ++ gc_seq = nft_gc_seq_begin(nft_net); ++ ret = __nf_tables_abort(net); ++ nft_gc_seq_end(nft_net, gc_seq); + + mutex_unlock(&nft_net->commit_mutex); + diff --git a/queue-4.19/netfilter-nf_tables-gc-transaction-race-with-netns-dismantle.patch b/queue-4.19/netfilter-nf_tables-gc-transaction-race-with-netns-dismantle.patch new file mode 100644 index 00000000000..4b04f749225 --- /dev/null +++ b/queue-4.19/netfilter-nf_tables-gc-transaction-race-with-netns-dismantle.patch @@ -0,0 +1,40 @@ +From stable+bounces-50353-greg=kroah.com@vger.kernel.org Thu Jun 13 03:03:01 2024 +From: Pablo Neira Ayuso +Date: Thu, 13 Jun 2024 03:01:43 +0200 +Subject: netfilter: nf_tables: GC transaction race with netns dismantle +To: netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: <20240613010209.104423-15-pablo@netfilter.org> + +From: Pablo Neira Ayuso + +commit 02c6c24402bf1c1e986899c14ba22a10b510916b upstream. + +Use maybe_get_net() since GC workqueue might race with netns exit path. + +Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Florian Westphal +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nf_tables_api.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -6820,9 +6820,14 @@ struct nft_trans_gc *nft_trans_gc_alloc( + if (!trans) + return NULL; + ++ trans->net = maybe_get_net(net); ++ if (!trans->net) { ++ kfree(trans); ++ return NULL; ++ } ++ + refcount_inc(&set->refs); + trans->set = set; +- trans->net = get_net(net); + trans->seq = gc_seq; + + return trans; diff --git a/queue-4.19/netfilter-nf_tables-mark-newset-as-dead-on-transaction-abort.patch b/queue-4.19/netfilter-nf_tables-mark-newset-as-dead-on-transaction-abort.patch new file mode 100644 index 00000000000..f1e49bee79f --- /dev/null +++ b/queue-4.19/netfilter-nf_tables-mark-newset-as-dead-on-transaction-abort.patch @@ -0,0 +1,49 @@ +From stable+bounces-50371-greg=kroah.com@vger.kernel.org Thu Jun 13 03:03:52 2024 +From: Pablo Neira Ayuso +Date: Thu, 13 Jun 2024 03:02:00 +0200 +Subject: netfilter: nf_tables: mark newset as dead on transaction abort +To: netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: <20240613010209.104423-32-pablo@netfilter.org> + +From: Florian Westphal + +[ Upstream commit 08e4c8c5919fd405a4d709b4ba43d836894a26eb ] + +If a transaction is aborted, we should mark the to-be-released NEWSET dead, +just like commit path does for DEL and DESTROYSET commands. + +In both cases all remaining elements will be released via +set->ops->destroy(). + +The existing abort code does NOT post the actual release to the work queue. 
+Also the entire __nf_tables_abort() function is wrapped in gc_seq +begin/end pair. + +Therefore, async gc worker will never try to release the pending set +elements, as gc sequence is always stale. + +It might be possible to speed up transaction aborts via work queue too, +this would result in a race and a possible use-after-free. + +So fix this before it becomes an issue. + +Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nf_tables_api.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -7254,6 +7254,7 @@ static int __nf_tables_abort(struct net + nft_trans_destroy(trans); + break; + } ++ nft_trans_set(trans)->dead = 1; + list_del_rcu(&nft_trans_set(trans)->list); + break; + case NFT_MSG_DELSET: diff --git a/queue-4.19/netfilter-nf_tables-mark-set-as-dead-when-unbinding-anonymous-set-with-timeout.patch b/queue-4.19/netfilter-nf_tables-mark-set-as-dead-when-unbinding-anonymous-set-with-timeout.patch new file mode 100644 index 00000000000..12ab507f789 --- /dev/null +++ b/queue-4.19/netfilter-nf_tables-mark-set-as-dead-when-unbinding-anonymous-set-with-timeout.patch @@ -0,0 +1,49 @@ +From stable+bounces-50378-greg=kroah.com@vger.kernel.org Thu Jun 13 03:04:16 2024 +From: Pablo Neira Ayuso +Date: Thu, 13 Jun 2024 03:02:07 +0200 +Subject: netfilter: nf_tables: mark set as dead when unbinding anonymous set with timeout +To: netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: <20240613010209.104423-39-pablo@netfilter.org> + +From: Pablo Neira Ayuso + +commit 552705a3650bbf46a22b1adedc1b04181490fc36 upstream. + +While the rhashtable set gc runs asynchronously, a race allows it to +collect elements from anonymous sets with timeouts while it is being +released from the commit path. + +Mingi Cho originally reported this issue in a different path in 6.1.x +with a pipapo set with low timeouts which is not possible upstream since +7395dfacfff6 ("netfilter: nf_tables: use timestamp to check for set +element timeout"). + +Fix this by setting on the dead flag for anonymous sets to skip async gc +in this case. + +According to 08e4c8c5919f ("netfilter: nf_tables: mark newset as dead on +transaction abort"), Florian plans to accelerate abort path by releasing +objects via workqueue, therefore, this sets on the dead flag for abort +path too. 
+ +Cc: stable@vger.kernel.org +Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") +Reported-by: Mingi Cho +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nf_tables_api.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -3943,6 +3943,7 @@ void nf_tables_unbind_set(const struct n + + if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) { + list_del_rcu(&set->list); ++ set->dead = 1; + if (event) + nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, + GFP_KERNEL); diff --git a/queue-4.19/netfilter-nf_tables-pass-context-to-nft_set_destroy.patch b/queue-4.19/netfilter-nf_tables-pass-context-to-nft_set_destroy.patch new file mode 100644 index 00000000000..2257a364168 --- /dev/null +++ b/queue-4.19/netfilter-nf_tables-pass-context-to-nft_set_destroy.patch @@ -0,0 +1,68 @@ +From stable+bounces-50341-greg=kroah.com@vger.kernel.org Thu Jun 13 03:02:29 2024 +From: Pablo Neira Ayuso +Date: Thu, 13 Jun 2024 03:01:30 +0200 +Subject: netfilter: nf_tables: pass context to nft_set_destroy() +To: netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: <20240613010209.104423-2-pablo@netfilter.org> + +From: Pablo Neira Ayuso + +commit 0c2a85edd143162b3a698f31e94bf8cdc041da87 upstream. + +The patch that adds support for stateful expressions in set definitions +require this. + +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nf_tables_api.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -3753,7 +3753,7 @@ err1: + return err; + } + +-static void nft_set_destroy(struct nft_set *set) ++static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set) + { + if (WARN_ON(set->use > 0)) + return; +@@ -3926,7 +3926,7 @@ EXPORT_SYMBOL_GPL(nf_tables_deactivate_s + void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set) + { + if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) +- nft_set_destroy(set); ++ nft_set_destroy(ctx, set); + } + EXPORT_SYMBOL_GPL(nf_tables_destroy_set); + +@@ -6503,7 +6503,7 @@ static void nft_commit_release(struct nf + nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); + break; + case NFT_MSG_DELSET: +- nft_set_destroy(nft_trans_set(trans)); ++ nft_set_destroy(&trans->ctx, nft_trans_set(trans)); + break; + case NFT_MSG_DELSETELEM: + nf_tables_set_elem_destroy(&trans->ctx, +@@ -6857,7 +6857,7 @@ static void nf_tables_abort_release(stru + nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); + break; + case NFT_MSG_NEWSET: +- nft_set_destroy(nft_trans_set(trans)); ++ nft_set_destroy(&trans->ctx, nft_trans_set(trans)); + break; + case NFT_MSG_NEWSETELEM: + nft_set_elem_destroy(nft_trans_elem_set(trans), +@@ -7604,7 +7604,7 @@ static void __nft_release_table(struct n + list_for_each_entry_safe(set, ns, &table->sets, list) { + list_del(&set->list); + nft_use_dec(&table->use); +- nft_set_destroy(set); ++ nft_set_destroy(&ctx, set); + } + list_for_each_entry_safe(obj, ne, &table->objects, list) { + list_del(&obj->list); diff --git a/queue-4.19/netfilter-nf_tables-reject-new-basechain-after-table-flag-update.patch b/queue-4.19/netfilter-nf_tables-reject-new-basechain-after-table-flag-update.patch new file mode 100644 index 00000000000..b5fb330ae9b --- 
/dev/null +++ b/queue-4.19/netfilter-nf_tables-reject-new-basechain-after-table-flag-update.patch @@ -0,0 +1,63 @@ +From stable+bounces-50379-greg=kroah.com@vger.kernel.org Thu Jun 13 03:04:20 2024 +From: Pablo Neira Ayuso +Date: Thu, 13 Jun 2024 03:02:08 +0200 +Subject: netfilter: nf_tables: reject new basechain after table flag update +To: netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: <20240613010209.104423-40-pablo@netfilter.org> + +From: Pablo Neira Ayuso + +commit 994209ddf4f430946f6247616b2e33d179243769 upstream. + +When dormant flag is toggled, hooks are disabled in the commit phase by +iterating over current chains in table (existing and new). + +The following configuration allows for an inconsistent state: + + add table x + add chain x y { type filter hook input priority 0; } + add table x { flags dormant; } + add chain x w { type filter hook input priority 1; } + +which triggers the following warning when trying to unregister chain w +which is already unregistered. + +[ 127.322252] WARNING: CPU: 7 PID: 1211 at net/netfilter/core.c:50 1 __nf_unregister_net_hook+0x21a/0x260 +[...] +[ 127.322519] Call Trace: +[ 127.322521] +[ 127.322524] ? __warn+0x9f/0x1a0 +[ 127.322531] ? __nf_unregister_net_hook+0x21a/0x260 +[ 127.322537] ? report_bug+0x1b1/0x1e0 +[ 127.322545] ? handle_bug+0x3c/0x70 +[ 127.322552] ? exc_invalid_op+0x17/0x40 +[ 127.322556] ? asm_exc_invalid_op+0x1a/0x20 +[ 127.322563] ? kasan_save_free_info+0x3b/0x60 +[ 127.322570] ? __nf_unregister_net_hook+0x6a/0x260 +[ 127.322577] ? __nf_unregister_net_hook+0x21a/0x260 +[ 127.322583] ? __nf_unregister_net_hook+0x6a/0x260 +[ 127.322590] ? __nf_tables_unregister_hook+0x8a/0xe0 [nf_tables] +[ 127.322655] nft_table_disable+0x75/0xf0 [nf_tables] +[ 127.322717] nf_tables_commit+0x2571/0x2620 [nf_tables] + +Fixes: 179d9ba5559a ("netfilter: nf_tables: fix table flag updates") +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nf_tables_api.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -1750,6 +1750,9 @@ static int nf_tables_addchain(struct nft + struct nft_chain_hook hook; + struct nf_hook_ops *ops; + ++ if (table->flags & __NFT_TABLE_F_UPDATE) ++ return -EINVAL; ++ + err = nft_chain_parse_hook(net, nla, &hook, family, true); + if (err < 0) + return err; diff --git a/queue-4.19/netfilter-nf_tables-remove-busy-mark-and-gc-batch-api.patch b/queue-4.19/netfilter-nf_tables-remove-busy-mark-and-gc-batch-api.patch new file mode 100644 index 00000000000..3fc19b9fb7c --- /dev/null +++ b/queue-4.19/netfilter-nf_tables-remove-busy-mark-and-gc-batch-api.patch @@ -0,0 +1,182 @@ +From stable+bounces-50351-greg=kroah.com@vger.kernel.org Thu Jun 13 03:02:57 2024 +From: Pablo Neira Ayuso +Date: Thu, 13 Jun 2024 03:01:41 +0200 +Subject: netfilter: nf_tables: remove busy mark and gc batch API +To: netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: <20240613010209.104423-13-pablo@netfilter.org> + +From: Pablo Neira Ayuso + +commit a2dd0233cbc4d8a0abb5f64487487ffc9265beb5 upstream. + +Ditch it, it has been replace it by the GC transaction API and it has no +clients anymore. 
+ +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + include/net/netfilter/nf_tables.h | 97 +------------------------------------- + net/netfilter/nf_tables_api.c | 28 ---------- + 2 files changed, 5 insertions(+), 120 deletions(-) + +--- a/include/net/netfilter/nf_tables.h ++++ b/include/net/netfilter/nf_tables.h +@@ -652,62 +652,6 @@ void nft_set_elem_destroy(const struct n + void nf_tables_set_elem_destroy(const struct nft_ctx *ctx, + const struct nft_set *set, void *elem); + +-/** +- * struct nft_set_gc_batch_head - nf_tables set garbage collection batch +- * +- * @rcu: rcu head +- * @set: set the elements belong to +- * @cnt: count of elements +- */ +-struct nft_set_gc_batch_head { +- struct rcu_head rcu; +- const struct nft_set *set; +- unsigned int cnt; +-}; +- +-#define NFT_SET_GC_BATCH_SIZE ((PAGE_SIZE - \ +- sizeof(struct nft_set_gc_batch_head)) / \ +- sizeof(void *)) +- +-/** +- * struct nft_set_gc_batch - nf_tables set garbage collection batch +- * +- * @head: GC batch head +- * @elems: garbage collection elements +- */ +-struct nft_set_gc_batch { +- struct nft_set_gc_batch_head head; +- void *elems[NFT_SET_GC_BATCH_SIZE]; +-}; +- +-struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set, +- gfp_t gfp); +-void nft_set_gc_batch_release(struct rcu_head *rcu); +- +-static inline void nft_set_gc_batch_complete(struct nft_set_gc_batch *gcb) +-{ +- if (gcb != NULL) +- call_rcu(&gcb->head.rcu, nft_set_gc_batch_release); +-} +- +-static inline struct nft_set_gc_batch * +-nft_set_gc_batch_check(const struct nft_set *set, struct nft_set_gc_batch *gcb, +- gfp_t gfp) +-{ +- if (gcb != NULL) { +- if (gcb->head.cnt + 1 < ARRAY_SIZE(gcb->elems)) +- return gcb; +- nft_set_gc_batch_complete(gcb); +- } +- return nft_set_gc_batch_alloc(set, gfp); +-} +- +-static inline void nft_set_gc_batch_add(struct nft_set_gc_batch *gcb, +- void *elem) +-{ +- gcb->elems[gcb->head.cnt++] = elem; +-} +- + struct nft_expr_ops; + /** + * struct nft_expr_type - nf_tables expression type +@@ -1314,47 +1258,12 @@ static inline void nft_set_elem_change_a + ext->genmask ^= nft_genmask_next(net); + } + +-/* +- * We use a free bit in the genmask field to indicate the element +- * is busy, meaning it is currently being processed either by +- * the netlink API or GC. +- * +- * Even though the genmask is only a single byte wide, this works +- * because the extension structure if fully constant once initialized, +- * so there are no non-atomic write accesses unless it is already +- * marked busy. 
+- */ +-#define NFT_SET_ELEM_BUSY_MASK (1 << 2) +- +-#if defined(__LITTLE_ENDIAN_BITFIELD) +-#define NFT_SET_ELEM_BUSY_BIT 2 +-#elif defined(__BIG_ENDIAN_BITFIELD) +-#define NFT_SET_ELEM_BUSY_BIT (BITS_PER_LONG - BITS_PER_BYTE + 2) +-#else +-#error +-#endif +- +-static inline int nft_set_elem_mark_busy(struct nft_set_ext *ext) +-{ +- unsigned long *word = (unsigned long *)ext; +- +- BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0); +- return test_and_set_bit(NFT_SET_ELEM_BUSY_BIT, word); +-} +- +-static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext) +-{ +- unsigned long *word = (unsigned long *)ext; +- +- clear_bit(NFT_SET_ELEM_BUSY_BIT, word); +-} +- +-#define NFT_SET_ELEM_DEAD_MASK (1 << 3) ++#define NFT_SET_ELEM_DEAD_MASK (1 << 2) + + #if defined(__LITTLE_ENDIAN_BITFIELD) +-#define NFT_SET_ELEM_DEAD_BIT 3 ++#define NFT_SET_ELEM_DEAD_BIT 2 + #elif defined(__BIG_ENDIAN_BITFIELD) +-#define NFT_SET_ELEM_DEAD_BIT (BITS_PER_LONG - BITS_PER_BYTE + 3) ++#define NFT_SET_ELEM_DEAD_BIT (BITS_PER_LONG - BITS_PER_BYTE + 2) + #else + #error + #endif +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -4766,7 +4766,8 @@ static int nft_add_set_elem(struct nft_c + if (trans == NULL) + goto err4; + +- ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK; ++ ext->genmask = nft_genmask_cur(ctx->net); ++ + err = set->ops->insert(ctx->net, set, &elem, &ext2); + if (err) { + if (err == -EEXIST) { +@@ -5059,31 +5060,6 @@ static int nf_tables_delsetelem(struct n + return err; + } + +-void nft_set_gc_batch_release(struct rcu_head *rcu) +-{ +- struct nft_set_gc_batch *gcb; +- unsigned int i; +- +- gcb = container_of(rcu, struct nft_set_gc_batch, head.rcu); +- for (i = 0; i < gcb->head.cnt; i++) +- nft_set_elem_destroy(gcb->head.set, gcb->elems[i], true); +- kfree(gcb); +-} +-EXPORT_SYMBOL_GPL(nft_set_gc_batch_release); +- +-struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set, +- gfp_t gfp) +-{ +- struct nft_set_gc_batch *gcb; +- +- gcb = kzalloc(sizeof(*gcb), gfp); +- if (gcb == NULL) +- return gcb; +- gcb->head.set = set; +- return gcb; +-} +-EXPORT_SYMBOL_GPL(nft_set_gc_batch_alloc); +- + /* + * Stateful objects + */ diff --git a/queue-4.19/netfilter-nf_tables-set-dormant-flag-on-hook-register-failure.patch b/queue-4.19/netfilter-nf_tables-set-dormant-flag-on-hook-register-failure.patch new file mode 100644 index 00000000000..59cd79f2c91 --- /dev/null +++ b/queue-4.19/netfilter-nf_tables-set-dormant-flag-on-hook-register-failure.patch @@ -0,0 +1,41 @@ +From stable+bounces-50375-greg=kroah.com@vger.kernel.org Thu Jun 13 03:04:07 2024 +From: Pablo Neira Ayuso +Date: Thu, 13 Jun 2024 03:02:04 +0200 +Subject: netfilter: nf_tables: set dormant flag on hook register failure +To: netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: <20240613010209.104423-36-pablo@netfilter.org> + +From: Florian Westphal + +[ Upstream commit bccebf64701735533c8db37773eeacc6566cc8ec ] + +We need to set the dormant flag again if we fail to register +the hooks. + +During memory pressure hook registration can fail and we end up +with a table marked as active but no registered hooks. + +On table/base chain deletion, nf_tables will attempt to unregister +the hook again which yields a warn splat from the nftables core. 
+ +Reported-and-tested-by: syzbot+de4025c006ec68ac56fc@syzkaller.appspotmail.com +Fixes: 179d9ba5559a ("netfilter: nf_tables: fix table flag updates") +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nf_tables_api.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -925,6 +925,7 @@ static int nf_tables_updtable(struct nft + return 0; + + err_register_hooks: ++ ctx->table->flags |= NFT_TABLE_F_DORMANT; + nft_trans_destroy(trans); + return ret; + } diff --git a/queue-4.19/netfilter-nf_tables-skip-dead-set-elements-in-netlink-dump.patch b/queue-4.19/netfilter-nf_tables-skip-dead-set-elements-in-netlink-dump.patch new file mode 100644 index 00000000000..29ffa9d7ad4 --- /dev/null +++ b/queue-4.19/netfilter-nf_tables-skip-dead-set-elements-in-netlink-dump.patch @@ -0,0 +1,46 @@ +From stable+bounces-50372-greg=kroah.com@vger.kernel.org Thu Jun 13 03:03:55 2024 +From: Pablo Neira Ayuso +Date: Thu, 13 Jun 2024 03:02:01 +0200 +Subject: netfilter: nf_tables: skip dead set elements in netlink dump +To: netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: <20240613010209.104423-33-pablo@netfilter.org> + +From: Pablo Neira Ayuso + +[ Upstream commit 6b1ca88e4bb63673dc9f9c7f23c899f22c3cb17a ] + +Delete from packet path relies on the garbage collector to purge +elements with NFT_SET_ELEM_DEAD_BIT on. + +Skip these dead elements from nf_tables_dump_setelem() path, I very +rarely see tests/shell/testcases/maps/typeof_maps_add_delete reports +[DUMP FAILED] showing a mismatch in the expected output with an element +that should not be there. + +If the netlink dump happens before GC worker run, it might show dead +elements in the ruleset listing. + +nft_rhash_get() already skips dead elements in nft_rhash_cmp(), +therefore, it already does not show the element when getting a single +element via netlink control plane. 
+ +Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nf_tables_api.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -4200,7 +4200,7 @@ static int nf_tables_dump_setelem(const + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); + struct nft_set_dump_args *args; + +- if (nft_set_elem_expired(ext)) ++ if (nft_set_elem_expired(ext) || nft_set_elem_is_dead(ext)) + return 0; + + args = container_of(iter, struct nft_set_dump_args, iter); diff --git a/queue-4.19/netfilter-nf_tables-unregister-flowtable-hooks-on-netns-exit.patch b/queue-4.19/netfilter-nf_tables-unregister-flowtable-hooks-on-netns-exit.patch new file mode 100644 index 00000000000..6bda4ac08e2 --- /dev/null +++ b/queue-4.19/netfilter-nf_tables-unregister-flowtable-hooks-on-netns-exit.patch @@ -0,0 +1,67 @@ +From stable+bounces-50361-greg=kroah.com@vger.kernel.org Thu Jun 13 03:03:24 2024 +From: Pablo Neira Ayuso +Date: Thu, 13 Jun 2024 03:01:50 +0200 +Subject: netfilter: nf_tables: unregister flowtable hooks on netns exit +To: netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: <20240613010209.104423-22-pablo@netfilter.org> + +From: Pablo Neira Ayuso + +commit 6069da443bf65f513bb507bb21e2f87cfb1ad0b6 upstream. + +Unregister flowtable hooks before they are releases via +nf_tables_flowtable_destroy() otherwise hook core reports UAF. + +BUG: KASAN: use-after-free in nf_hook_entries_grow+0x5a7/0x700 net/netfilter/core.c:142 net/netfilter/core.c:142 +Read of size 4 at addr ffff8880736f7438 by task syz-executor579/3666 + +CPU: 0 PID: 3666 Comm: syz-executor579 Not tainted 5.16.0-rc5-syzkaller #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + + __dump_stack lib/dump_stack.c:88 [inline] + __dump_stack lib/dump_stack.c:88 [inline] lib/dump_stack.c:106 + dump_stack_lvl+0x1dc/0x2d8 lib/dump_stack.c:106 lib/dump_stack.c:106 + print_address_description+0x65/0x380 mm/kasan/report.c:247 mm/kasan/report.c:247 + __kasan_report mm/kasan/report.c:433 [inline] + __kasan_report mm/kasan/report.c:433 [inline] mm/kasan/report.c:450 + kasan_report+0x19a/0x1f0 mm/kasan/report.c:450 mm/kasan/report.c:450 + nf_hook_entries_grow+0x5a7/0x700 net/netfilter/core.c:142 net/netfilter/core.c:142 + __nf_register_net_hook+0x27e/0x8d0 net/netfilter/core.c:429 net/netfilter/core.c:429 + nf_register_net_hook+0xaa/0x180 net/netfilter/core.c:571 net/netfilter/core.c:571 + nft_register_flowtable_net_hooks+0x3c5/0x730 net/netfilter/nf_tables_api.c:7232 net/netfilter/nf_tables_api.c:7232 + nf_tables_newflowtable+0x2022/0x2cf0 net/netfilter/nf_tables_api.c:7430 net/netfilter/nf_tables_api.c:7430 + nfnetlink_rcv_batch net/netfilter/nfnetlink.c:513 [inline] + nfnetlink_rcv_skb_batch net/netfilter/nfnetlink.c:634 [inline] + nfnetlink_rcv_batch net/netfilter/nfnetlink.c:513 [inline] net/netfilter/nfnetlink.c:652 + nfnetlink_rcv_skb_batch net/netfilter/nfnetlink.c:634 [inline] net/netfilter/nfnetlink.c:652 + nfnetlink_rcv+0x10e6/0x2550 net/netfilter/nfnetlink.c:652 net/netfilter/nfnetlink.c:652 + +__nft_release_hook() calls nft_unregister_flowtable_net_hooks() which +only unregisters the hooks, then after RCU grace period, it is +guaranteed that no packets add new entries to the flowtable (no 
flow +offload rules and flowtable hooks are reachable from packet path), so it +is safe to call nf_flow_table_free() which cleans up the remaining +entries from the flowtable (both software and hardware) and it unbinds +the flow_block. + +Fixes: ff4bf2f42a40 ("netfilter: nf_tables: add nft_unregister_flowtable_hook()") +Reported-by: syzbot+e918523f77e62790d6d9@syzkaller.appspotmail.com +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nf_tables_api.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -7863,6 +7863,8 @@ static void __nft_release_table(struct n + + list_for_each_entry(chain, &table->chains, list) + nf_tables_unregister_hook(net, table, chain); ++ list_for_each_entry(flowtable, &table->flowtables, list) ++ nft_unregister_flowtable_net_hooks(net, flowtable); + /* No packets are walking on these chains anymore. */ + ctx.table = table; + list_for_each_entry(chain, &table->chains, list) { diff --git a/queue-4.19/netfilter-nf_tables-validate-nfproto_-family.patch b/queue-4.19/netfilter-nf_tables-validate-nfproto_-family.patch new file mode 100644 index 00000000000..afe00efba9e --- /dev/null +++ b/queue-4.19/netfilter-nf_tables-validate-nfproto_-family.patch @@ -0,0 +1,135 @@ +From stable+bounces-50373-greg=kroah.com@vger.kernel.org Thu Jun 13 03:04:00 2024 +From: Pablo Neira Ayuso +Date: Thu, 13 Jun 2024 03:02:02 +0200 +Subject: netfilter: nf_tables: validate NFPROTO_* family +To: netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: <20240613010209.104423-34-pablo@netfilter.org> + +From: Pablo Neira Ayuso + +[ Upstream commit d0009effa8862c20a13af4cb7475d9771b905693 ] + +Several expressions explicitly refer to NF_INET_* hook definitions +from expr->ops->validate, however, family is not validated. + +Bail out with EOPNOTSUPP in case they are used from unsupported +families. 
+ +Fixes: 0ca743a55991 ("netfilter: nf_tables: add compatibility layer for x_tables") +Fixes: a3c90f7a2323 ("netfilter: nf_tables: flow offload expression") +Fixes: 2fa841938c64 ("netfilter: nf_tables: introduce routing expression") +Fixes: 554ced0a6e29 ("netfilter: nf_tables: add support for native socket matching") +Fixes: ad49d86e07a4 ("netfilter: nf_tables: Add synproxy support") +Fixes: 4ed8eb6570a4 ("netfilter: nf_tables: Add native tproxy support") +Fixes: 6c47260250fc ("netfilter: nf_tables: add xfrm expression") +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nft_compat.c | 12 ++++++++++++ + net/netfilter/nft_flow_offload.c | 5 +++++ + net/netfilter/nft_nat.c | 5 +++++ + net/netfilter/nft_rt.c | 5 +++++ + net/netfilter/nft_socket.c | 5 +++++ + net/netfilter/nft_tproxy.c | 5 +++++ + 6 files changed, 37 insertions(+) + +--- a/net/netfilter/nft_compat.c ++++ b/net/netfilter/nft_compat.c +@@ -319,6 +319,12 @@ static int nft_target_validate(const str + unsigned int hook_mask = 0; + int ret; + ++ if (ctx->family != NFPROTO_IPV4 && ++ ctx->family != NFPROTO_IPV6 && ++ ctx->family != NFPROTO_BRIDGE && ++ ctx->family != NFPROTO_ARP) ++ return -EOPNOTSUPP; ++ + if (nft_is_base_chain(ctx->chain)) { + const struct nft_base_chain *basechain = + nft_base_chain(ctx->chain); +@@ -560,6 +566,12 @@ static int nft_match_validate(const stru + unsigned int hook_mask = 0; + int ret; + ++ if (ctx->family != NFPROTO_IPV4 && ++ ctx->family != NFPROTO_IPV6 && ++ ctx->family != NFPROTO_BRIDGE && ++ ctx->family != NFPROTO_ARP) ++ return -EOPNOTSUPP; ++ + if (nft_is_base_chain(ctx->chain)) { + const struct nft_base_chain *basechain = + nft_base_chain(ctx->chain); +--- a/net/netfilter/nft_flow_offload.c ++++ b/net/netfilter/nft_flow_offload.c +@@ -145,6 +145,11 @@ static int nft_flow_offload_validate(con + { + unsigned int hook_mask = (1 << NF_INET_FORWARD); + ++ if (ctx->family != NFPROTO_IPV4 && ++ ctx->family != NFPROTO_IPV6 && ++ ctx->family != NFPROTO_INET) ++ return -EOPNOTSUPP; ++ + return nft_chain_validate_hooks(ctx->chain, hook_mask); + } + +--- a/net/netfilter/nft_nat.c ++++ b/net/netfilter/nft_nat.c +@@ -94,6 +94,11 @@ static int nft_nat_validate(const struct + struct nft_nat *priv = nft_expr_priv(expr); + int err; + ++ if (ctx->family != NFPROTO_IPV4 && ++ ctx->family != NFPROTO_IPV6 && ++ ctx->family != NFPROTO_INET) ++ return -EOPNOTSUPP; ++ + err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); + if (err < 0) + return err; +--- a/net/netfilter/nft_rt.c ++++ b/net/netfilter/nft_rt.c +@@ -159,6 +159,11 @@ static int nft_rt_validate(const struct + const struct nft_rt *priv = nft_expr_priv(expr); + unsigned int hooks; + ++ if (ctx->family != NFPROTO_IPV4 && ++ ctx->family != NFPROTO_IPV6 && ++ ctx->family != NFPROTO_INET) ++ return -EOPNOTSUPP; ++ + switch (priv->key) { + case NFT_RT_NEXTHOP4: + case NFT_RT_NEXTHOP6: +--- a/net/netfilter/nft_socket.c ++++ b/net/netfilter/nft_socket.c +@@ -139,6 +139,11 @@ static int nft_socket_validate(const str + const struct nft_expr *expr, + const struct nft_data **data) + { ++ if (ctx->family != NFPROTO_IPV4 && ++ ctx->family != NFPROTO_IPV6 && ++ ctx->family != NFPROTO_INET) ++ return -EOPNOTSUPP; ++ + return nft_chain_validate_hooks(ctx->chain, + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN) | +--- a/net/netfilter/nft_tproxy.c ++++ b/net/netfilter/nft_tproxy.c +@@ -293,6 +293,11 @@ static int nft_tproxy_validate(const str + const struct nft_expr *expr, + const struct 
nft_data **data) + { ++ if (ctx->family != NFPROTO_IPV4 && ++ ctx->family != NFPROTO_IPV6 && ++ ctx->family != NFPROTO_INET) ++ return -EOPNOTSUPP; ++ + return nft_chain_validate_hooks(ctx->chain, 1 << NF_INET_PRE_ROUTING); + } + diff --git a/queue-4.19/netfilter-nft_dynset-fix-timeouts-later-than-23-days.patch b/queue-4.19/netfilter-nft_dynset-fix-timeouts-later-than-23-days.patch new file mode 100644 index 00000000000..5825b9208f9 --- /dev/null +++ b/queue-4.19/netfilter-nft_dynset-fix-timeouts-later-than-23-days.patch @@ -0,0 +1,80 @@ +From stable+bounces-50367-greg=kroah.com@vger.kernel.org Thu Jun 13 03:03:41 2024 +From: Pablo Neira Ayuso +Date: Thu, 13 Jun 2024 03:01:56 +0200 +Subject: netfilter: nft_dynset: fix timeouts later than 23 days +To: netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: <20240613010209.104423-28-pablo@netfilter.org> + +From: Pablo Neira Ayuso + +commit 917d80d376ffbaa9725fde9e3c0282f63643f278 upstream. + +Use nf_msecs_to_jiffies64 and nf_jiffies64_to_msecs as provided by +8e1102d5a159 ("netfilter: nf_tables: support timeouts larger than 23 +days"), otherwise ruleset listing breaks. + +Fixes: a8b1e36d0d1d ("netfilter: nft_dynset: fix element timeout for HZ != 1000") +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + include/net/netfilter/nf_tables.h | 3 +++ + net/netfilter/nf_tables_api.c | 4 ++-- + net/netfilter/nft_dynset.c | 8 +++++--- + 3 files changed, 10 insertions(+), 5 deletions(-) + +--- a/include/net/netfilter/nf_tables.h ++++ b/include/net/netfilter/nf_tables.h +@@ -1423,4 +1423,7 @@ struct nftables_pernet { + unsigned int gc_seq; + }; + ++int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result); ++__be64 nf_jiffies64_to_msecs(u64 input); ++ + #endif /* _NET_NF_TABLES_H */ +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -3294,7 +3294,7 @@ cont: + return 0; + } + +-static int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result) ++int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result) + { + u64 ms = be64_to_cpu(nla_get_be64(nla)); + u64 max = (u64)(~((u64)0)); +@@ -3308,7 +3308,7 @@ static int nf_msecs_to_jiffies64(const s + return 0; + } + +-static __be64 nf_jiffies64_to_msecs(u64 input) ++__be64 nf_jiffies64_to_msecs(u64 input) + { + u64 ms = jiffies64_to_nsecs(input); + +--- a/net/netfilter/nft_dynset.c ++++ b/net/netfilter/nft_dynset.c +@@ -169,8 +169,10 @@ static int nft_dynset_init(const struct + if (tb[NFTA_DYNSET_TIMEOUT] != NULL) { + if (!(set->flags & NFT_SET_TIMEOUT)) + return -EINVAL; +- timeout = msecs_to_jiffies(be64_to_cpu(nla_get_be64( +- tb[NFTA_DYNSET_TIMEOUT]))); ++ ++ err = nf_msecs_to_jiffies64(tb[NFTA_DYNSET_TIMEOUT], &timeout); ++ if (err) ++ return err; + } + + err = nft_parse_register_load(tb[NFTA_DYNSET_SREG_KEY], &priv->sreg_key, +@@ -284,7 +286,7 @@ static int nft_dynset_dump(struct sk_buf + if (nla_put_string(skb, NFTA_DYNSET_SET_NAME, priv->set->name)) + goto nla_put_failure; + if (nla_put_be64(skb, NFTA_DYNSET_TIMEOUT, +- cpu_to_be64(jiffies_to_msecs(priv->timeout)), ++ nf_jiffies64_to_msecs(priv->timeout), + NFTA_DYNSET_PAD)) + goto nla_put_failure; + if (priv->expr && nft_expr_dump(skb, NFTA_DYNSET_EXPR, priv->expr)) diff --git a/queue-4.19/netfilter-nft_dynset-relax-superfluous-check-on-set-updates.patch b/queue-4.19/netfilter-nft_dynset-relax-superfluous-check-on-set-updates.patch new file mode 100644 index 00000000000..c13b8401e47 --- 
/dev/null +++ b/queue-4.19/netfilter-nft_dynset-relax-superfluous-check-on-set-updates.patch @@ -0,0 +1,44 @@ +From stable+bounces-50370-greg=kroah.com@vger.kernel.org Thu Jun 13 03:03:50 2024 +From: Pablo Neira Ayuso +Date: Thu, 13 Jun 2024 03:01:59 +0200 +Subject: netfilter: nft_dynset: relax superfluous check on set updates +To: netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: <20240613010209.104423-31-pablo@netfilter.org> + +From: Pablo Neira Ayuso + +commit 7b1394892de8d95748d05e3ee41e85edb4abbfa1 upstream. + +Relax this condition to make add and update commands idempotent for sets +with no timeout. The eval function already checks if the set element +timeout is available and updates it if the update command is used. + +Fixes: 22fe54d5fefc ("netfilter: nf_tables: add support for dynamic set updates") +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nft_dynset.c | 10 +--------- + 1 file changed, 1 insertion(+), 9 deletions(-) + +--- a/net/netfilter/nft_dynset.c ++++ b/net/netfilter/nft_dynset.c +@@ -154,16 +154,8 @@ static int nft_dynset_init(const struct + return -EBUSY; + + priv->op = ntohl(nla_get_be32(tb[NFTA_DYNSET_OP])); +- switch (priv->op) { +- case NFT_DYNSET_OP_ADD: +- break; +- case NFT_DYNSET_OP_UPDATE: +- if (!(set->flags & NFT_SET_TIMEOUT)) +- return -EOPNOTSUPP; +- break; +- default: ++ if (priv->op > NFT_DYNSET_OP_UPDATE) + return -EOPNOTSUPP; +- } + + timeout = 0; + if (tb[NFTA_DYNSET_TIMEOUT] != NULL) { diff --git a/queue-4.19/netfilter-nft_dynset-report-eopnotsupp-on-missing-set-feature.patch b/queue-4.19/netfilter-nft_dynset-report-eopnotsupp-on-missing-set-feature.patch new file mode 100644 index 00000000000..4466187f0db --- /dev/null +++ b/queue-4.19/netfilter-nft_dynset-report-eopnotsupp-on-missing-set-feature.patch @@ -0,0 +1,55 @@ +From stable+bounces-50369-greg=kroah.com@vger.kernel.org Thu Jun 13 03:03:46 2024 +From: Pablo Neira Ayuso +Date: Thu, 13 Jun 2024 03:01:58 +0200 +Subject: netfilter: nft_dynset: report EOPNOTSUPP on missing set feature +To: netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: <20240613010209.104423-30-pablo@netfilter.org> + +From: Pablo Neira Ayuso + +commit 95cd4bca7b1f4a25810f3ddfc5e767fb46931789 upstream. + +If userspace requests a feature which is not available the original set +definition, then bail out with EOPNOTSUPP. If userspace sends +unsupported dynset flags (new feature not supported by this kernel), +then report EOPNOTSUPP to userspace. EINVAL should be only used to +report malformed netlink messages from userspace. 
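A minimal sketch of the convention this leaves in nft_dynset_init(), condensed from the hunks below (not additional patch content): a feature bit or set capability this kernel lacks yields -EOPNOTSUPP, reserving -EINVAL for genuinely malformed attributes.

    /* condensed sketch; the real checks are in the diff below */
    if (flags & ~NFT_DYNSET_F_INV)
            return -EOPNOTSUPP;     /* unknown dynset flag: feature newer than this kernel */
    if (tb[NFTA_DYNSET_TIMEOUT] && !(set->flags & NFT_SET_TIMEOUT))
            return -EOPNOTSUPP;     /* set was not created with timeout support */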
+ +Fixes: 22fe54d5fefc ("netfilter: nf_tables: add support for dynamic set updates") +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nft_dynset.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/net/netfilter/nft_dynset.c ++++ b/net/netfilter/nft_dynset.c +@@ -133,7 +133,7 @@ static int nft_dynset_init(const struct + u32 flags = ntohl(nla_get_be32(tb[NFTA_DYNSET_FLAGS])); + + if (flags & ~NFT_DYNSET_F_INV) +- return -EINVAL; ++ return -EOPNOTSUPP; + if (flags & NFT_DYNSET_F_INV) + priv->invert = true; + } +@@ -168,7 +168,7 @@ static int nft_dynset_init(const struct + timeout = 0; + if (tb[NFTA_DYNSET_TIMEOUT] != NULL) { + if (!(set->flags & NFT_SET_TIMEOUT)) +- return -EINVAL; ++ return -EOPNOTSUPP; + + err = nf_msecs_to_jiffies64(tb[NFTA_DYNSET_TIMEOUT], &timeout); + if (err) +@@ -182,7 +182,7 @@ static int nft_dynset_init(const struct + + if (tb[NFTA_DYNSET_SREG_DATA] != NULL) { + if (!(set->flags & NFT_SET_MAP)) +- return -EINVAL; ++ return -EOPNOTSUPP; + if (set->dtype == NFT_DATA_VERDICT) + return -EOPNOTSUPP; + diff --git a/queue-4.19/netfilter-nft_set_hash-try-later-when-gc-hits-eagain-on-iteration.patch b/queue-4.19/netfilter-nft_set_hash-try-later-when-gc-hits-eagain-on-iteration.patch new file mode 100644 index 00000000000..29744a6105f --- /dev/null +++ b/queue-4.19/netfilter-nft_set_hash-try-later-when-gc-hits-eagain-on-iteration.patch @@ -0,0 +1,40 @@ +From stable+bounces-50358-greg=kroah.com@vger.kernel.org Thu Jun 13 03:03:14 2024 +From: Pablo Neira Ayuso +Date: Thu, 13 Jun 2024 03:01:48 +0200 +Subject: netfilter: nft_set_hash: try later when GC hits EAGAIN on iteration +To: netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: <20240613010209.104423-20-pablo@netfilter.org> + +From: Pablo Neira Ayuso + +commit b079155faae94e9b3ab9337e82100a914ebb4e8d upstream. + +Skip GC run if iterator rewinds to the beginning with EAGAIN, otherwise GC +might collect the same element more than once. + +Fixes: f6c383b8c31a ("netfilter: nf_tables: adapt set backend to use GC transaction API") +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nft_set_hash.c | 9 +++------ + 1 file changed, 3 insertions(+), 6 deletions(-) + +--- a/net/netfilter/nft_set_hash.c ++++ b/net/netfilter/nft_set_hash.c +@@ -321,12 +321,9 @@ static void nft_rhash_gc(struct work_str + + while ((he = rhashtable_walk_next(&hti))) { + if (IS_ERR(he)) { +- if (PTR_ERR(he) != -EAGAIN) { +- nft_trans_gc_destroy(gc); +- gc = NULL; +- goto try_later; +- } +- continue; ++ nft_trans_gc_destroy(gc); ++ gc = NULL; ++ goto try_later; + } + + /* Ruleset has been updated, try later. 
*/ diff --git a/queue-4.19/netfilter-nft_set_rbtree-add-missing-expired-checks.patch b/queue-4.19/netfilter-nft_set_rbtree-add-missing-expired-checks.patch new file mode 100644 index 00000000000..0bf69eead87 --- /dev/null +++ b/queue-4.19/netfilter-nft_set_rbtree-add-missing-expired-checks.patch @@ -0,0 +1,73 @@ +From stable+bounces-50345-greg=kroah.com@vger.kernel.org Thu Jun 13 03:02:41 2024 +From: Pablo Neira Ayuso +Date: Thu, 13 Jun 2024 03:01:34 +0200 +Subject: netfilter: nft_set_rbtree: Add missing expired checks +To: netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: <20240613010209.104423-6-pablo@netfilter.org> + +From: Phil Sutter + +commit 340eaff651160234bdbce07ef34b92a8e45cd540 upstream. + +Expired intervals would still match and be dumped to user space until +garbage collection wiped them out. Make sure they stop matching and +disappear (from users' perspective) as soon as they expire. + +Fixes: 8d8540c4f5e03 ("netfilter: nft_set_rbtree: add timeout support") +Signed-off-by: Phil Sutter +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nft_set_rbtree.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +--- a/net/netfilter/nft_set_rbtree.c ++++ b/net/netfilter/nft_set_rbtree.c +@@ -82,6 +82,10 @@ static bool __nft_rbtree_lookup(const st + parent = rcu_dereference_raw(parent->rb_left); + continue; + } ++ ++ if (nft_set_elem_expired(&rbe->ext)) ++ return false; ++ + if (nft_rbtree_interval_end(rbe)) { + if (nft_set_is_anonymous(set)) + return false; +@@ -97,6 +101,7 @@ static bool __nft_rbtree_lookup(const st + + if (set->flags & NFT_SET_INTERVAL && interval != NULL && + nft_set_elem_active(&interval->ext, genmask) && ++ !nft_set_elem_expired(&interval->ext) && + nft_rbtree_interval_start(interval)) { + *ext = &interval->ext; + return true; +@@ -157,6 +162,9 @@ static bool __nft_rbtree_get(const struc + continue; + } + ++ if (nft_set_elem_expired(&rbe->ext)) ++ return false; ++ + if (!nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) || + (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END) == + (flags & NFT_SET_ELEM_INTERVAL_END)) { +@@ -173,6 +181,7 @@ static bool __nft_rbtree_get(const struc + + if (set->flags & NFT_SET_INTERVAL && interval != NULL && + nft_set_elem_active(&interval->ext, genmask) && ++ !nft_set_elem_expired(&interval->ext) && + ((!nft_rbtree_interval_end(interval) && + !(flags & NFT_SET_ELEM_INTERVAL_END)) || + (nft_rbtree_interval_end(interval) && +@@ -360,6 +369,8 @@ static void nft_rbtree_walk(const struct + + if (iter->count < iter->skip) + goto cont; ++ if (nft_set_elem_expired(&rbe->ext)) ++ goto cont; + if (!nft_set_elem_active(&rbe->ext, iter->genmask)) + goto cont; + diff --git a/queue-4.19/netfilter-nft_set_rbtree-allow-loose-matching-of-closing-element-in-interval.patch b/queue-4.19/netfilter-nft_set_rbtree-allow-loose-matching-of-closing-element-in-interval.patch new file mode 100644 index 00000000000..a8f92ed35c9 --- /dev/null +++ b/queue-4.19/netfilter-nft_set_rbtree-allow-loose-matching-of-closing-element-in-interval.patch @@ -0,0 +1,51 @@ +From stable+bounces-50343-greg=kroah.com@vger.kernel.org Thu Jun 13 03:02:36 2024 +From: Pablo Neira Ayuso +Date: Thu, 13 Jun 2024 03:01:33 +0200 +Subject: netfilter: nft_set_rbtree: allow loose matching of closing element in interval +To: netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: 
<20240613010209.104423-5-pablo@netfilter.org> + +From: Pablo Neira Ayuso + +commit 3b18d5eba491b2328b31efa4235724a2354af010 upstream. + +Allow to find closest matching for the right side of an interval (end +flag set on) so we allow lookups in inner ranges, eg. 10-20 in 5-25. + +Fixes: ba0e4d9917b4 ("netfilter: nf_tables: get set elements via netlink") +Reported-by: Phil Sutter +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nft_set_rbtree.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +--- a/net/netfilter/nft_set_rbtree.c ++++ b/net/netfilter/nft_set_rbtree.c +@@ -145,9 +145,12 @@ static bool __nft_rbtree_get(const struc + d = memcmp(this, key, set->klen); + if (d < 0) { + parent = rcu_dereference_raw(parent->rb_left); +- interval = rbe; ++ if (!(flags & NFT_SET_ELEM_INTERVAL_END)) ++ interval = rbe; + } else if (d > 0) { + parent = rcu_dereference_raw(parent->rb_right); ++ if (flags & NFT_SET_ELEM_INTERVAL_END) ++ interval = rbe; + } else { + if (!nft_set_elem_active(&rbe->ext, genmask)) { + parent = rcu_dereference_raw(parent->rb_left); +@@ -170,7 +173,10 @@ static bool __nft_rbtree_get(const struc + + if (set->flags & NFT_SET_INTERVAL && interval != NULL && + nft_set_elem_active(&interval->ext, genmask) && +- !nft_rbtree_interval_end(interval)) { ++ ((!nft_rbtree_interval_end(interval) && ++ !(flags & NFT_SET_ELEM_INTERVAL_END)) || ++ (nft_rbtree_interval_end(interval) && ++ (flags & NFT_SET_ELEM_INTERVAL_END)))) { + *elem = interval; + return true; + } diff --git a/queue-4.19/netfilter-nft_set_rbtree-fix-null-deref-on-element-insertion.patch b/queue-4.19/netfilter-nft_set_rbtree-fix-null-deref-on-element-insertion.patch new file mode 100644 index 00000000000..47996a80417 --- /dev/null +++ b/queue-4.19/netfilter-nft_set_rbtree-fix-null-deref-on-element-insertion.patch @@ -0,0 +1,87 @@ +From stable+bounces-50347-greg=kroah.com@vger.kernel.org Thu Jun 13 03:02:45 2024 +From: Pablo Neira Ayuso +Date: Thu, 13 Jun 2024 03:01:36 +0200 +Subject: netfilter: nft_set_rbtree: fix null deref on element insertion +To: netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: <20240613010209.104423-8-pablo@netfilter.org> + +From: Pablo Neira Ayuso + +commit 61ae320a29b0540c16931816299eb86bf2b66c08 upstream. + +There is no guarantee that rb_prev() will not return NULL in nft_rbtree_gc_elem(): + +general protection fault, probably for non-canonical address 0xdffffc0000000003: 0000 [#1] PREEMPT SMP KASAN +KASAN: null-ptr-deref in range [0x0000000000000018-0x000000000000001f] + nft_add_set_elem+0x14b0/0x2990 + nf_tables_newsetelem+0x528/0xb30 + +Furthermore, there is a possible use-after-free while iterating, +'node' can be free'd so we need to cache the next value to use. 
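Both hazards come down to standard rbtree iteration discipline; a simplified sketch of the shape the fixed code takes, condensed from the hunks below and using the kernel's rb_prev()/rb_next() helpers:

    /* rb_prev() returns NULL once the leftmost node is reached, so the
     * backwards search must be a plain while loop rather than do/while:
     */
    struct rb_node *prev = rb_prev(&rbe->node);
    struct nft_rbtree_elem *rbe_prev = NULL;

    while (prev) {
            rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
            if (nft_rbtree_interval_end(rbe_prev))
                    break;
            prev = rb_prev(prev);
    }

    /* and the forward overlap walk caches the successor before handling
     * the current node, since lazy gc may erase and later free it:
     */
    for (node = first; node != NULL; node = next) {
            next = rb_next(node);
            rbe = rb_entry(node, struct nft_rbtree_elem, node);
            /* gc/overlap handling of rbe happens here */
    }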
+ +Fixes: c9e6978e2725 ("netfilter: nft_set_rbtree: Switch to node list walk for overlap detection") +Signed-off-by: Florian Westphal +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nft_set_rbtree.c | 20 +++++++++++++------- + 1 file changed, 13 insertions(+), 7 deletions(-) + +--- a/net/netfilter/nft_set_rbtree.c ++++ b/net/netfilter/nft_set_rbtree.c +@@ -223,7 +223,7 @@ static int nft_rbtree_gc_elem(const stru + { + struct nft_set *set = (struct nft_set *)__set; + struct rb_node *prev = rb_prev(&rbe->node); +- struct nft_rbtree_elem *rbe_prev; ++ struct nft_rbtree_elem *rbe_prev = NULL; + struct nft_set_gc_batch *gcb; + + gcb = nft_set_gc_batch_check(set, NULL, GFP_ATOMIC); +@@ -231,17 +231,21 @@ static int nft_rbtree_gc_elem(const stru + return -ENOMEM; + + /* search for expired end interval coming before this element. */ +- do { ++ while (prev) { + rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node); + if (nft_rbtree_interval_end(rbe_prev)) + break; + + prev = rb_prev(prev); +- } while (prev != NULL); ++ } ++ ++ if (rbe_prev) { ++ rb_erase(&rbe_prev->node, &priv->root); ++ atomic_dec(&set->nelems); ++ } + +- rb_erase(&rbe_prev->node, &priv->root); + rb_erase(&rbe->node, &priv->root); +- atomic_sub(2, &set->nelems); ++ atomic_dec(&set->nelems); + + nft_set_gc_batch_add(gcb, rbe); + nft_set_gc_batch_complete(gcb); +@@ -270,7 +274,7 @@ static int __nft_rbtree_insert(const str + struct nft_set_ext **ext) + { + struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL; +- struct rb_node *node, *parent, **p, *first = NULL; ++ struct rb_node *node, *next, *parent, **p, *first = NULL; + struct nft_rbtree *priv = nft_set_priv(set); + u8 genmask = nft_genmask_next(net); + int d, err; +@@ -309,7 +313,9 @@ static int __nft_rbtree_insert(const str + * Values stored in the tree are in reversed order, starting from + * highest to lowest value. + */ +- for (node = first; node != NULL; node = rb_next(node)) { ++ for (node = first; node != NULL; node = next) { ++ next = rb_next(node); ++ + rbe = rb_entry(node, struct nft_rbtree_elem, node); + + if (!nft_set_elem_active(&rbe->ext, genmask)) diff --git a/queue-4.19/netfilter-nft_set_rbtree-fix-overlap-expiration-walk.patch b/queue-4.19/netfilter-nft_set_rbtree-fix-overlap-expiration-walk.patch new file mode 100644 index 00000000000..90911a002da --- /dev/null +++ b/queue-4.19/netfilter-nft_set_rbtree-fix-overlap-expiration-walk.patch @@ -0,0 +1,87 @@ +From stable+bounces-50348-greg=kroah.com@vger.kernel.org Thu Jun 13 03:02:49 2024 +From: Pablo Neira Ayuso +Date: Thu, 13 Jun 2024 03:01:37 +0200 +Subject: netfilter: nft_set_rbtree: fix overlap expiration walk +To: netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: <20240613010209.104423-9-pablo@netfilter.org> + +From: Pablo Neira Ayuso + +commit f718863aca469a109895cb855e6b81fff4827d71 upstream. + +The lazy gc on insert that should remove timed-out entries fails to release +the other half of the interval, if any. + +Can be reproduced with tests/shell/testcases/sets/0044interval_overlap_0 +in nftables.git and kmemleak enabled kernel. + +Second bug is the use of rbe_prev vs. prev pointer. +If rbe_prev() returns NULL after at least one iteration, rbe_prev points +to element that is not an end interval, hence it should not be removed. + +Lastly, check the genmask of the end interval if this is active in the +current generation. 
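For context on why two nodes are involved (a hedged illustration, not part of the patch): an interval is stored as a pair of elements, a start element carrying the timeout extension and a coupled end element that does not, so releasing the expired start must also release its paired end, and only an end that is active in the current generation is a valid candidate:

    /* condensed from the fixed nft_rbtree_gc_elem() below: the backwards
     * search only accepts an active end element, and "prev" (not a
     * possibly stale rbe_prev) decides whether anything is released.
     */
    if (prev) {
            rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
            rb_erase(&rbe_prev->node, &priv->root);
            atomic_dec(&set->nelems);
            nft_set_gc_batch_add(gcb, rbe_prev);    /* the half that used to leak */
    }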
+ +Fixes: c9e6978e2725 ("netfilter: nft_set_rbtree: Switch to node list walk for overlap detection") +Signed-off-by: Florian Westphal +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nft_set_rbtree.c | 20 ++++++++++++++------ + 1 file changed, 14 insertions(+), 6 deletions(-) + +--- a/net/netfilter/nft_set_rbtree.c ++++ b/net/netfilter/nft_set_rbtree.c +@@ -219,29 +219,37 @@ static void *nft_rbtree_get(const struct + + static int nft_rbtree_gc_elem(const struct nft_set *__set, + struct nft_rbtree *priv, +- struct nft_rbtree_elem *rbe) ++ struct nft_rbtree_elem *rbe, ++ u8 genmask) + { + struct nft_set *set = (struct nft_set *)__set; + struct rb_node *prev = rb_prev(&rbe->node); +- struct nft_rbtree_elem *rbe_prev = NULL; ++ struct nft_rbtree_elem *rbe_prev; + struct nft_set_gc_batch *gcb; + + gcb = nft_set_gc_batch_check(set, NULL, GFP_ATOMIC); + if (!gcb) + return -ENOMEM; + +- /* search for expired end interval coming before this element. */ ++ /* search for end interval coming before this element. ++ * end intervals don't carry a timeout extension, they ++ * are coupled with the interval start element. ++ */ + while (prev) { + rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node); +- if (nft_rbtree_interval_end(rbe_prev)) ++ if (nft_rbtree_interval_end(rbe_prev) && ++ nft_set_elem_active(&rbe_prev->ext, genmask)) + break; + + prev = rb_prev(prev); + } + +- if (rbe_prev) { ++ if (prev) { ++ rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node); ++ + rb_erase(&rbe_prev->node, &priv->root); + atomic_dec(&set->nelems); ++ nft_set_gc_batch_add(gcb, rbe_prev); + } + + rb_erase(&rbe->node, &priv->root); +@@ -323,7 +331,7 @@ static int __nft_rbtree_insert(const str + + /* perform garbage collection to avoid bogus overlap reports. */ + if (nft_set_elem_expired(&rbe->ext)) { +- err = nft_rbtree_gc_elem(set, priv, rbe); ++ err = nft_rbtree_gc_elem(set, priv, rbe, genmask); + if (err < 0) + return err; + diff --git a/queue-4.19/netfilter-nft_set_rbtree-skip-end-interval-element-from-gc.patch b/queue-4.19/netfilter-nft_set_rbtree-skip-end-interval-element-from-gc.patch new file mode 100644 index 00000000000..f0aa260a51d --- /dev/null +++ b/queue-4.19/netfilter-nft_set_rbtree-skip-end-interval-element-from-gc.patch @@ -0,0 +1,56 @@ +From stable+bounces-50374-greg=kroah.com@vger.kernel.org Thu Jun 13 03:04:02 2024 +From: Pablo Neira Ayuso +Date: Thu, 13 Jun 2024 03:02:03 +0200 +Subject: netfilter: nft_set_rbtree: skip end interval element from gc +To: netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: <20240613010209.104423-35-pablo@netfilter.org> + +From: Pablo Neira Ayuso + +commit 60c0c230c6f046da536d3df8b39a20b9a9fd6af0 upstream. + +rbtree lazy gc on insert might collect an end interval element that has +been just added in this transactions, skip end interval elements that +are not yet active. 
+ +Fixes: f718863aca46 ("netfilter: nft_set_rbtree: fix overlap expiration walk") +Cc: stable@vger.kernel.org +Reported-by: lonial con +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nft_set_rbtree.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +--- a/net/netfilter/nft_set_rbtree.c ++++ b/net/netfilter/nft_set_rbtree.c +@@ -240,8 +240,7 @@ static void nft_rbtree_gc_remove(struct + + static int nft_rbtree_gc_elem(const struct nft_set *__set, + struct nft_rbtree *priv, +- struct nft_rbtree_elem *rbe, +- u8 genmask) ++ struct nft_rbtree_elem *rbe) + { + struct nft_set *set = (struct nft_set *)__set; + struct rb_node *prev = rb_prev(&rbe->node); +@@ -260,7 +259,7 @@ static int nft_rbtree_gc_elem(const stru + while (prev) { + rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node); + if (nft_rbtree_interval_end(rbe_prev) && +- nft_set_elem_active(&rbe_prev->ext, genmask)) ++ nft_set_elem_active(&rbe_prev->ext, NFT_GENMASK_ANY)) + break; + + prev = rb_prev(prev); +@@ -368,7 +367,7 @@ static int __nft_rbtree_insert(const str + */ + if (nft_set_elem_expired(&rbe->ext) && + nft_set_elem_active(&rbe->ext, cur_genmask)) { +- err = nft_rbtree_gc_elem(set, priv, rbe, genmask); ++ err = nft_rbtree_gc_elem(set, priv, rbe); + if (err < 0) + return err; + diff --git a/queue-4.19/netfilter-nft_set_rbtree-skip-sync-gc-for-new-elements-in-this-transaction.patch b/queue-4.19/netfilter-nft_set_rbtree-skip-sync-gc-for-new-elements-in-this-transaction.patch new file mode 100644 index 00000000000..6d8bb8502f7 --- /dev/null +++ b/queue-4.19/netfilter-nft_set_rbtree-skip-sync-gc-for-new-elements-in-this-transaction.patch @@ -0,0 +1,50 @@ +From stable+bounces-50356-greg=kroah.com@vger.kernel.org Thu Jun 13 03:03:08 2024 +From: Pablo Neira Ayuso +Date: Thu, 13 Jun 2024 03:01:46 +0200 +Subject: netfilter: nft_set_rbtree: skip sync GC for new elements in this transaction +To: netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: <20240613010209.104423-18-pablo@netfilter.org> + +From: Pablo Neira Ayuso + +commit 2ee52ae94baabf7ee09cf2a8d854b990dac5d0e4 upstream. + +New elements in this transaction might expired before such transaction +ends. Skip sync GC for such elements otherwise commit path might walk +over an already released object. Once transaction is finished, async GC +will collect such expired element. + +Fixes: f6c383b8c31a ("netfilter: nf_tables: adapt set backend to use GC transaction API") +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nft_set_rbtree.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/net/netfilter/nft_set_rbtree.c ++++ b/net/netfilter/nft_set_rbtree.c +@@ -317,6 +317,7 @@ static int __nft_rbtree_insert(const str + struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL; + struct rb_node *node, *next, *parent, **p, *first = NULL; + struct nft_rbtree *priv = nft_set_priv(set); ++ u8 cur_genmask = nft_genmask_cur(net); + u8 genmask = nft_genmask_next(net); + int d, err; + +@@ -362,8 +363,11 @@ static int __nft_rbtree_insert(const str + if (!nft_set_elem_active(&rbe->ext, genmask)) + continue; + +- /* perform garbage collection to avoid bogus overlap reports. 
*/ +- if (nft_set_elem_expired(&rbe->ext)) { ++ /* perform garbage collection to avoid bogus overlap reports ++ * but skip new elements in this transaction. ++ */ ++ if (nft_set_elem_expired(&rbe->ext) && ++ nft_set_elem_active(&rbe->ext, cur_genmask)) { + err = nft_rbtree_gc_elem(set, priv, rbe, genmask); + if (err < 0) + return err; diff --git a/queue-4.19/netfilter-nft_set_rbtree-switch-to-node-list-walk-for-overlap-detection.patch b/queue-4.19/netfilter-nft_set_rbtree-switch-to-node-list-walk-for-overlap-detection.patch new file mode 100644 index 00000000000..1349b00910b --- /dev/null +++ b/queue-4.19/netfilter-nft_set_rbtree-switch-to-node-list-walk-for-overlap-detection.patch @@ -0,0 +1,323 @@ +From stable+bounces-50346-greg=kroah.com@vger.kernel.org Thu Jun 13 03:02:42 2024 +From: Pablo Neira Ayuso +Date: Thu, 13 Jun 2024 03:01:35 +0200 +Subject: netfilter: nft_set_rbtree: Switch to node list walk for overlap detection +To: netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: <20240613010209.104423-7-pablo@netfilter.org> + +From: Pablo Neira Ayuso + +commit c9e6978e2725a7d4b6cd23b2facd3f11422c0643 upstream. + +...instead of a tree descent, which became overly complicated in an +attempt to cover cases where expired or inactive elements would affect +comparisons with the new element being inserted. + +Further, it turned out that it's probably impossible to cover all those +cases, as inactive nodes might entirely hide subtrees consisting of a +complete interval plus a node that makes the current insertion not +overlap. + +To speed up the overlap check, descent the tree to find a greater +element that is closer to the key value to insert. Then walk down the +node list for overlap detection. Starting the overlap check from +rb_first() unconditionally is slow, it takes 10 times longer due to the +full linear traversal of the list. + +Moreover, perform garbage collection of expired elements when walking +down the node list to avoid bogus overlap reports. + +For the insertion operation itself, this essentially reverts back to the +implementation before commit 7c84d41416d8 ("netfilter: nft_set_rbtree: +Detect partial overlaps on insertion"), except that cases of complete +overlap are already handled in the overlap detection phase itself, which +slightly simplifies the loop to find the insertion point. + +Based on initial patch from Stefano Brivio, including text from the +original patch description too. 
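A worked example of the overlap rules this walk ends up enforcing (a hedged illustration with made-up interval values, matching the -EEXIST/-ENOTEMPTY cases spelled out in the code further down):

    /* With an existing interval 10-20 (a start element "10" plus an end
     * element "20"):
     *
     *   - inserting 15-25: the new start "15" ends the walk with a
     *     closest less-or-equal element that is itself a start element
     *     ("10"), i.e. a partial overlap, rejected with -ENOTEMPTY;
     *
     *   - re-inserting 10-20: the new start matches an existing start
     *     element exactly, a full overlap reported as -EEXIST, which the
     *     caller clears when NLM_F_EXCL is not given.
     */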
+ +Fixes: 7c84d41416d8 ("netfilter: nft_set_rbtree: Detect partial overlaps on insertion") +Reviewed-by: Stefano Brivio +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nft_set_rbtree.c | 223 ++++++++++++++++++++++++++++++++++++----- + 1 file changed, 198 insertions(+), 25 deletions(-) + +--- a/net/netfilter/nft_set_rbtree.c ++++ b/net/netfilter/nft_set_rbtree.c +@@ -41,10 +41,12 @@ static bool nft_rbtree_interval_start(co + return !nft_rbtree_interval_end(rbe); + } + +-static bool nft_rbtree_equal(const struct nft_set *set, const void *this, +- const struct nft_rbtree_elem *interval) ++static int nft_rbtree_cmp(const struct nft_set *set, ++ const struct nft_rbtree_elem *e1, ++ const struct nft_rbtree_elem *e2) + { +- return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0; ++ return memcmp(nft_set_ext_key(&e1->ext), nft_set_ext_key(&e2->ext), ++ set->klen); + } + + static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set, +@@ -55,7 +57,6 @@ static bool __nft_rbtree_lookup(const st + const struct nft_rbtree_elem *rbe, *interval = NULL; + u8 genmask = nft_genmask_cur(net); + const struct rb_node *parent; +- const void *this; + int d; + + parent = rcu_dereference_raw(priv->root.rb_node); +@@ -65,12 +66,11 @@ static bool __nft_rbtree_lookup(const st + + rbe = rb_entry(parent, struct nft_rbtree_elem, node); + +- this = nft_set_ext_key(&rbe->ext); +- d = memcmp(this, key, set->klen); ++ d = memcmp(nft_set_ext_key(&rbe->ext), key, set->klen); + if (d < 0) { + parent = rcu_dereference_raw(parent->rb_left); + if (interval && +- nft_rbtree_equal(set, this, interval) && ++ !nft_rbtree_cmp(set, rbe, interval) && + nft_rbtree_interval_end(rbe) && + nft_rbtree_interval_start(interval)) + continue; +@@ -217,43 +217,216 @@ static void *nft_rbtree_get(const struct + return rbe; + } + ++static int nft_rbtree_gc_elem(const struct nft_set *__set, ++ struct nft_rbtree *priv, ++ struct nft_rbtree_elem *rbe) ++{ ++ struct nft_set *set = (struct nft_set *)__set; ++ struct rb_node *prev = rb_prev(&rbe->node); ++ struct nft_rbtree_elem *rbe_prev; ++ struct nft_set_gc_batch *gcb; ++ ++ gcb = nft_set_gc_batch_check(set, NULL, GFP_ATOMIC); ++ if (!gcb) ++ return -ENOMEM; ++ ++ /* search for expired end interval coming before this element. */ ++ do { ++ rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node); ++ if (nft_rbtree_interval_end(rbe_prev)) ++ break; ++ ++ prev = rb_prev(prev); ++ } while (prev != NULL); ++ ++ rb_erase(&rbe_prev->node, &priv->root); ++ rb_erase(&rbe->node, &priv->root); ++ atomic_sub(2, &set->nelems); ++ ++ nft_set_gc_batch_add(gcb, rbe); ++ nft_set_gc_batch_complete(gcb); ++ ++ return 0; ++} ++ ++static bool nft_rbtree_update_first(const struct nft_set *set, ++ struct nft_rbtree_elem *rbe, ++ struct rb_node *first) ++{ ++ struct nft_rbtree_elem *first_elem; ++ ++ first_elem = rb_entry(first, struct nft_rbtree_elem, node); ++ /* this element is closest to where the new element is to be inserted: ++ * update the first element for the node list path. 
++ */ ++ if (nft_rbtree_cmp(set, rbe, first_elem) < 0) ++ return true; ++ ++ return false; ++} ++ + static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, + struct nft_rbtree_elem *new, + struct nft_set_ext **ext) + { ++ struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL; ++ struct rb_node *node, *parent, **p, *first = NULL; + struct nft_rbtree *priv = nft_set_priv(set); + u8 genmask = nft_genmask_next(net); +- struct nft_rbtree_elem *rbe; +- struct rb_node *parent, **p; +- int d; ++ int d, err; + ++ /* Descend the tree to search for an existing element greater than the ++ * key value to insert that is greater than the new element. This is the ++ * first element to walk the ordered elements to find possible overlap. ++ */ + parent = NULL; + p = &priv->root.rb_node; + while (*p != NULL) { + parent = *p; + rbe = rb_entry(parent, struct nft_rbtree_elem, node); +- d = memcmp(nft_set_ext_key(&rbe->ext), +- nft_set_ext_key(&new->ext), +- set->klen); +- if (d < 0) ++ d = nft_rbtree_cmp(set, rbe, new); ++ ++ if (d < 0) { + p = &parent->rb_left; +- else if (d > 0) ++ } else if (d > 0) { ++ if (!first || ++ nft_rbtree_update_first(set, rbe, first)) ++ first = &rbe->node; ++ + p = &parent->rb_right; +- else { +- if (nft_rbtree_interval_end(rbe) && +- nft_rbtree_interval_start(new)) { ++ } else { ++ if (nft_rbtree_interval_end(rbe)) + p = &parent->rb_left; +- } else if (nft_rbtree_interval_start(rbe) && +- nft_rbtree_interval_end(new)) { ++ else + p = &parent->rb_right; +- } else if (nft_set_elem_active(&rbe->ext, genmask)) { +- *ext = &rbe->ext; +- return -EEXIST; +- } else { +- p = &parent->rb_left; ++ } ++ } ++ ++ if (!first) ++ first = rb_first(&priv->root); ++ ++ /* Detect overlap by going through the list of valid tree nodes. ++ * Values stored in the tree are in reversed order, starting from ++ * highest to lowest value. ++ */ ++ for (node = first; node != NULL; node = rb_next(node)) { ++ rbe = rb_entry(node, struct nft_rbtree_elem, node); ++ ++ if (!nft_set_elem_active(&rbe->ext, genmask)) ++ continue; ++ ++ /* perform garbage collection to avoid bogus overlap reports. */ ++ if (nft_set_elem_expired(&rbe->ext)) { ++ err = nft_rbtree_gc_elem(set, priv, rbe); ++ if (err < 0) ++ return err; ++ ++ continue; ++ } ++ ++ d = nft_rbtree_cmp(set, rbe, new); ++ if (d == 0) { ++ /* Matching end element: no need to look for an ++ * overlapping greater or equal element. ++ */ ++ if (nft_rbtree_interval_end(rbe)) { ++ rbe_le = rbe; ++ break; ++ } ++ ++ /* first element that is greater or equal to key value. */ ++ if (!rbe_ge) { ++ rbe_ge = rbe; ++ continue; ++ } ++ ++ /* this is a closer more or equal element, update it. */ ++ if (nft_rbtree_cmp(set, rbe_ge, new) != 0) { ++ rbe_ge = rbe; ++ continue; + } ++ ++ /* element is equal to key value, make sure flags are ++ * the same, an existing more or equal start element ++ * must not be replaced by more or equal end element. ++ */ ++ if ((nft_rbtree_interval_start(new) && ++ nft_rbtree_interval_start(rbe_ge)) || ++ (nft_rbtree_interval_end(new) && ++ nft_rbtree_interval_end(rbe_ge))) { ++ rbe_ge = rbe; ++ continue; ++ } ++ } else if (d > 0) { ++ /* annotate element greater than the new element. */ ++ rbe_ge = rbe; ++ continue; ++ } else if (d < 0) { ++ /* annotate element less than the new element. */ ++ rbe_le = rbe; ++ break; + } + } ++ ++ /* - new start element matching existing start element: full overlap ++ * reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given. 
++ */ ++ if (rbe_ge && !nft_rbtree_cmp(set, new, rbe_ge) && ++ nft_rbtree_interval_start(rbe_ge) == nft_rbtree_interval_start(new)) { ++ *ext = &rbe_ge->ext; ++ return -EEXIST; ++ } ++ ++ /* - new end element matching existing end element: full overlap ++ * reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given. ++ */ ++ if (rbe_le && !nft_rbtree_cmp(set, new, rbe_le) && ++ nft_rbtree_interval_end(rbe_le) == nft_rbtree_interval_end(new)) { ++ *ext = &rbe_le->ext; ++ return -EEXIST; ++ } ++ ++ /* - new start element with existing closest, less or equal key value ++ * being a start element: partial overlap, reported as -ENOTEMPTY. ++ * Anonymous sets allow for two consecutive start element since they ++ * are constant, skip them to avoid bogus overlap reports. ++ */ ++ if (!nft_set_is_anonymous(set) && rbe_le && ++ nft_rbtree_interval_start(rbe_le) && nft_rbtree_interval_start(new)) ++ return -ENOTEMPTY; ++ ++ /* - new end element with existing closest, less or equal key value ++ * being a end element: partial overlap, reported as -ENOTEMPTY. ++ */ ++ if (rbe_le && ++ nft_rbtree_interval_end(rbe_le) && nft_rbtree_interval_end(new)) ++ return -ENOTEMPTY; ++ ++ /* - new end element with existing closest, greater or equal key value ++ * being an end element: partial overlap, reported as -ENOTEMPTY ++ */ ++ if (rbe_ge && ++ nft_rbtree_interval_end(rbe_ge) && nft_rbtree_interval_end(new)) ++ return -ENOTEMPTY; ++ ++ /* Accepted element: pick insertion point depending on key value */ ++ parent = NULL; ++ p = &priv->root.rb_node; ++ while (*p != NULL) { ++ parent = *p; ++ rbe = rb_entry(parent, struct nft_rbtree_elem, node); ++ d = nft_rbtree_cmp(set, rbe, new); ++ ++ if (d < 0) ++ p = &parent->rb_left; ++ else if (d > 0) ++ p = &parent->rb_right; ++ else if (nft_rbtree_interval_end(rbe)) ++ p = &parent->rb_left; ++ else ++ p = &parent->rb_right; ++ } ++ + rb_link_node_rcu(&new->node, parent, p); + rb_insert_color(&new->node, &priv->root); + return 0; diff --git a/queue-4.19/netfilter-nft_set_rbtree-use-read-spinlock-to-avoid-datapath-contention.patch b/queue-4.19/netfilter-nft_set_rbtree-use-read-spinlock-to-avoid-datapath-contention.patch new file mode 100644 index 00000000000..607a972a661 --- /dev/null +++ b/queue-4.19/netfilter-nft_set_rbtree-use-read-spinlock-to-avoid-datapath-contention.patch @@ -0,0 +1,45 @@ +From stable+bounces-50359-greg=kroah.com@vger.kernel.org Thu Jun 13 03:03:15 2024 +From: Pablo Neira Ayuso +Date: Thu, 13 Jun 2024 03:01:47 +0200 +Subject: netfilter: nft_set_rbtree: use read spinlock to avoid datapath contention +To: netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: <20240613010209.104423-19-pablo@netfilter.org> + +From: Pablo Neira Ayuso + +commit 96b33300fba880ec0eafcf3d82486f3463b4b6da upstream. + +rbtree GC does not modify the datastructure, instead it collects expired +elements and it enqueues a GC transaction. Use a read spinlock instead +to avoid data contention while GC worker is running. 
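A minimal sketch of the resulting gc worker locking, simplified relative to the real nft_rbtree_gc() (which also marks elements dead and pairs end intervals); the enqueue helper name is assumed from the GC transaction API introduced earlier in this series. Since the worker never erases or rebalances nodes here, the datapath read lock is sufficient:

    read_lock_bh(&priv->lock);
    for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
            rbe = rb_entry(node, struct nft_rbtree_elem, node);
            if (!nft_set_elem_expired(&rbe->ext))
                    continue;
            nft_trans_gc_elem_add(gc, rbe); /* queue for the gc transaction */
    }
    read_unlock_bh(&priv->lock);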
+ +Fixes: f6c383b8c31a ("netfilter: nf_tables: adapt set backend to use GC transaction API") +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nft_set_rbtree.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +--- a/net/netfilter/nft_set_rbtree.c ++++ b/net/netfilter/nft_set_rbtree.c +@@ -629,8 +629,7 @@ static void nft_rbtree_gc(struct work_st + if (!gc) + goto done; + +- write_lock_bh(&priv->lock); +- write_seqcount_begin(&priv->count); ++ read_lock_bh(&priv->lock); + for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) { + + /* Ruleset has been updated, try later. */ +@@ -679,8 +678,7 @@ dead_elem: + } + + try_later: +- write_seqcount_end(&priv->count); +- write_unlock_bh(&priv->lock); ++ read_unlock_bh(&priv->lock); + + if (gc) + nft_trans_gc_queue_async_done(gc); diff --git a/queue-4.19/netfilter-nftables-exthdr-fix-4-byte-stack-oob-write.patch b/queue-4.19/netfilter-nftables-exthdr-fix-4-byte-stack-oob-write.patch new file mode 100644 index 00000000000..3b95090b271 --- /dev/null +++ b/queue-4.19/netfilter-nftables-exthdr-fix-4-byte-stack-oob-write.patch @@ -0,0 +1,72 @@ +From stable+bounces-50368-greg=kroah.com@vger.kernel.org Thu Jun 13 03:03:44 2024 +From: Pablo Neira Ayuso +Date: Thu, 13 Jun 2024 03:01:57 +0200 +Subject: netfilter: nftables: exthdr: fix 4-byte stack OOB write +To: netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: <20240613010209.104423-29-pablo@netfilter.org> + +From: Florian Westphal + +commit fd94d9dadee58e09b49075240fe83423eb1dcd36 upstream. + +If priv->len is a multiple of 4, then dst[len / 4] can write past +the destination array which leads to stack corruption. + +This construct is necessary to clean the remainder of the register +in case ->len is NOT a multiple of the register size, so make it +conditional just like nft_payload.c does. + +The bug was added in 4.1 cycle and then copied/inherited when +tcp/sctp and ip option support was added. + +Bug reported by Zero Day Initiative project (ZDI-CAN-21950, +ZDI-CAN-21951, ZDI-CAN-21961). 
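To make the off-by-one concrete (a worked example, not part of the patch): registers are 32-bit, so a priv->len of 16 bytes fills dest[0]..dest[3]; the unconditional zero-fill then writes dest[4], one u32 past the space reserved for the expression. Clearing is only needed when the last register is partially filled, hence:

    /* only zero the tail register when priv->len does not fill it exactly */
    if (priv->len % NFT_REG32_SIZE)
            dest[priv->len / NFT_REG32_SIZE] = 0;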
+ +Fixes: 49499c3e6e18 ("netfilter: nf_tables: switch registers to 32 bit addressing") +Fixes: 935b7f643018 ("netfilter: nft_exthdr: add TCP option matching") +Fixes: 133dc203d77d ("netfilter: nft_exthdr: Support SCTP chunks") +Fixes: dbb5281a1f84 ("netfilter: nf_tables: add support for matching IPv4 options") +Signed-off-by: Florian Westphal +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nft_exthdr.c | 14 +++++++++++--- + 1 file changed, 11 insertions(+), 3 deletions(-) + +--- a/net/netfilter/nft_exthdr.c ++++ b/net/netfilter/nft_exthdr.c +@@ -36,6 +36,14 @@ static unsigned int optlen(const u8 *opt + return opt[offset + 1]; + } + ++static int nft_skb_copy_to_reg(const struct sk_buff *skb, int offset, u32 *dest, unsigned int len) ++{ ++ if (len % NFT_REG32_SIZE) ++ dest[len / NFT_REG32_SIZE] = 0; ++ ++ return skb_copy_bits(skb, offset, dest, len); ++} ++ + static void nft_exthdr_ipv6_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +@@ -57,8 +65,7 @@ static void nft_exthdr_ipv6_eval(const s + } + offset += priv->offset; + +- dest[priv->len / NFT_REG32_SIZE] = 0; +- if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0) ++ if (nft_skb_copy_to_reg(pkt->skb, offset, dest, priv->len) < 0) + goto err; + return; + err: +@@ -114,7 +121,8 @@ static void nft_exthdr_tcp_eval(const st + if (priv->flags & NFT_EXTHDR_F_PRESENT) { + *dest = 1; + } else { +- dest[priv->len / NFT_REG32_SIZE] = 0; ++ if (priv->len % NFT_REG32_SIZE) ++ dest[priv->len / NFT_REG32_SIZE] = 0; + memcpy(dest, opt + offset, priv->len); + } + diff --git a/queue-4.19/netfilter-nftables-rename-set-element-data-activation-deactivation-functions.patch b/queue-4.19/netfilter-nftables-rename-set-element-data-activation-deactivation-functions.patch new file mode 100644 index 00000000000..d94b8741b3c --- /dev/null +++ b/queue-4.19/netfilter-nftables-rename-set-element-data-activation-deactivation-functions.patch @@ -0,0 +1,92 @@ +From stable+bounces-50342-greg=kroah.com@vger.kernel.org Thu Jun 13 03:02:36 2024 +From: Pablo Neira Ayuso +Date: Thu, 13 Jun 2024 03:01:31 +0200 +Subject: netfilter: nftables: rename set element data activation/deactivation functions +To: netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: <20240613010209.104423-3-pablo@netfilter.org> + +From: Pablo Neira Ayuso + +commit f8bb7889af58d8e74d2d61c76b1418230f1610fa upstream. + +Rename: + +- nft_set_elem_activate() to nft_set_elem_data_activate(). +- nft_set_elem_deactivate() to nft_set_elem_data_deactivate(). + +To prepare for updates in the set element infrastructure to add support +for the special catch-all element. + +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nf_tables_api.c | 22 +++++++++++----------- + 1 file changed, 11 insertions(+), 11 deletions(-) + +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -4501,8 +4501,8 @@ void nft_set_elem_destroy(const struct n + } + EXPORT_SYMBOL_GPL(nft_set_elem_destroy); + +-/* Only called from commit path, nft_set_elem_deactivate() already deals with +- * the refcounting from the preparation phase. ++/* Only called from commit path, nft_setelem_data_deactivate() already deals ++ * with the refcounting from the preparation phase. 
+ */ + static void nf_tables_set_elem_destroy(const struct nft_ctx *ctx, + const struct nft_set *set, void *elem) +@@ -4806,9 +4806,9 @@ void nft_data_hold(const struct nft_data + } + } + +-static void nft_set_elem_activate(const struct net *net, +- const struct nft_set *set, +- struct nft_set_elem *elem) ++static void nft_setelem_data_activate(const struct net *net, ++ const struct nft_set *set, ++ struct nft_set_elem *elem) + { + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); + +@@ -4818,9 +4818,9 @@ static void nft_set_elem_activate(const + nft_use_inc_restore(&(*nft_set_ext_obj(ext))->use); + } + +-static void nft_set_elem_deactivate(const struct net *net, +- const struct nft_set *set, +- struct nft_set_elem *elem) ++static void nft_setelem_data_deactivate(const struct net *net, ++ const struct nft_set *set, ++ struct nft_set_elem *elem) + { + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); + +@@ -4887,7 +4887,7 @@ static int nft_del_setelem(struct nft_ct + kfree(elem.priv); + elem.priv = priv; + +- nft_set_elem_deactivate(ctx->net, set, &elem); ++ nft_setelem_data_deactivate(ctx->net, set, &elem); + + nft_trans_elem(trans) = elem; + nft_trans_commit_list_add_tail(ctx->net, trans); +@@ -4921,7 +4921,7 @@ static int nft_flush_set(const struct nf + } + set->ndeact++; + +- nft_set_elem_deactivate(ctx->net, set, elem); ++ nft_setelem_data_deactivate(ctx->net, set, elem); + nft_trans_elem_set(trans) = set; + nft_trans_elem(trans) = *elem; + nft_trans_commit_list_add_tail(ctx->net, trans); +@@ -6954,7 +6954,7 @@ static int __nf_tables_abort(struct net + case NFT_MSG_DELSETELEM: + te = (struct nft_trans_elem *)trans->data; + +- nft_set_elem_activate(net, te->set, &te->elem); ++ nft_setelem_data_activate(net, te->set, &te->elem); + te->set->ops->activate(net, te->set, &te->elem); + te->set->ndeact--; + diff --git a/queue-4.19/netfilter-nftables-update-table-flags-from-the-commit-phase.patch b/queue-4.19/netfilter-nftables-update-table-flags-from-the-commit-phase.patch new file mode 100644 index 00000000000..c133ccde0ef --- /dev/null +++ b/queue-4.19/netfilter-nftables-update-table-flags-from-the-commit-phase.patch @@ -0,0 +1,114 @@ +From stable+bounces-50363-greg=kroah.com@vger.kernel.org Thu Jun 13 03:03:28 2024 +From: Pablo Neira Ayuso +Date: Thu, 13 Jun 2024 03:01:52 +0200 +Subject: netfilter: nftables: update table flags from the commit phase +To: netfilter-devel@vger.kernel.org +Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org +Message-ID: <20240613010209.104423-24-pablo@netfilter.org> + +From: Pablo Neira Ayuso + +commit 0ce7cf4127f14078ca598ba9700d813178a59409 upstream. + +Do not update table flags from the preparation phase. Store the flags +update into the transaction, then update the flags from the commit +phase. 
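The shape of the deferral, condensed from the hunks below (a sketch, not additional patch content): the prepare step only records what was requested (a wakeup also registers hooks at this point), the commit step is what finally touches the table, and abort merely unwinds the early hook registration.

    /* prepare: remember the request in the transaction */
    nft_trans_table_flags(trans) = flags;
    nft_trans_table_state(trans) = NFT_TABLE_STATE_DORMANT; /* or _WAKEUP */

    /* commit: only now update the table itself */
    if (nft_trans_table_state(trans) == NFT_TABLE_STATE_DORMANT)
            nf_tables_table_disable(net, trans->ctx.table);
    trans->ctx.table->flags = nft_trans_table_flags(trans);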
+ +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + include/net/netfilter/nf_tables.h | 9 ++++++--- + net/netfilter/nf_tables_api.c | 31 ++++++++++++++++--------------- + 2 files changed, 22 insertions(+), 18 deletions(-) + +--- a/include/net/netfilter/nf_tables.h ++++ b/include/net/netfilter/nf_tables.h +@@ -1347,13 +1347,16 @@ struct nft_trans_chain { + + struct nft_trans_table { + bool update; +- bool enable; ++ u8 state; ++ u32 flags; + }; + + #define nft_trans_table_update(trans) \ + (((struct nft_trans_table *)trans->data)->update) +-#define nft_trans_table_enable(trans) \ +- (((struct nft_trans_table *)trans->data)->enable) ++#define nft_trans_table_state(trans) \ ++ (((struct nft_trans_table *)trans->data)->state) ++#define nft_trans_table_flags(trans) \ ++ (((struct nft_trans_table *)trans->data)->flags) + + struct nft_trans_elem { + struct nft_set *set; +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -868,6 +868,12 @@ static void nf_tables_table_disable(stru + nft_table_disable(net, table, 0); + } + ++enum { ++ NFT_TABLE_STATE_UNCHANGED = 0, ++ NFT_TABLE_STATE_DORMANT, ++ NFT_TABLE_STATE_WAKEUP ++}; ++ + static int nf_tables_updtable(struct nft_ctx *ctx) + { + struct nft_trans *trans; +@@ -891,19 +897,17 @@ static int nf_tables_updtable(struct nft + + if ((flags & NFT_TABLE_F_DORMANT) && + !(ctx->table->flags & NFT_TABLE_F_DORMANT)) { +- nft_trans_table_enable(trans) = false; ++ nft_trans_table_state(trans) = NFT_TABLE_STATE_DORMANT; + } else if (!(flags & NFT_TABLE_F_DORMANT) && + ctx->table->flags & NFT_TABLE_F_DORMANT) { +- ctx->table->flags &= ~NFT_TABLE_F_DORMANT; + ret = nf_tables_table_enable(ctx->net, ctx->table); + if (ret >= 0) +- nft_trans_table_enable(trans) = true; +- else +- ctx->table->flags |= NFT_TABLE_F_DORMANT; ++ nft_trans_table_state(trans) = NFT_TABLE_STATE_WAKEUP; + } + if (ret < 0) + goto err; + ++ nft_trans_table_flags(trans) = flags; + nft_trans_table_update(trans) = true; + nft_trans_commit_list_add_tail(ctx->net, trans); + return 0; +@@ -7009,11 +7013,10 @@ static int nf_tables_commit(struct net * + switch (trans->msg_type) { + case NFT_MSG_NEWTABLE: + if (nft_trans_table_update(trans)) { +- if (!nft_trans_table_enable(trans)) { +- nf_tables_table_disable(net, +- trans->ctx.table); +- trans->ctx.table->flags |= NFT_TABLE_F_DORMANT; +- } ++ if (nft_trans_table_state(trans) == NFT_TABLE_STATE_DORMANT) ++ nf_tables_table_disable(net, trans->ctx.table); ++ ++ trans->ctx.table->flags = nft_trans_table_flags(trans); + } else { + nft_clear(net, trans->ctx.table); + } +@@ -7174,11 +7177,9 @@ static int __nf_tables_abort(struct net + switch (trans->msg_type) { + case NFT_MSG_NEWTABLE: + if (nft_trans_table_update(trans)) { +- if (nft_trans_table_enable(trans)) { +- nf_tables_table_disable(net, +- trans->ctx.table); +- trans->ctx.table->flags |= NFT_TABLE_F_DORMANT; +- } ++ if (nft_trans_table_state(trans) == NFT_TABLE_STATE_WAKEUP) ++ nf_tables_table_disable(net, trans->ctx.table); ++ + nft_trans_destroy(trans); + } else { + list_del_rcu(&trans->ctx.table->list); diff --git a/queue-4.19/series b/queue-4.19/series index 50640dd72a7..9b0ce9246a9 100644 --- a/queue-4.19/series +++ b/queue-4.19/series @@ -156,3 +156,43 @@ arm64-dts-hi3798cv200-fix-the-size-of-gicr.patch media-mxl5xx-move-xpt-structures-off-stack.patch media-v4l2-core-hold-videodev_lock-until-dev-reg-finishes.patch fbdev-savage-handle-err-return-when-savagefb_check_var-failed.patch +netfilter-nf_tables-pass-context-to-nft_set_destroy.patch 
+netfilter-nftables-rename-set-element-data-activation-deactivation-functions.patch +netfilter-nf_tables-drop-map-element-references-from-preparation-phase.patch +netfilter-nft_set_rbtree-allow-loose-matching-of-closing-element-in-interval.patch +netfilter-nft_set_rbtree-add-missing-expired-checks.patch +netfilter-nft_set_rbtree-switch-to-node-list-walk-for-overlap-detection.patch +netfilter-nft_set_rbtree-fix-null-deref-on-element-insertion.patch +netfilter-nft_set_rbtree-fix-overlap-expiration-walk.patch +netfilter-nf_tables-don-t-skip-expired-elements-during-walk.patch +netfilter-nf_tables-gc-transaction-api-to-avoid-race-with-control-plane.patch +netfilter-nf_tables-adapt-set-backend-to-use-gc-transaction-api.patch +netfilter-nf_tables-remove-busy-mark-and-gc-batch-api.patch +netfilter-nf_tables-fix-gc-transaction-races-with-netns-and-netlink-event-exit-path.patch +netfilter-nf_tables-gc-transaction-race-with-netns-dismantle.patch +netfilter-nf_tables-gc-transaction-race-with-abort-path.patch +netfilter-nf_tables-defer-gc-run-if-previous-batch-is-still-pending.patch +netfilter-nft_set_rbtree-skip-sync-gc-for-new-elements-in-this-transaction.patch +netfilter-nft_set_rbtree-use-read-spinlock-to-avoid-datapath-contention.patch +netfilter-nft_set_hash-try-later-when-gc-hits-eagain-on-iteration.patch +netfilter-nf_tables-fix-memleak-when-more-than-255-elements-expired.patch +netfilter-nf_tables-unregister-flowtable-hooks-on-netns-exit.patch +netfilter-nf_tables-double-hook-unregistration-in-netns-path.patch +netfilter-nftables-update-table-flags-from-the-commit-phase.patch +netfilter-nf_tables-fix-table-flag-updates.patch +netfilter-nf_tables-disable-toggling-dormant-table-state-more-than-once.patch +netfilter-nf_tables-bogus-ebusy-when-deleting-flowtable-after-flush-for-4.19.patch +netfilter-nft_dynset-fix-timeouts-later-than-23-days.patch +netfilter-nftables-exthdr-fix-4-byte-stack-oob-write.patch +netfilter-nft_dynset-report-eopnotsupp-on-missing-set-feature.patch +netfilter-nft_dynset-relax-superfluous-check-on-set-updates.patch +netfilter-nf_tables-mark-newset-as-dead-on-transaction-abort.patch +netfilter-nf_tables-skip-dead-set-elements-in-netlink-dump.patch +netfilter-nf_tables-validate-nfproto_-family.patch +netfilter-nft_set_rbtree-skip-end-interval-element-from-gc.patch +netfilter-nf_tables-set-dormant-flag-on-hook-register-failure.patch +netfilter-nf_tables-allow-nfproto_inet-in-nft_-match-target-_validate.patch +netfilter-nf_tables-do-not-compare-internal-table-flags-on-updates.patch +netfilter-nf_tables-mark-set-as-dead-when-unbinding-anonymous-set-with-timeout.patch +netfilter-nf_tables-reject-new-basechain-after-table-flag-update.patch +netfilter-nf_tables-discard-table-flag-update-with-pending-basechain-deletion.patch