--- /dev/null
+From 41f84d64f5c6e1c67ce8362e68beeb184d500eac Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Sep 2023 18:30:15 +0200
+Subject: netfilter: nf_tables: adapt set backend to use GC transaction API
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit f6c383b8c31a93752a52697f8430a71dcbc46adf upstream.
+
+Use the GC transaction API to replace the old and buggy gc API and the
+busy mark approach.
+
+No set elements are removed from async garbage collection anymore;
+instead, the _DEAD bit is set so the set element is no longer visible
+from the lookup path. Async GC enqueues transaction work that might be
+aborted and retried later.
+
+The rbtree and pipapo set backends do not set the _DEAD bit from the
+sync GC path, since this runs in the control plane path where the
+mutex is held. In this case, set elements are deactivated, removed and
+then released via RCU callback; sync GC never fails.
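+
+As an illustration of the marking scheme, here is a minimal userspace
+sketch with made-up types (not the kernel API): lookups treat dead
+elements as absent, while the async GC only marks and collects them
+instead of freeing:
+
+	#include <stdatomic.h>
+	#include <stdbool.h>
+
+	#define ELEM_DEAD_BIT	0UL
+
+	struct elem {
+		atomic_ulong flags;	/* stands in for the ext genmask word */
+		/* key, expiry, ... */
+	};
+
+	static void elem_mark_dead(struct elem *e)
+	{
+		atomic_fetch_or(&e->flags, 1UL << ELEM_DEAD_BIT);
+	}
+
+	static bool elem_is_dead(struct elem *e)
+	{
+		return atomic_load(&e->flags) & (1UL << ELEM_DEAD_BIT);
+	}
+
+	/* lookup side: a dead element must not match */
+	static bool elem_visible(struct elem *e)
+	{
+		return !elem_is_dead(e);
+	}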
+
+Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges")
+Fixes: 8d8540c4f5e0 ("netfilter: nft_set_rbtree: add timeout support")
+Fixes: 9d0982927e79 ("netfilter: nft_hash: add support for timeouts")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_tables_api.c | 7 +-
+ net/netfilter/nft_set_hash.c | 77 +++++++++++-------
+ net/netfilter/nft_set_pipapo.c | 48 ++++++++---
+ net/netfilter/nft_set_rbtree.c | 144 ++++++++++++++++++++-------------
+ 4 files changed, 173 insertions(+), 103 deletions(-)
+
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index e8e18a54958f8..e179d1132f2fb 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -6153,7 +6153,6 @@ static void nft_setelem_activate(struct net *net, struct nft_set *set,
+
+ if (nft_setelem_is_catchall(set, elem)) {
+ nft_set_elem_change_active(net, set, ext);
+- nft_set_elem_clear_busy(ext);
+ } else {
+ set->ops->activate(net, set, elem);
+ }
+@@ -6168,8 +6167,7 @@ static int nft_setelem_catchall_deactivate(const struct net *net,
+
+ list_for_each_entry(catchall, &set->catchall_list, list) {
+ ext = nft_set_elem_ext(set, catchall->elem);
+- if (!nft_is_active(net, ext) ||
+- nft_set_elem_mark_busy(ext))
++ if (!nft_is_active(net, ext))
+ continue;
+
+ kfree(elem->priv);
+@@ -6880,8 +6878,7 @@ static int nft_set_catchall_flush(const struct nft_ctx *ctx,
+
+ list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
+ ext = nft_set_elem_ext(set, catchall->elem);
+- if (!nft_set_elem_active(ext, genmask) ||
+- nft_set_elem_mark_busy(ext))
++ if (!nft_set_elem_active(ext, genmask))
+ continue;
+
+ elem.priv = catchall->elem;
+diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
+index 24caa31fa2310..2f067e4596b02 100644
+--- a/net/netfilter/nft_set_hash.c
++++ b/net/netfilter/nft_set_hash.c
+@@ -59,6 +59,8 @@ static inline int nft_rhash_cmp(struct rhashtable_compare_arg *arg,
+
+ if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen))
+ return 1;
++ if (nft_set_elem_is_dead(&he->ext))
++ return 1;
+ if (nft_set_elem_expired(&he->ext))
+ return 1;
+ if (!nft_set_elem_active(&he->ext, x->genmask))
+@@ -188,7 +190,6 @@ static void nft_rhash_activate(const struct net *net, const struct nft_set *set,
+ struct nft_rhash_elem *he = elem->priv;
+
+ nft_set_elem_change_active(net, set, &he->ext);
+- nft_set_elem_clear_busy(&he->ext);
+ }
+
+ static bool nft_rhash_flush(const struct net *net,
+@@ -196,12 +197,9 @@ static bool nft_rhash_flush(const struct net *net,
+ {
+ struct nft_rhash_elem *he = priv;
+
+- if (!nft_set_elem_mark_busy(&he->ext) ||
+- !nft_is_active(net, &he->ext)) {
+- nft_set_elem_change_active(net, set, &he->ext);
+- return true;
+- }
+- return false;
++ nft_set_elem_change_active(net, set, &he->ext);
++
++ return true;
+ }
+
+ static void *nft_rhash_deactivate(const struct net *net,
+@@ -218,9 +216,8 @@ static void *nft_rhash_deactivate(const struct net *net,
+
+ rcu_read_lock();
+ he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
+- if (he != NULL &&
+- !nft_rhash_flush(net, set, he))
+- he = NULL;
++ if (he)
++ nft_set_elem_change_active(net, set, &he->ext);
+
+ rcu_read_unlock();
+
+@@ -312,25 +309,48 @@ static bool nft_rhash_expr_needs_gc_run(const struct nft_set *set,
+
+ static void nft_rhash_gc(struct work_struct *work)
+ {
++ struct nftables_pernet *nft_net;
+ struct nft_set *set;
+ struct nft_rhash_elem *he;
+ struct nft_rhash *priv;
+- struct nft_set_gc_batch *gcb = NULL;
+ struct rhashtable_iter hti;
++ struct nft_trans_gc *gc;
++ struct net *net;
++ u32 gc_seq;
+
+ priv = container_of(work, struct nft_rhash, gc_work.work);
+ set = nft_set_container_of(priv);
++ net = read_pnet(&set->net);
++ nft_net = nft_pernet(net);
++ gc_seq = READ_ONCE(nft_net->gc_seq);
++
++ gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
++ if (!gc)
++ goto done;
+
+ rhashtable_walk_enter(&priv->ht, &hti);
+ rhashtable_walk_start(&hti);
+
+ while ((he = rhashtable_walk_next(&hti))) {
+ if (IS_ERR(he)) {
+- if (PTR_ERR(he) != -EAGAIN)
+- break;
++ if (PTR_ERR(he) != -EAGAIN) {
++ nft_trans_gc_destroy(gc);
++ gc = NULL;
++ goto try_later;
++ }
+ continue;
+ }
+
++ /* Ruleset has been updated, try later. */
++ if (READ_ONCE(nft_net->gc_seq) != gc_seq) {
++ nft_trans_gc_destroy(gc);
++ gc = NULL;
++ goto try_later;
++ }
++
++ if (nft_set_elem_is_dead(&he->ext))
++ goto dead_elem;
++
+ if (nft_set_ext_exists(&he->ext, NFT_SET_EXT_EXPRESSIONS) &&
+ nft_rhash_expr_needs_gc_run(set, &he->ext))
+ goto needs_gc_run;
+@@ -338,26 +358,26 @@ static void nft_rhash_gc(struct work_struct *work)
+ if (!nft_set_elem_expired(&he->ext))
+ continue;
+ needs_gc_run:
+- if (nft_set_elem_mark_busy(&he->ext))
+- continue;
++ nft_set_elem_dead(&he->ext);
++dead_elem:
++ gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
++ if (!gc)
++ goto try_later;
+
+- gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
+- if (gcb == NULL)
+- break;
+- rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params);
+- atomic_dec(&set->nelems);
+- nft_set_gc_batch_add(gcb, he);
++ nft_trans_gc_elem_add(gc, he);
+ }
++
++ gc = nft_trans_gc_catchall(gc, gc_seq);
++
++try_later:
++ /* catchall list iteration requires rcu read side lock. */
+ rhashtable_walk_stop(&hti);
+ rhashtable_walk_exit(&hti);
+
+- he = nft_set_catchall_gc(set);
+- if (he) {
+- gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
+- if (gcb)
+- nft_set_gc_batch_add(gcb, he);
+- }
+- nft_set_gc_batch_complete(gcb);
++ if (gc)
++ nft_trans_gc_queue_async_done(gc);
++
++done:
+ queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
+ nft_set_gc_interval(set));
+ }
+@@ -420,7 +440,6 @@ static void nft_rhash_destroy(const struct nft_ctx *ctx,
+ };
+
+ cancel_delayed_work_sync(&priv->gc_work);
+- rcu_barrier();
+ rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy,
+ (void *)&rhash_ctx);
+ }
+diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
+index b6a994ba72f31..a307a227d28db 100644
+--- a/net/netfilter/nft_set_pipapo.c
++++ b/net/netfilter/nft_set_pipapo.c
+@@ -1544,16 +1544,34 @@ static void pipapo_drop(struct nft_pipapo_match *m,
+ }
+ }
+
++static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set,
++ struct nft_pipapo_elem *e)
++
++{
++ struct nft_set_elem elem = {
++ .priv = e,
++ };
++
++ nft_setelem_data_deactivate(net, set, &elem);
++}
++
+ /**
+ * pipapo_gc() - Drop expired entries from set, destroy start and end elements
+ * @set: nftables API set representation
+ * @m: Matching data
+ */
+-static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m)
++static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m)
+ {
++ struct nft_set *set = (struct nft_set *) _set;
+ struct nft_pipapo *priv = nft_set_priv(set);
++ struct net *net = read_pnet(&set->net);
+ int rules_f0, first_rule = 0;
+ struct nft_pipapo_elem *e;
++ struct nft_trans_gc *gc;
++
++ gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
++ if (!gc)
++ return;
+
+ while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) {
+ union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
+@@ -1577,13 +1595,20 @@ static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m)
+ f--;
+ i--;
+ e = f->mt[rulemap[i].to].e;
+- if (nft_set_elem_expired(&e->ext) &&
+- !nft_set_elem_mark_busy(&e->ext)) {
++
++ /* synchronous gc never fails, there is no need to set on
++ * NFT_SET_ELEM_DEAD_BIT.
++ */
++ if (nft_set_elem_expired(&e->ext)) {
+ priv->dirty = true;
+- pipapo_drop(m, rulemap);
+
+- rcu_barrier();
+- nft_set_elem_destroy(set, e, true);
++ gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
++ if (!gc)
++ break;
++
++ nft_pipapo_gc_deactivate(net, set, e);
++ pipapo_drop(m, rulemap);
++ nft_trans_gc_elem_add(gc, e);
+
+ /* And check again current first rule, which is now the
+ * first we haven't checked.
+@@ -1593,11 +1618,11 @@ static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m)
+ }
+ }
+
+- e = nft_set_catchall_gc(set);
+- if (e)
+- nft_set_elem_destroy(set, e, true);
+-
+- priv->last_gc = jiffies;
++ gc = nft_trans_gc_catchall(gc, 0);
++ if (gc) {
++ nft_trans_gc_queue_sync_done(gc);
++ priv->last_gc = jiffies;
++ }
+ }
+
+ /**
+@@ -1733,7 +1758,6 @@ static void nft_pipapo_activate(const struct net *net,
+ return;
+
+ nft_set_elem_change_active(net, set, &e->ext);
+- nft_set_elem_clear_busy(&e->ext);
+ }
+
+ /**
+diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
+index 39956e5341c9e..f9d4c8fcbbf82 100644
+--- a/net/netfilter/nft_set_rbtree.c
++++ b/net/netfilter/nft_set_rbtree.c
+@@ -46,6 +46,12 @@ static int nft_rbtree_cmp(const struct nft_set *set,
+ set->klen);
+ }
+
++static bool nft_rbtree_elem_expired(const struct nft_rbtree_elem *rbe)
++{
++ return nft_set_elem_expired(&rbe->ext) ||
++ nft_set_elem_is_dead(&rbe->ext);
++}
++
+ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext,
+ unsigned int seq)
+@@ -80,7 +86,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
+ continue;
+ }
+
+- if (nft_set_elem_expired(&rbe->ext))
++ if (nft_rbtree_elem_expired(rbe))
+ return false;
+
+ if (nft_rbtree_interval_end(rbe)) {
+@@ -98,7 +104,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
+
+ if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
+ nft_set_elem_active(&interval->ext, genmask) &&
+- !nft_set_elem_expired(&interval->ext) &&
++ !nft_rbtree_elem_expired(interval) &&
+ nft_rbtree_interval_start(interval)) {
+ *ext = &interval->ext;
+ return true;
+@@ -215,6 +221,18 @@ static void *nft_rbtree_get(const struct net *net, const struct nft_set *set,
+ return rbe;
+ }
+
++static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set,
++ struct nft_rbtree *priv,
++ struct nft_rbtree_elem *rbe)
++{
++ struct nft_set_elem elem = {
++ .priv = rbe,
++ };
++
++ nft_setelem_data_deactivate(net, set, &elem);
++ rb_erase(&rbe->node, &priv->root);
++}
++
+ static int nft_rbtree_gc_elem(const struct nft_set *__set,
+ struct nft_rbtree *priv,
+ struct nft_rbtree_elem *rbe,
+@@ -222,11 +240,12 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set,
+ {
+ struct nft_set *set = (struct nft_set *)__set;
+ struct rb_node *prev = rb_prev(&rbe->node);
++ struct net *net = read_pnet(&set->net);
+ struct nft_rbtree_elem *rbe_prev;
+- struct nft_set_gc_batch *gcb;
++ struct nft_trans_gc *gc;
+
+- gcb = nft_set_gc_batch_check(set, NULL, GFP_ATOMIC);
+- if (!gcb)
++ gc = nft_trans_gc_alloc(set, 0, GFP_ATOMIC);
++ if (!gc)
+ return -ENOMEM;
+
+ /* search for end interval coming before this element.
+@@ -244,17 +263,28 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set,
+
+ if (prev) {
+ rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
++ nft_rbtree_gc_remove(net, set, priv, rbe_prev);
+
+- rb_erase(&rbe_prev->node, &priv->root);
+- atomic_dec(&set->nelems);
+- nft_set_gc_batch_add(gcb, rbe_prev);
++ /* There is always room in this trans gc for this element,
++ * memory allocation never actually happens, hence, the warning
++ * splat in such case. No need to set NFT_SET_ELEM_DEAD_BIT,
++ * this is synchronous gc which never fails.
++ */
++ gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
++ if (WARN_ON_ONCE(!gc))
++ return -ENOMEM;
++
++ nft_trans_gc_elem_add(gc, rbe_prev);
+ }
+
+- rb_erase(&rbe->node, &priv->root);
+- atomic_dec(&set->nelems);
++ nft_rbtree_gc_remove(net, set, priv, rbe);
++ gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
++ if (WARN_ON_ONCE(!gc))
++ return -ENOMEM;
++
++ nft_trans_gc_elem_add(gc, rbe);
+
+- nft_set_gc_batch_add(gcb, rbe);
+- nft_set_gc_batch_complete(gcb);
++ nft_trans_gc_queue_sync_done(gc);
+
+ return 0;
+ }
+@@ -482,7 +512,6 @@ static void nft_rbtree_activate(const struct net *net,
+ struct nft_rbtree_elem *rbe = elem->priv;
+
+ nft_set_elem_change_active(net, set, &rbe->ext);
+- nft_set_elem_clear_busy(&rbe->ext);
+ }
+
+ static bool nft_rbtree_flush(const struct net *net,
+@@ -490,12 +519,9 @@ static bool nft_rbtree_flush(const struct net *net,
+ {
+ struct nft_rbtree_elem *rbe = priv;
+
+- if (!nft_set_elem_mark_busy(&rbe->ext) ||
+- !nft_is_active(net, &rbe->ext)) {
+- nft_set_elem_change_active(net, set, &rbe->ext);
+- return true;
+- }
+- return false;
++ nft_set_elem_change_active(net, set, &rbe->ext);
++
++ return true;
+ }
+
+ static void *nft_rbtree_deactivate(const struct net *net,
+@@ -570,26 +596,40 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
+
+ static void nft_rbtree_gc(struct work_struct *work)
+ {
+- struct nft_rbtree_elem *rbe, *rbe_end = NULL, *rbe_prev = NULL;
+- struct nft_set_gc_batch *gcb = NULL;
++ struct nft_rbtree_elem *rbe, *rbe_end = NULL;
++ struct nftables_pernet *nft_net;
+ struct nft_rbtree *priv;
++ struct nft_trans_gc *gc;
+ struct rb_node *node;
+ struct nft_set *set;
++ unsigned int gc_seq;
+ struct net *net;
+- u8 genmask;
+
+ priv = container_of(work, struct nft_rbtree, gc_work.work);
+ set = nft_set_container_of(priv);
+ net = read_pnet(&set->net);
+- genmask = nft_genmask_cur(net);
++ nft_net = nft_pernet(net);
++ gc_seq = READ_ONCE(nft_net->gc_seq);
++
++ gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
++ if (!gc)
++ goto done;
+
+ write_lock_bh(&priv->lock);
+ write_seqcount_begin(&priv->count);
+ for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
++
++ /* Ruleset has been updated, try later. */
++ if (READ_ONCE(nft_net->gc_seq) != gc_seq) {
++ nft_trans_gc_destroy(gc);
++ gc = NULL;
++ goto try_later;
++ }
++
+ rbe = rb_entry(node, struct nft_rbtree_elem, node);
+
+- if (!nft_set_elem_active(&rbe->ext, genmask))
+- continue;
++ if (nft_set_elem_is_dead(&rbe->ext))
++ goto dead_elem;
+
+ /* elements are reversed in the rbtree for historical reasons,
+ * from highest to lowest value, that is why end element is
+@@ -602,46 +642,36 @@ static void nft_rbtree_gc(struct work_struct *work)
+ if (!nft_set_elem_expired(&rbe->ext))
+ continue;
+
+- if (nft_set_elem_mark_busy(&rbe->ext)) {
+- rbe_end = NULL;
++ nft_set_elem_dead(&rbe->ext);
++
++ if (!rbe_end)
+ continue;
+- }
+
+- if (rbe_prev) {
+- rb_erase(&rbe_prev->node, &priv->root);
+- rbe_prev = NULL;
+- }
+- gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
+- if (!gcb)
+- break;
++ nft_set_elem_dead(&rbe_end->ext);
+
+- atomic_dec(&set->nelems);
+- nft_set_gc_batch_add(gcb, rbe);
+- rbe_prev = rbe;
++ gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
++ if (!gc)
++ goto try_later;
+
+- if (rbe_end) {
+- atomic_dec(&set->nelems);
+- nft_set_gc_batch_add(gcb, rbe_end);
+- rb_erase(&rbe_end->node, &priv->root);
+- rbe_end = NULL;
+- }
+- node = rb_next(node);
+- if (!node)
+- break;
++ nft_trans_gc_elem_add(gc, rbe_end);
++ rbe_end = NULL;
++dead_elem:
++ gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
++ if (!gc)
++ goto try_later;
++
++ nft_trans_gc_elem_add(gc, rbe);
+ }
+- if (rbe_prev)
+- rb_erase(&rbe_prev->node, &priv->root);
++
++ gc = nft_trans_gc_catchall(gc, gc_seq);
++
++try_later:
+ write_seqcount_end(&priv->count);
+ write_unlock_bh(&priv->lock);
+
+- rbe = nft_set_catchall_gc(set);
+- if (rbe) {
+- gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
+- if (gcb)
+- nft_set_gc_batch_add(gcb, rbe);
+- }
+- nft_set_gc_batch_complete(gcb);
+-
++ if (gc)
++ nft_trans_gc_queue_async_done(gc);
++done:
+ queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
+ nft_set_gc_interval(set));
+ }
+--
+2.40.1
+
--- /dev/null
+From 4b94d86ba6d5707a73ab3cfa13917e8652ad6865 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Sep 2023 18:30:23 +0200
+Subject: netfilter: nf_tables: defer gc run if previous batch is still pending
+
+From: Florian Westphal <fw@strlen.de>
+
+commit 8e51830e29e12670b4c10df070a4ea4c9593e961 upstream.
+
+Don't queue more gc work, else we may queue the same elements multiple
+times.
+
+If an element is flagged as dead, this can mean that either the previous
+gc request was invalidated/discarded by a transaction or that the previous
+request is still pending in the system work queue.
+
+The latter will happen if the gc interval is set to a very low value,
+e.g. 1ms, and system work queue is backlogged.
+
+The set's refcount is 1 if no previous gc requests are queued, so add
+a helper for this and skip the gc run if old requests are still
+pending.
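+
+A minimal sketch of that check, assuming the one-reference-per-queued-
+batch scheme described above (hypothetical userspace types):
+
+	#include <stdatomic.h>
+	#include <stdbool.h>
+
+	struct set {
+		atomic_int refs;	/* 1 == held by the set alone */
+	};
+
+	/* every queued GC batch holds one extra reference on the set,
+	 * so refs != 1 means an earlier batch is still in flight */
+	static bool set_gc_is_pending(struct set *s)
+	{
+		return atomic_load(&s->refs) != 1;
+	}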
+
+Fixes: f6c383b8c31a ("netfilter: nf_tables: adapt set backend to use GC transaction API")
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/netfilter/nf_tables.h | 5 +++++
+ net/netfilter/nft_set_hash.c | 3 +++
+ net/netfilter/nft_set_rbtree.c | 3 +++
+ 3 files changed, 11 insertions(+)
+
+diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
+index 2d501dd901521..12777a5b60cd0 100644
+--- a/include/net/netfilter/nf_tables.h
++++ b/include/net/netfilter/nf_tables.h
+@@ -581,6 +581,11 @@ static inline void *nft_set_priv(const struct nft_set *set)
+ return (void *)set->data;
+ }
+
++static inline bool nft_set_gc_is_pending(const struct nft_set *s)
++{
++ return refcount_read(&s->refs) != 1;
++}
++
+ static inline struct nft_set *nft_set_container_of(const void *priv)
+ {
+ return (void *)priv - offsetof(struct nft_set, data);
+diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
+index cef5df8460009..524763659f251 100644
+--- a/net/netfilter/nft_set_hash.c
++++ b/net/netfilter/nft_set_hash.c
+@@ -326,6 +326,9 @@ static void nft_rhash_gc(struct work_struct *work)
+ nft_net = nft_pernet(net);
+ gc_seq = READ_ONCE(nft_net->gc_seq);
+
++ if (nft_set_gc_is_pending(set))
++ goto done;
++
+ gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
+ if (!gc)
+ goto done;
+diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
+index f9d4c8fcbbf82..c6435e7092319 100644
+--- a/net/netfilter/nft_set_rbtree.c
++++ b/net/netfilter/nft_set_rbtree.c
+@@ -611,6 +611,9 @@ static void nft_rbtree_gc(struct work_struct *work)
+ nft_net = nft_pernet(net);
+ gc_seq = READ_ONCE(nft_net->gc_seq);
+
++ if (nft_set_gc_is_pending(set))
++ goto done;
++
+ gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
+ if (!gc)
+ goto done;
+--
+2.40.1
+
--- /dev/null
+From d638ddd47fd23cd0160729dd70351e27639282b9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Sep 2023 18:30:18 +0200
+Subject: netfilter: nf_tables: don't fail inserts if duplicate has expired
+
+From: Florian Westphal <fw@strlen.de>
+
+commit 7845914f45f066497ac75b30c50dbc735e84e884 upstream.
+
+nftables selftests fail:
+run-tests.sh testcases/sets/0044interval_overlap_0
+Expected: 0-2 . 0-3, got:
+W: [FAILED] ./testcases/sets/0044interval_overlap_0: got 1
+
+Insertion must ignore duplicate but expired entries.
+
+Moreover, there is a strange asymmetry in nft_pipapo_activate:
+
+It refetches the current element, whereas the other ->activate callbacks
+(bitmap, hash, rhash, rbtree) use elem->priv.
+Same for .remove: other set implementations take elem->priv;
+nft_pipapo_remove fetches elem->priv and then does a relookup.
+Remove this.
+
+I suspect this was the reason for the change that prompted the
+removal of the expired check in pipapo_get() in the first place,
+but skipping expired elements there makes no sense to me: this helper
+is used for normal get requests, insertions (duplicate check)
+and the deactivate callback.
+
+In the first two cases expired elements must be skipped.
+
+For ->deactivate(), this gets called for DELSETELEM, so it
+seems to me that expired elements should be skipped as well, i.e.
+the delete request should fail with an -ENOENT error.
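+
+A sketch of the intended insert-time behaviour (hypothetical helper
+names, not the pipapo code): an expired duplicate is treated as
+absent, so the insertion proceeds instead of failing:
+
+	#include <stdbool.h>
+	#include <stddef.h>
+	#include <time.h>
+
+	struct elem {
+		time_t expires;		/* 0 == no timeout */
+	};
+
+	static bool elem_expired(const struct elem *e, time_t now)
+	{
+		return e->expires != 0 && e->expires <= now;
+	}
+
+	/* duplicate check during insert */
+	static bool insert_conflicts(const struct elem *dup, time_t now)
+	{
+		return dup != NULL && !elem_expired(dup, now);
+	}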
+
+Fixes: 24138933b97b ("netfilter: nf_tables: don't skip expired elements during walk")
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nft_set_pipapo.c | 23 ++++-------------------
+ 1 file changed, 4 insertions(+), 19 deletions(-)
+
+diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
+index a307a227d28db..58bd514260b90 100644
+--- a/net/netfilter/nft_set_pipapo.c
++++ b/net/netfilter/nft_set_pipapo.c
+@@ -566,6 +566,8 @@ static struct nft_pipapo_elem *pipapo_get(const struct net *net,
+ goto out;
+
+ if (last) {
++ if (nft_set_elem_expired(&f->mt[b].e->ext))
++ goto next_match;
+ if ((genmask &&
+ !nft_set_elem_active(&f->mt[b].e->ext, genmask)))
+ goto next_match;
+@@ -600,17 +602,8 @@ static struct nft_pipapo_elem *pipapo_get(const struct net *net,
+ static void *nft_pipapo_get(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem, unsigned int flags)
+ {
+- struct nft_pipapo_elem *ret;
+-
+- ret = pipapo_get(net, set, (const u8 *)elem->key.val.data,
++ return pipapo_get(net, set, (const u8 *)elem->key.val.data,
+ nft_genmask_cur(net));
+- if (IS_ERR(ret))
+- return ret;
+-
+- if (nft_set_elem_expired(&ret->ext))
+- return ERR_PTR(-ENOENT);
+-
+- return ret;
+ }
+
+ /**
+@@ -1751,11 +1744,7 @@ static void nft_pipapo_activate(const struct net *net,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem)
+ {
+- struct nft_pipapo_elem *e;
+-
+- e = pipapo_get(net, set, (const u8 *)elem->key.val.data, 0);
+- if (IS_ERR(e))
+- return;
++ struct nft_pipapo_elem *e = elem->priv;
+
+ nft_set_elem_change_active(net, set, &e->ext);
+ }
+@@ -1969,10 +1958,6 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
+
+ data = (const u8 *)nft_set_ext_key(&e->ext);
+
+- e = pipapo_get(net, set, data, 0);
+- if (IS_ERR(e))
+- return;
+-
+ while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) {
+ union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
+ const u8 *match_start, *match_end;
+--
+2.40.1
+
--- /dev/null
+From e4c9a93b8c5adfc18f7ec04739cc051358951e8d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Sep 2023 18:30:13 +0200
+Subject: netfilter: nf_tables: don't skip expired elements during walk
+
+From: Florian Westphal <fw@strlen.de>
+
+commit 24138933b97b055d486e8064b4a1721702442a9b upstream.
+
+There is an asymmetry between commit/abort and preparation phase if the
+following conditions are met:
+
+1. set is a verdict map ("1.2.3.4 : jump foo")
+2. timeouts are enabled
+
+In this case, the following sequence is problematic:
+
+1. element E in set S refers to chain C
+2. userspace requests removal of set S
+3. kernel does a set walk to decrement chain->use count for all elements
+ from preparation phase
+4. kernel does another set walk to remove elements from the commit phase
+ (or another walk to do a chain->use increment for all elements from
+ abort phase)
+
+If E has already expired in 1), it will be ignored during the list
+walk, so its use count won't have been changed.
+
+Then, when the set is culled, the ->destroy callback will zap the element via
+nf_tables_set_elem_destroy(), but this function is only safe for
+elements that have been deactivated earlier from the preparation phase:
+lack of earlier deactivate removes the element but leaks the chain use
+count, which results in a WARN splat when the chain gets removed later,
+plus a leak of the nft_chain structure.
+
+Update pipapo_get() not to skip expired elements, otherwise flush
+command reports bogus ENOENT errors.
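+
+A sketch of the intended split, with hypothetical types: the teardown
+walk visits every element so reference counts stay balanced, and only
+the dump path filters expired entries:
+
+	#include <stdbool.h>
+	#include <stddef.h>
+	#include <time.h>
+
+	struct chain { int use; };
+	struct elem { time_t expires; struct chain *chain; };
+
+	static bool elem_expired(const struct elem *e, time_t now)
+	{
+		return e->expires != 0 && e->expires <= now;
+	}
+
+	/* preparation-phase walk: expired or not, every element must
+	 * drop its reference on the chain it jumps to */
+	static void walk_deactivate(struct elem **elems, size_t n)
+	{
+		for (size_t i = 0; i < n; i++)
+			elems[i]->chain->use--;
+	}
+
+	/* dump path: expired elements are filtered here instead */
+	static bool dump_skip(const struct elem *e, time_t now)
+	{
+		return elem_expired(e, now);
+	}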
+
+Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges")
+Fixes: 8d8540c4f5e0 ("netfilter: nft_set_rbtree: add timeout support")
+Fixes: 9d0982927e79 ("netfilter: nft_hash: add support for timeouts")
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_tables_api.c | 4 ++++
+ net/netfilter/nft_set_hash.c | 2 --
+ net/netfilter/nft_set_pipapo.c | 18 ++++++++++++------
+ net/netfilter/nft_set_rbtree.c | 2 --
+ 4 files changed, 16 insertions(+), 10 deletions(-)
+
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index 3c5cac9bd9b70..475c556f49912 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -5386,8 +5386,12 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
+ const struct nft_set_iter *iter,
+ struct nft_set_elem *elem)
+ {
++ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+ struct nft_set_dump_args *args;
+
++ if (nft_set_elem_expired(ext))
++ return 0;
++
+ args = container_of(iter, struct nft_set_dump_args, iter);
+ return nf_tables_fill_setelem(args->skb, set, elem);
+ }
+diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
+index 0b73cb0e752f7..24caa31fa2310 100644
+--- a/net/netfilter/nft_set_hash.c
++++ b/net/netfilter/nft_set_hash.c
+@@ -278,8 +278,6 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
+
+ if (iter->count < iter->skip)
+ goto cont;
+- if (nft_set_elem_expired(&he->ext))
+- goto cont;
+ if (!nft_set_elem_active(&he->ext, iter->genmask))
+ goto cont;
+
+diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
+index 8c16681884b7e..b6a994ba72f31 100644
+--- a/net/netfilter/nft_set_pipapo.c
++++ b/net/netfilter/nft_set_pipapo.c
+@@ -566,8 +566,7 @@ static struct nft_pipapo_elem *pipapo_get(const struct net *net,
+ goto out;
+
+ if (last) {
+- if (nft_set_elem_expired(&f->mt[b].e->ext) ||
+- (genmask &&
++ if ((genmask &&
+ !nft_set_elem_active(&f->mt[b].e->ext, genmask)))
+ goto next_match;
+
+@@ -601,8 +600,17 @@ static struct nft_pipapo_elem *pipapo_get(const struct net *net,
+ static void *nft_pipapo_get(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem, unsigned int flags)
+ {
+- return pipapo_get(net, set, (const u8 *)elem->key.val.data,
+- nft_genmask_cur(net));
++ struct nft_pipapo_elem *ret;
++
++ ret = pipapo_get(net, set, (const u8 *)elem->key.val.data,
++ nft_genmask_cur(net));
++ if (IS_ERR(ret))
++ return ret;
++
++ if (nft_set_elem_expired(&ret->ext))
++ return ERR_PTR(-ENOENT);
++
++ return ret;
+ }
+
+ /**
+@@ -2024,8 +2032,6 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
+ goto cont;
+
+ e = f->mt[r].e;
+- if (nft_set_elem_expired(&e->ext))
+- goto cont;
+
+ elem.priv = e;
+
+diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
+index 8d73fffd2d09d..39956e5341c9e 100644
+--- a/net/netfilter/nft_set_rbtree.c
++++ b/net/netfilter/nft_set_rbtree.c
+@@ -552,8 +552,6 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
+
+ if (iter->count < iter->skip)
+ goto cont;
+- if (nft_set_elem_expired(&rbe->ext))
+- goto cont;
+ if (!nft_set_elem_active(&rbe->ext, iter->genmask))
+ goto cont;
+
+--
+2.40.1
+
--- /dev/null
+From 2105dff90f3eb057c9d54edb41842fddb9a3b520 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Sep 2023 18:30:19 +0200
+Subject: netfilter: nf_tables: fix GC transaction races with netns and netlink
+ event exit path
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit 6a33d8b73dfac0a41f3877894b38082bd0c9a5bc upstream.
+
+The netlink event path is missing a synchronization point with GC
+transactions. Add a GC sequence number update to the netns release
+path and the netlink event path; any GC transaction losing the race
+will be discarded.
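+
+The synchronization relies on the even/odd gc sequence counter;
+schematically (a userspace sketch, not the kernel helpers):
+
+	#include <stdatomic.h>
+	#include <stdbool.h>
+
+	/* control plane, under the commit mutex: the counter is odd
+	 * while an update is in flight, even again once it is done */
+	static unsigned int gc_seq_begin(atomic_uint *seq)
+	{
+		return atomic_fetch_add(seq, 1) + 1;
+	}
+
+	static void gc_seq_end(atomic_uint *seq)
+	{
+		atomic_fetch_add(seq, 1);
+	}
+
+	/* GC worker: sample the counter when the batch is built and
+	 * recheck it before committing; any change means the batch
+	 * raced an update (or netns exit) and must be discarded */
+	static bool gc_batch_valid(atomic_uint *seq, unsigned int snap)
+	{
+		return atomic_load(seq) == snap;
+	}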
+
+Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_tables_api.c | 36 +++++++++++++++++++++++++++++++----
+ 1 file changed, 32 insertions(+), 4 deletions(-)
+
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index a38d87256b8fb..693dd05fdad6e 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -9413,6 +9413,22 @@ static void nft_set_commit_update(struct list_head *set_update_list)
+ }
+ }
+
++static unsigned int nft_gc_seq_begin(struct nftables_pernet *nft_net)
++{
++ unsigned int gc_seq;
++
++ /* Bump gc counter, it becomes odd, this is the busy mark. */
++ gc_seq = READ_ONCE(nft_net->gc_seq);
++ WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
++
++ return gc_seq;
++}
++
++static void nft_gc_seq_end(struct nftables_pernet *nft_net, unsigned int gc_seq)
++{
++ WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
++}
++
+ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
+ {
+ struct nftables_pernet *nft_net = nft_pernet(net);
+@@ -9498,9 +9514,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
+
+ WRITE_ONCE(nft_net->base_seq, base_seq);
+
+- /* Bump gc counter, it becomes odd, this is the busy mark. */
+- gc_seq = READ_ONCE(nft_net->gc_seq);
+- WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
++ gc_seq = nft_gc_seq_begin(nft_net);
+
+ /* step 3. Start new generation, rules_gen_X now in use. */
+ net->nft.gencursor = nft_gencursor_next(net);
+@@ -9691,7 +9705,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
+ nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
+ nf_tables_commit_audit_log(&adl, nft_net->base_seq);
+
+- WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
++ nft_gc_seq_end(nft_net, gc_seq);
+ nf_tables_commit_release(net);
+
+ return 0;
+@@ -10674,6 +10688,7 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
+ struct net *net = n->net;
+ unsigned int deleted;
+ bool restart = false;
++ unsigned int gc_seq;
+
+ if (event != NETLINK_URELEASE || n->protocol != NETLINK_NETFILTER)
+ return NOTIFY_DONE;
+@@ -10681,6 +10696,9 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
+ nft_net = nft_pernet(net);
+ deleted = 0;
+ mutex_lock(&nft_net->commit_mutex);
++
++ gc_seq = nft_gc_seq_begin(nft_net);
++
+ if (!list_empty(&nf_tables_destroy_list))
+ nf_tables_trans_destroy_flush_work();
+ again:
+@@ -10703,6 +10721,8 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
+ if (restart)
+ goto again;
+ }
++ nft_gc_seq_end(nft_net, gc_seq);
++
+ mutex_unlock(&nft_net->commit_mutex);
+
+ return NOTIFY_DONE;
+@@ -10741,12 +10761,20 @@ static void __net_exit nf_tables_pre_exit_net(struct net *net)
+ static void __net_exit nf_tables_exit_net(struct net *net)
+ {
+ struct nftables_pernet *nft_net = nft_pernet(net);
++ unsigned int gc_seq;
+
+ mutex_lock(&nft_net->commit_mutex);
++
++ gc_seq = nft_gc_seq_begin(nft_net);
++
+ if (!list_empty(&nft_net->commit_list) ||
+ !list_empty(&nft_net->module_list))
+ __nf_tables_abort(net, NFNL_ABORT_NONE);
++
+ __nft_release_tables(net);
++
++ nft_gc_seq_end(nft_net, gc_seq);
++
+ mutex_unlock(&nft_net->commit_mutex);
+ WARN_ON_ONCE(!list_empty(&nft_net->tables));
+ WARN_ON_ONCE(!list_empty(&nft_net->module_list));
+--
+2.40.1
+
--- /dev/null
+From 473b0cf242792b6c3b99987908e284293c768965 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Sep 2023 18:30:29 +0200
+Subject: netfilter: nf_tables: fix memleak when more than 255 elements expired
+
+From: Florian Westphal <fw@strlen.de>
+
+commit cf5000a7787cbc10341091d37245a42c119d26c5 upstream.
+
+When more than 255 elements have expired we're supposed to switch to a
+new gc container structure.
+
+This never happens: the u8 type wraps before reaching the boundary, so
+nft_trans_gc_space() always returns true.
+
+This means we recycle the initial gc container structure and
+lose track of the elements that came before.
+
+While at it, don't deref 'gc' after we've passed it to call_rcu.
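+
+The wrap is easy to demonstrate in isolation (plain C, not the kernel
+code):
+
+	#include <stdint.h>
+	#include <stdio.h>
+
+	#define BATCHCOUNT 256
+
+	int main(void)
+	{
+		uint8_t count = 0;
+
+		for (int i = 0; i < BATCHCOUNT; i++)
+			count++;	/* wraps back to 0 */
+
+		/* the "space left" check BATCHCOUNT - count never
+		 * reaches 0, so the batch is recycled instead of
+		 * queued: this prints 256 */
+		printf("%d\n", BATCHCOUNT - count);
+		return 0;
+	}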
+
+Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane")
+Reported-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/netfilter/nf_tables.h | 2 +-
+ net/netfilter/nf_tables_api.c | 10 ++++++++--
+ 2 files changed, 9 insertions(+), 3 deletions(-)
+
+diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
+index eb2103a9a7dd9..05d7a60a0e1f3 100644
+--- a/include/net/netfilter/nf_tables.h
++++ b/include/net/netfilter/nf_tables.h
+@@ -1657,7 +1657,7 @@ struct nft_trans_gc {
+ struct net *net;
+ struct nft_set *set;
+ u32 seq;
+- u8 count;
++ u16 count;
+ void *priv[NFT_TRANS_GC_BATCHCOUNT];
+ struct rcu_head rcu;
+ };
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index 6e67fb999a256..b22f2d9ee4afc 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -9190,12 +9190,15 @@ static int nft_trans_gc_space(struct nft_trans_gc *trans)
+ struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc,
+ unsigned int gc_seq, gfp_t gfp)
+ {
++ struct nft_set *set;
++
+ if (nft_trans_gc_space(gc))
+ return gc;
+
++ set = gc->set;
+ nft_trans_gc_queue_work(gc);
+
+- return nft_trans_gc_alloc(gc->set, gc_seq, gfp);
++ return nft_trans_gc_alloc(set, gc_seq, gfp);
+ }
+
+ void nft_trans_gc_queue_async_done(struct nft_trans_gc *trans)
+@@ -9210,15 +9213,18 @@ void nft_trans_gc_queue_async_done(struct nft_trans_gc *trans)
+
+ struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp)
+ {
++ struct nft_set *set;
++
+ if (WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net)))
+ return NULL;
+
+ if (nft_trans_gc_space(gc))
+ return gc;
+
++ set = gc->set;
+ call_rcu(&gc->rcu, nft_trans_gc_trans_free);
+
+- return nft_trans_gc_alloc(gc->set, 0, gfp);
++ return nft_trans_gc_alloc(set, 0, gfp);
+ }
+
+ void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans)
+--
+2.40.1
+
--- /dev/null
+From 44dad82bcba23568352b15697ed48b2482c376dc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Sep 2023 18:30:14 +0200
+Subject: netfilter: nf_tables: GC transaction API to avoid race with control
+ plane
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit 5f68718b34a531a556f2f50300ead2862278da26 upstream.
+
+The set types rhashtable and rbtree use a GC worker to reclaim memory.
+From the system work queue, at periodic intervals, a scan of the table
+is done.
+
+The major caveat here is that the nft transaction mutex is not held.
+This causes a race between control plane and GC when they attempt to
+delete the same element.
+
+We cannot grab the netlink mutex from the work queue, because the
+control plane has to wait for the GC work queue in case the set is to be
+removed, so we get the following deadlock:
+
+ cpu 1                               cpu 2
+  GC work                            transaction comes in, lock nft mutex
+   `acquire nft mutex // BLOCKS
+                                     transaction asks to remove the set
+                                     set destruction calls cancel_work_sync()
+
+cancel_work_sync will now block forever, because it is waiting for the
+mutex the caller already owns.
+
+This patch adds a new API that deals with garbage collection in two
+steps:
+
+1) Lockless GC of expired elements sets the NFT_SET_ELEM_DEAD_BIT
+   so they are not visible via lookup. Annotate the current GC
+   sequence in the GC transaction. Enqueue GC transaction work as
+   soon as it is full. If the ruleset is updated, then the GC
+   transaction is aborted and retried later.
+
+2) GC work grabs the mutex. If the GC sequence has changed then this
+   GC transaction lost the race with the control plane; abort it, as
+   it contains stale references to objects, and let GC try again
+   later. If the ruleset is intact, then this GC transaction
+   deactivates and removes the elements, and it uses call_rcu() to
+   destroy them.
+
+Note that no elements are removed from the GC lockless path; the _DEAD
+bit is set and pointers are collected. GC catchall does not remove the
+elements anymore either. There is a new set->dead flag that is set to
+abort the GC transaction, to deal with the set->ops->destroy() path
+which removes the remaining elements in the set from commit_release,
+where no mutex is held.
+
+To deal with GC when the mutex is held, which allows safe deactivate
+and removal, add a sync GC API which releases the set element object
+via call_rcu(). This is used by the rbtree and pipapo backends, which
+also perform garbage collection from the control plane path.
+
+Since element removal from sets can happen from control plane and
+element garbage collection/timeout, it is necessary to keep the set
+structure alive until all elements have been deactivated and destroyed.
+
+We cannot do a cancel_work_sync or flush_work in nft_set_destroy because
+it's called with the transaction mutex held, but the aforementioned async
+work queue might be blocked on the very mutex that nft_set_destroy()
+callchain is sitting on.
+
+This gives us the choice of ABBA deadlock or UaF.
+
+To avoid both, add a set->refs refcount_t member. The GC API can then
+increment the set refcount and release it once the elements have been
+free'd.
+
+Set backends are adapted to use the GC transaction API in a follow up
+patch entitled:
+
+ ("netfilter: nf_tables: use gc transaction API in set backends")
+
+This is joint work with Florian Westphal.
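+
+Schematically, the two phases look like this (a condensed userspace
+sketch with made-up types, not the actual nf_tables_api.c code):
+
+	#include <stdbool.h>
+	#include <stddef.h>
+	#include <stdlib.h>
+
+	#define BATCHCOUNT 256
+
+	struct elem { bool expired, dead; };
+
+	struct gc_batch {
+		unsigned int seq;	/* gc_seq sampled at alloc time */
+		size_t count;
+		struct elem *priv[BATCHCOUNT];
+	};
+
+	/* phase 1, lockless: mark expired elements dead (hiding them
+	 * from lookups) and collect pointers; nothing is freed here */
+	static bool gc_scan(struct elem *tbl, size_t n,
+			    struct gc_batch *gc, const unsigned int *gc_seq)
+	{
+		for (size_t i = 0; i < n; i++) {
+			if (*gc_seq != gc->seq)
+				return false;	/* ruleset changed, retry */
+			if (!tbl[i].expired || tbl[i].dead)
+				continue;
+			tbl[i].dead = true;
+			if (gc->count < BATCHCOUNT)
+				gc->priv[gc->count++] = &tbl[i];
+		}
+		return true;
+	}
+
+	/* phase 2, worker with the mutex held: only commit if the
+	 * sequence is unchanged, otherwise the pointers are stale */
+	static void gc_commit(struct gc_batch *gc, const unsigned int *gc_seq)
+	{
+		if (*gc_seq != gc->seq) {
+			free(gc);	/* lost the race, discard */
+			return;
+		}
+		/* deactivate, unlink and RCU-free gc->priv[] here */
+		free(gc);
+	}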
+
+Fixes: cfed7e1b1f8e ("netfilter: nf_tables: add set garbage collection helpers")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/netfilter/nf_tables.h | 64 +++++++-
+ net/netfilter/nf_tables_api.c | 248 ++++++++++++++++++++++++++++--
+ 2 files changed, 300 insertions(+), 12 deletions(-)
+
+diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
+index c752b6f509791..3b76370683c82 100644
+--- a/include/net/netfilter/nf_tables.h
++++ b/include/net/netfilter/nf_tables.h
+@@ -507,6 +507,7 @@ struct nft_set_elem_expr {
+ *
+ * @list: table set list node
+ * @bindings: list of set bindings
++ * @refs: internal refcounting for async set destruction
+ * @table: table this set belongs to
+ * @net: netnamespace this set belongs to
+ * @name: name of the set
+@@ -536,6 +537,7 @@ struct nft_set_elem_expr {
+ struct nft_set {
+ struct list_head list;
+ struct list_head bindings;
++ refcount_t refs;
+ struct nft_table *table;
+ possible_net_t net;
+ char *name;
+@@ -557,7 +559,8 @@ struct nft_set {
+ struct list_head pending_update;
+ /* runtime data below here */
+ const struct nft_set_ops *ops ____cacheline_aligned;
+- u16 flags:14,
++ u16 flags:13,
++ dead:1,
+ genmask:2;
+ u8 klen;
+ u8 dlen;
+@@ -1577,6 +1580,32 @@ static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext)
+ clear_bit(NFT_SET_ELEM_BUSY_BIT, word);
+ }
+
++#define NFT_SET_ELEM_DEAD_MASK (1 << 3)
++
++#if defined(__LITTLE_ENDIAN_BITFIELD)
++#define NFT_SET_ELEM_DEAD_BIT 3
++#elif defined(__BIG_ENDIAN_BITFIELD)
++#define NFT_SET_ELEM_DEAD_BIT (BITS_PER_LONG - BITS_PER_BYTE + 3)
++#else
++#error
++#endif
++
++static inline void nft_set_elem_dead(struct nft_set_ext *ext)
++{
++ unsigned long *word = (unsigned long *)ext;
++
++ BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0);
++ set_bit(NFT_SET_ELEM_DEAD_BIT, word);
++}
++
++static inline int nft_set_elem_is_dead(const struct nft_set_ext *ext)
++{
++ unsigned long *word = (unsigned long *)ext;
++
++ BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0);
++ return test_bit(NFT_SET_ELEM_DEAD_BIT, word);
++}
++
+ /**
+ * struct nft_trans - nf_tables object update in transaction
+ *
+@@ -1708,6 +1737,38 @@ struct nft_trans_flowtable {
+ #define nft_trans_flowtable_flags(trans) \
+ (((struct nft_trans_flowtable *)trans->data)->flags)
+
++#define NFT_TRANS_GC_BATCHCOUNT 256
++
++struct nft_trans_gc {
++ struct list_head list;
++ struct net *net;
++ struct nft_set *set;
++ u32 seq;
++ u8 count;
++ void *priv[NFT_TRANS_GC_BATCHCOUNT];
++ struct rcu_head rcu;
++};
++
++struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set,
++ unsigned int gc_seq, gfp_t gfp);
++void nft_trans_gc_destroy(struct nft_trans_gc *trans);
++
++struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc,
++ unsigned int gc_seq, gfp_t gfp);
++void nft_trans_gc_queue_async_done(struct nft_trans_gc *gc);
++
++struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp);
++void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans);
++
++void nft_trans_gc_elem_add(struct nft_trans_gc *gc, void *priv);
++
++struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
++ unsigned int gc_seq);
++
++void nft_setelem_data_deactivate(const struct net *net,
++ const struct nft_set *set,
++ struct nft_set_elem *elem);
++
+ int __init nft_chain_filter_init(void);
+ void nft_chain_filter_fini(void);
+
+@@ -1735,6 +1796,7 @@ struct nftables_pernet {
+ u64 table_handle;
+ unsigned int base_seq;
+ u8 validate_state;
++ unsigned int gc_seq;
+ };
+
+ extern unsigned int nf_tables_net_id;
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index 475c556f49912..e8e18a54958f8 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -31,7 +31,9 @@ static LIST_HEAD(nf_tables_expressions);
+ static LIST_HEAD(nf_tables_objects);
+ static LIST_HEAD(nf_tables_flowtables);
+ static LIST_HEAD(nf_tables_destroy_list);
++static LIST_HEAD(nf_tables_gc_list);
+ static DEFINE_SPINLOCK(nf_tables_destroy_list_lock);
++static DEFINE_SPINLOCK(nf_tables_gc_list_lock);
+
+ enum {
+ NFT_VALIDATE_SKIP = 0,
+@@ -122,6 +124,9 @@ static void nft_validate_state_update(struct net *net, u8 new_validate_state)
+ static void nf_tables_trans_destroy_work(struct work_struct *w);
+ static DECLARE_WORK(trans_destroy_work, nf_tables_trans_destroy_work);
+
++static void nft_trans_gc_work(struct work_struct *work);
++static DECLARE_WORK(trans_gc_work, nft_trans_gc_work);
++
+ static void nft_ctx_init(struct nft_ctx *ctx,
+ struct net *net,
+ const struct sk_buff *skb,
+@@ -583,10 +588,6 @@ static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
+ return __nft_trans_set_add(ctx, msg_type, set, NULL);
+ }
+
+-static void nft_setelem_data_deactivate(const struct net *net,
+- const struct nft_set *set,
+- struct nft_set_elem *elem);
+-
+ static int nft_mapelem_deactivate(const struct nft_ctx *ctx,
+ struct nft_set *set,
+ const struct nft_set_iter *iter,
+@@ -4854,6 +4855,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
+
+ INIT_LIST_HEAD(&set->bindings);
+ INIT_LIST_HEAD(&set->catchall_list);
++ refcount_set(&set->refs, 1);
+ set->table = table;
+ write_pnet(&set->net, net);
+ set->ops = ops;
+@@ -4921,6 +4923,14 @@ static void nft_set_catchall_destroy(const struct nft_ctx *ctx,
+ }
+ }
+
++static void nft_set_put(struct nft_set *set)
++{
++ if (refcount_dec_and_test(&set->refs)) {
++ kfree(set->name);
++ kvfree(set);
++ }
++}
++
+ static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
+ {
+ int i;
+@@ -4933,8 +4943,7 @@ static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
+
+ set->ops->destroy(ctx, set);
+ nft_set_catchall_destroy(ctx, set);
+- kfree(set->name);
+- kvfree(set);
++ nft_set_put(set);
+ }
+
+ static int nf_tables_delset(struct sk_buff *skb, const struct nfnl_info *info,
+@@ -6051,7 +6060,8 @@ struct nft_set_ext *nft_set_catchall_lookup(const struct net *net,
+ list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
+ ext = nft_set_elem_ext(set, catchall->elem);
+ if (nft_set_elem_active(ext, genmask) &&
+- !nft_set_elem_expired(ext))
++ !nft_set_elem_expired(ext) &&
++ !nft_set_elem_is_dead(ext))
+ return ext;
+ }
+
+@@ -6704,9 +6714,9 @@ static void nft_setelem_data_activate(const struct net *net,
+ nft_use_inc_restore(&(*nft_set_ext_obj(ext))->use);
+ }
+
+-static void nft_setelem_data_deactivate(const struct net *net,
+- const struct nft_set *set,
+- struct nft_set_elem *elem)
++void nft_setelem_data_deactivate(const struct net *net,
++ const struct nft_set *set,
++ struct nft_set_elem *elem)
+ {
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+
+@@ -9093,6 +9103,207 @@ void nft_chain_del(struct nft_chain *chain)
+ list_del_rcu(&chain->list);
+ }
+
++static void nft_trans_gc_setelem_remove(struct nft_ctx *ctx,
++ struct nft_trans_gc *trans)
++{
++ void **priv = trans->priv;
++ unsigned int i;
++
++ for (i = 0; i < trans->count; i++) {
++ struct nft_set_elem elem = {
++ .priv = priv[i],
++ };
++
++ nft_setelem_data_deactivate(ctx->net, trans->set, &elem);
++ nft_setelem_remove(ctx->net, trans->set, &elem);
++ }
++}
++
++void nft_trans_gc_destroy(struct nft_trans_gc *trans)
++{
++ nft_set_put(trans->set);
++ put_net(trans->net);
++ kfree(trans);
++}
++
++static void nft_trans_gc_trans_free(struct rcu_head *rcu)
++{
++ struct nft_set_elem elem = {};
++ struct nft_trans_gc *trans;
++ struct nft_ctx ctx = {};
++ unsigned int i;
++
++ trans = container_of(rcu, struct nft_trans_gc, rcu);
++ ctx.net = read_pnet(&trans->set->net);
++
++ for (i = 0; i < trans->count; i++) {
++ elem.priv = trans->priv[i];
++ if (!nft_setelem_is_catchall(trans->set, &elem))
++ atomic_dec(&trans->set->nelems);
++
++ nf_tables_set_elem_destroy(&ctx, trans->set, elem.priv);
++ }
++
++ nft_trans_gc_destroy(trans);
++}
++
++static bool nft_trans_gc_work_done(struct nft_trans_gc *trans)
++{
++ struct nftables_pernet *nft_net;
++ struct nft_ctx ctx = {};
++
++ nft_net = nft_pernet(trans->net);
++
++ mutex_lock(&nft_net->commit_mutex);
++
++ /* Check for race with transaction, otherwise this batch refers to
++ * stale objects that might not be there anymore. Skip transaction if
++ * set has been destroyed from control plane transaction in case gc
++ * worker loses race.
++ */
++ if (READ_ONCE(nft_net->gc_seq) != trans->seq || trans->set->dead) {
++ mutex_unlock(&nft_net->commit_mutex);
++ return false;
++ }
++
++ ctx.net = trans->net;
++ ctx.table = trans->set->table;
++
++ nft_trans_gc_setelem_remove(&ctx, trans);
++ mutex_unlock(&nft_net->commit_mutex);
++
++ return true;
++}
++
++static void nft_trans_gc_work(struct work_struct *work)
++{
++ struct nft_trans_gc *trans, *next;
++ LIST_HEAD(trans_gc_list);
++
++ spin_lock(&nf_tables_destroy_list_lock);
++ list_splice_init(&nf_tables_gc_list, &trans_gc_list);
++ spin_unlock(&nf_tables_destroy_list_lock);
++
++ list_for_each_entry_safe(trans, next, &trans_gc_list, list) {
++ list_del(&trans->list);
++ if (!nft_trans_gc_work_done(trans)) {
++ nft_trans_gc_destroy(trans);
++ continue;
++ }
++ call_rcu(&trans->rcu, nft_trans_gc_trans_free);
++ }
++}
++
++struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set,
++ unsigned int gc_seq, gfp_t gfp)
++{
++ struct net *net = read_pnet(&set->net);
++ struct nft_trans_gc *trans;
++
++ trans = kzalloc(sizeof(*trans), gfp);
++ if (!trans)
++ return NULL;
++
++ refcount_inc(&set->refs);
++ trans->set = set;
++ trans->net = get_net(net);
++ trans->seq = gc_seq;
++
++ return trans;
++}
++
++void nft_trans_gc_elem_add(struct nft_trans_gc *trans, void *priv)
++{
++ trans->priv[trans->count++] = priv;
++}
++
++static void nft_trans_gc_queue_work(struct nft_trans_gc *trans)
++{
++ spin_lock(&nf_tables_gc_list_lock);
++ list_add_tail(&trans->list, &nf_tables_gc_list);
++ spin_unlock(&nf_tables_gc_list_lock);
++
++ schedule_work(&trans_gc_work);
++}
++
++static int nft_trans_gc_space(struct nft_trans_gc *trans)
++{
++ return NFT_TRANS_GC_BATCHCOUNT - trans->count;
++}
++
++struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc,
++ unsigned int gc_seq, gfp_t gfp)
++{
++ if (nft_trans_gc_space(gc))
++ return gc;
++
++ nft_trans_gc_queue_work(gc);
++
++ return nft_trans_gc_alloc(gc->set, gc_seq, gfp);
++}
++
++void nft_trans_gc_queue_async_done(struct nft_trans_gc *trans)
++{
++ if (trans->count == 0) {
++ nft_trans_gc_destroy(trans);
++ return;
++ }
++
++ nft_trans_gc_queue_work(trans);
++}
++
++struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp)
++{
++ if (WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net)))
++ return NULL;
++
++ if (nft_trans_gc_space(gc))
++ return gc;
++
++ call_rcu(&gc->rcu, nft_trans_gc_trans_free);
++
++ return nft_trans_gc_alloc(gc->set, 0, gfp);
++}
++
++void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans)
++{
++ WARN_ON_ONCE(!lockdep_commit_lock_is_held(trans->net));
++
++ if (trans->count == 0) {
++ nft_trans_gc_destroy(trans);
++ return;
++ }
++
++ call_rcu(&trans->rcu, nft_trans_gc_trans_free);
++}
++
++struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
++ unsigned int gc_seq)
++{
++ struct nft_set_elem_catchall *catchall;
++ const struct nft_set *set = gc->set;
++ struct nft_set_ext *ext;
++
++ list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
++ ext = nft_set_elem_ext(set, catchall->elem);
++
++ if (!nft_set_elem_expired(ext))
++ continue;
++ if (nft_set_elem_is_dead(ext))
++ goto dead_elem;
++
++ nft_set_elem_dead(ext);
++dead_elem:
++ gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
++ if (!gc)
++ return NULL;
++
++ nft_trans_gc_elem_add(gc, catchall->elem);
++ }
++
++ return gc;
++}
++
+ static void nf_tables_module_autoload_cleanup(struct net *net)
+ {
+ struct nftables_pernet *nft_net = nft_pernet(net);
+@@ -9255,11 +9466,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
+ {
+ struct nftables_pernet *nft_net = nft_pernet(net);
+ struct nft_trans *trans, *next;
++ unsigned int base_seq, gc_seq;
+ LIST_HEAD(set_update_list);
+ struct nft_trans_elem *te;
+ struct nft_chain *chain;
+ struct nft_table *table;
+- unsigned int base_seq;
+ LIST_HEAD(adl);
+ int err;
+
+@@ -9336,6 +9547,10 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
+
+ WRITE_ONCE(nft_net->base_seq, base_seq);
+
++ /* Bump gc counter, it becomes odd, this is the busy mark. */
++ gc_seq = READ_ONCE(nft_net->gc_seq);
++ WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
++
+ /* step 3. Start new generation, rules_gen_X now in use. */
+ net->nft.gencursor = nft_gencursor_next(net);
+
+@@ -9424,6 +9639,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_DELSET:
++ nft_trans_set(trans)->dead = 1;
+ list_del_rcu(&nft_trans_set(trans)->list);
+ nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
+ NFT_MSG_DELSET, GFP_KERNEL);
+@@ -9523,6 +9739,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
+ nft_commit_notify(net, NETLINK_CB(skb).portid);
+ nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
+ nf_tables_commit_audit_log(&adl, nft_net->base_seq);
++
++ WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
+ nf_tables_commit_release(net);
+
+ return 0;
+@@ -10555,6 +10773,7 @@ static int __net_init nf_tables_init_net(struct net *net)
+ mutex_init(&nft_net->commit_mutex);
+ nft_net->base_seq = 1;
+ nft_net->validate_state = NFT_VALIDATE_SKIP;
++ nft_net->gc_seq = 0;
+
+ return 0;
+ }
+@@ -10583,10 +10802,16 @@ static void __net_exit nf_tables_exit_net(struct net *net)
+ WARN_ON_ONCE(!list_empty(&nft_net->notify_list));
+ }
+
++static void nf_tables_exit_batch(struct list_head *net_exit_list)
++{
++ flush_work(&trans_gc_work);
++}
++
+ static struct pernet_operations nf_tables_net_ops = {
+ .init = nf_tables_init_net,
+ .pre_exit = nf_tables_pre_exit_net,
+ .exit = nf_tables_exit_net,
++ .exit_batch = nf_tables_exit_batch,
+ .id = &nf_tables_net_id,
+ .size = sizeof(struct nftables_pernet),
+ };
+@@ -10658,6 +10883,7 @@ static void __exit nf_tables_module_exit(void)
+ nft_chain_filter_fini();
+ nft_chain_route_fini();
+ unregister_pernet_subsys(&nf_tables_net_ops);
++ cancel_work_sync(&trans_gc_work);
+ cancel_work_sync(&trans_destroy_work);
+ rcu_barrier();
+ rhltable_destroy(&nft_objname_ht);
+--
+2.40.1
+
--- /dev/null
+From 01e67a09fbbedc68b18fe3ae14d431cff2a675cb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Sep 2023 18:30:21 +0200
+Subject: netfilter: nf_tables: GC transaction race with abort path
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit 720344340fb9be2765bbaab7b292ece0a4570eae upstream.
+
+The abort path is missing a synchronization point with GC
+transactions. Add a GC sequence number so that any GC transaction
+losing the race is discarded.
+
+Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_tables_api.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index 53ee6ac16f9e9..0455af9a66af1 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -9969,7 +9969,12 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb,
+ enum nfnl_abort_action action)
+ {
+ struct nftables_pernet *nft_net = nft_pernet(net);
+- int ret = __nf_tables_abort(net, action);
++ unsigned int gc_seq;
++ int ret;
++
++ gc_seq = nft_gc_seq_begin(nft_net);
++ ret = __nf_tables_abort(net, action);
++ nft_gc_seq_end(nft_net, gc_seq);
+
+ mutex_unlock(&nft_net->commit_mutex);
+
+--
+2.40.1
+
--- /dev/null
+From 185f3c77ccd11bda2120eb792910aee3188286d2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Sep 2023 18:30:20 +0200
+Subject: netfilter: nf_tables: GC transaction race with netns dismantle
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit 02c6c24402bf1c1e986899c14ba22a10b510916b upstream.
+
+Use maybe_get_net() since the GC workqueue might race with the netns
+exit path.
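+
+maybe_get_net() only takes a reference if the namespace refcount has
+not already dropped to zero; roughly the following pattern (a
+userspace analogue, not the kernel implementation):
+
+	#include <stdatomic.h>
+	#include <stdbool.h>
+
+	/* take a reference only while the object is still alive;
+	 * an unconditional get could resurrect a dying netns */
+	static bool ref_get_unless_zero(atomic_int *refs)
+	{
+		int old = atomic_load(refs);
+
+		while (old != 0) {
+			if (atomic_compare_exchange_weak(refs, &old, old + 1))
+				return true;
+		}
+		return false;
+	}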
+
+Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_tables_api.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index 693dd05fdad6e..53ee6ac16f9e9 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -9155,9 +9155,14 @@ struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set,
+ if (!trans)
+ return NULL;
+
++ trans->net = maybe_get_net(net);
++ if (!trans->net) {
++ kfree(trans);
++ return NULL;
++ }
++
+ refcount_inc(&set->refs);
+ trans->set = set;
+- trans->net = get_net(net);
+ trans->seq = gc_seq;
+
+ return trans;
+--
+2.40.1
+
--- /dev/null
+From bacc76761a2e2c9042941721a1edf68131ce8beb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Sep 2023 18:30:17 +0200
+Subject: netfilter: nf_tables: remove busy mark and gc batch API
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit a2dd0233cbc4d8a0abb5f64487487ffc9265beb5 upstream.
+
+Ditch it, it has been replaced by the GC transaction API and has no
+clients anymore.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/netfilter/nf_tables.h | 98 +------------------------------
+ net/netfilter/nf_tables_api.c | 48 +--------------
+ 2 files changed, 4 insertions(+), 142 deletions(-)
+
+diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
+index 3b76370683c82..2d501dd901521 100644
+--- a/include/net/netfilter/nf_tables.h
++++ b/include/net/netfilter/nf_tables.h
+@@ -594,7 +594,6 @@ struct nft_set *nft_set_lookup_global(const struct net *net,
+
+ struct nft_set_ext *nft_set_catchall_lookup(const struct net *net,
+ const struct nft_set *set);
+-void *nft_set_catchall_gc(const struct nft_set *set);
+
+ static inline unsigned long nft_set_gc_interval(const struct nft_set *set)
+ {
+@@ -811,62 +810,6 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem,
+ void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set, void *elem);
+
+-/**
+- * struct nft_set_gc_batch_head - nf_tables set garbage collection batch
+- *
+- * @rcu: rcu head
+- * @set: set the elements belong to
+- * @cnt: count of elements
+- */
+-struct nft_set_gc_batch_head {
+- struct rcu_head rcu;
+- const struct nft_set *set;
+- unsigned int cnt;
+-};
+-
+-#define NFT_SET_GC_BATCH_SIZE ((PAGE_SIZE - \
+- sizeof(struct nft_set_gc_batch_head)) / \
+- sizeof(void *))
+-
+-/**
+- * struct nft_set_gc_batch - nf_tables set garbage collection batch
+- *
+- * @head: GC batch head
+- * @elems: garbage collection elements
+- */
+-struct nft_set_gc_batch {
+- struct nft_set_gc_batch_head head;
+- void *elems[NFT_SET_GC_BATCH_SIZE];
+-};
+-
+-struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set,
+- gfp_t gfp);
+-void nft_set_gc_batch_release(struct rcu_head *rcu);
+-
+-static inline void nft_set_gc_batch_complete(struct nft_set_gc_batch *gcb)
+-{
+- if (gcb != NULL)
+- call_rcu(&gcb->head.rcu, nft_set_gc_batch_release);
+-}
+-
+-static inline struct nft_set_gc_batch *
+-nft_set_gc_batch_check(const struct nft_set *set, struct nft_set_gc_batch *gcb,
+- gfp_t gfp)
+-{
+- if (gcb != NULL) {
+- if (gcb->head.cnt + 1 < ARRAY_SIZE(gcb->elems))
+- return gcb;
+- nft_set_gc_batch_complete(gcb);
+- }
+- return nft_set_gc_batch_alloc(set, gfp);
+-}
+-
+-static inline void nft_set_gc_batch_add(struct nft_set_gc_batch *gcb,
+- void *elem)
+-{
+- gcb->elems[gcb->head.cnt++] = elem;
+-}
+-
+ struct nft_expr_ops;
+ /**
+ * struct nft_expr_type - nf_tables expression type
+@@ -1545,47 +1488,12 @@ static inline void nft_set_elem_change_active(const struct net *net,
+
+ #endif /* IS_ENABLED(CONFIG_NF_TABLES) */
+
+-/*
+- * We use a free bit in the genmask field to indicate the element
+- * is busy, meaning it is currently being processed either by
+- * the netlink API or GC.
+- *
+- * Even though the genmask is only a single byte wide, this works
+- * because the extension structure if fully constant once initialized,
+- * so there are no non-atomic write accesses unless it is already
+- * marked busy.
+- */
+-#define NFT_SET_ELEM_BUSY_MASK (1 << 2)
+-
+-#if defined(__LITTLE_ENDIAN_BITFIELD)
+-#define NFT_SET_ELEM_BUSY_BIT 2
+-#elif defined(__BIG_ENDIAN_BITFIELD)
+-#define NFT_SET_ELEM_BUSY_BIT (BITS_PER_LONG - BITS_PER_BYTE + 2)
+-#else
+-#error
+-#endif
+-
+-static inline int nft_set_elem_mark_busy(struct nft_set_ext *ext)
+-{
+- unsigned long *word = (unsigned long *)ext;
+-
+- BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0);
+- return test_and_set_bit(NFT_SET_ELEM_BUSY_BIT, word);
+-}
+-
+-static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext)
+-{
+- unsigned long *word = (unsigned long *)ext;
+-
+- clear_bit(NFT_SET_ELEM_BUSY_BIT, word);
+-}
+-
+-#define NFT_SET_ELEM_DEAD_MASK (1 << 3)
++#define NFT_SET_ELEM_DEAD_MASK (1 << 2)
+
+ #if defined(__LITTLE_ENDIAN_BITFIELD)
+-#define NFT_SET_ELEM_DEAD_BIT 3
++#define NFT_SET_ELEM_DEAD_BIT 2
+ #elif defined(__BIG_ENDIAN_BITFIELD)
+-#define NFT_SET_ELEM_DEAD_BIT (BITS_PER_LONG - BITS_PER_BYTE + 3)
++#define NFT_SET_ELEM_DEAD_BIT (BITS_PER_LONG - BITS_PER_BYTE + 2)
+ #else
+ #error
+ #endif
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index e179d1132f2fb..a38d87256b8fb 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -6069,29 +6069,6 @@ struct nft_set_ext *nft_set_catchall_lookup(const struct net *net,
+ }
+ EXPORT_SYMBOL_GPL(nft_set_catchall_lookup);
+
+-void *nft_set_catchall_gc(const struct nft_set *set)
+-{
+- struct nft_set_elem_catchall *catchall, *next;
+- struct nft_set_ext *ext;
+- void *elem = NULL;
+-
+- list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
+- ext = nft_set_elem_ext(set, catchall->elem);
+-
+- if (!nft_set_elem_expired(ext) ||
+- nft_set_elem_mark_busy(ext))
+- continue;
+-
+- elem = catchall->elem;
+- list_del_rcu(&catchall->list);
+- kfree_rcu(catchall, rcu);
+- break;
+- }
+-
+- return elem;
+-}
+-EXPORT_SYMBOL_GPL(nft_set_catchall_gc);
+-
+ static int nft_setelem_catchall_insert(const struct net *net,
+ struct nft_set *set,
+ const struct nft_set_elem *elem,
+@@ -6562,7 +6539,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
+ goto err_elem_free;
+ }
+
+- ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK;
++ ext->genmask = nft_genmask_cur(ctx->net);
+
+ err = nft_setelem_insert(ctx->net, set, &elem, &ext2, flags);
+ if (err) {
+@@ -6949,29 +6926,6 @@ static int nf_tables_delsetelem(struct sk_buff *skb,
+ return err;
+ }
+
+-void nft_set_gc_batch_release(struct rcu_head *rcu)
+-{
+- struct nft_set_gc_batch *gcb;
+- unsigned int i;
+-
+- gcb = container_of(rcu, struct nft_set_gc_batch, head.rcu);
+- for (i = 0; i < gcb->head.cnt; i++)
+- nft_set_elem_destroy(gcb->head.set, gcb->elems[i], true);
+- kfree(gcb);
+-}
+-
+-struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set,
+- gfp_t gfp)
+-{
+- struct nft_set_gc_batch *gcb;
+-
+- gcb = kzalloc(sizeof(*gcb), gfp);
+- if (gcb == NULL)
+- return gcb;
+- gcb->head.set = set;
+- return gcb;
+-}
+-
+ /*
+ * Stateful objects
+ */
+--
+2.40.1
+
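With the busy bit gone, the _DEAD bit can move down into the freed bit 2 of
the genmask byte, as the hunk above does. A small sketch of the tombstone
idea; the ELEM_* names are illustrative, and the kernel operates on the whole
extension word with atomic bitops rather than on a single byte.

/* Sketch of a tombstone bit sharing the generation-mask byte. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define ELEM_GEN_MASK 0x3u        /* two generation bits */
#define ELEM_DEAD_BIT (1u << 2)   /* bit 2, freed by removing the busy bit */

struct elem { atomic_uchar genmask; };

static void elem_set_dead(struct elem *e)
{
        atomic_fetch_or(&e->genmask, ELEM_DEAD_BIT);
}

static bool elem_is_dead(struct elem *e)
{
        return atomic_load(&e->genmask) & ELEM_DEAD_BIT;
}

int main(void)
{
        struct elem e = { .genmask = 0x1 };

        elem_set_dead(&e);   /* lookup path will skip this element now */
        printf("dead=%d gen=0x%x\n", elem_is_dead(&e),
               atomic_load(&e.genmask) & ELEM_GEN_MASK);
        return 0;
}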
--- /dev/null
+From 072d961f98136d59cf667a0910ca8a331ec0fafe Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Sep 2023 18:30:22 +0200
+Subject: netfilter: nf_tables: use correct lock to protect gc_list
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit 8357bc946a2abc2a10ca40e5a2105d2b4c57515e upstream.
+
+Use the nf_tables_gc_list_lock spinlock, not nf_tables_destroy_list_lock,
+to protect the gc_list.
+
+Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_tables_api.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index 0455af9a66af1..47f3632c78bfb 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -9131,9 +9131,9 @@ static void nft_trans_gc_work(struct work_struct *work)
+ struct nft_trans_gc *trans, *next;
+ LIST_HEAD(trans_gc_list);
+
+- spin_lock(&nf_tables_destroy_list_lock);
++ spin_lock(&nf_tables_gc_list_lock);
+ list_splice_init(&nf_tables_gc_list, &trans_gc_list);
+- spin_unlock(&nf_tables_destroy_list_lock);
++ spin_unlock(&nf_tables_gc_list_lock);
+
+ list_for_each_entry_safe(trans, next, &trans_gc_list, list) {
+ list_del(&trans->list);
+--
+2.40.1
+
--- /dev/null
+From 54fc77568939b93b86d45a0f716ed6991cccfd0c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Sep 2023 18:30:16 +0200
+Subject: netfilter: nft_set_hash: mark set element as dead when deleting from
+ packet path
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit c92db3030492b8ad1d0faace7a93bbcf53850d0c upstream.
+
+Set the NFT_SET_ELEM_DEAD_BIT flag on this element, instead of
+performing element removal, which might race with an ongoing
+transaction. Enable GC when the dynamic flag is set, since dynset
+deletion requires garbage collection after this patch.
+
+Fixes: d0a8d877da97 ("netfilter: nft_dynset: support for element deletion")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nft_set_hash.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
+index 2f067e4596b02..cef5df8460009 100644
+--- a/net/netfilter/nft_set_hash.c
++++ b/net/netfilter/nft_set_hash.c
+@@ -249,7 +249,9 @@ static bool nft_rhash_delete(const struct nft_set *set,
+ if (he == NULL)
+ return false;
+
+- return rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params) == 0;
++ nft_set_elem_dead(&he->ext);
++
++ return true;
+ }
+
+ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
+@@ -412,7 +414,7 @@ static int nft_rhash_init(const struct nft_set *set,
+ return err;
+
+ INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rhash_gc);
+- if (set->flags & NFT_SET_TIMEOUT)
++ if (set->flags & (NFT_SET_TIMEOUT | NFT_SET_EVAL))
+ nft_rhash_gc_init(set);
+
+ return 0;
+--
+2.40.1
+
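A sketch of the lazy-deletion shape this patch adopts: the hot path only
tombstones the entry, lookups treat tombstoned entries as absent, and the
unlink and free are left to GC under proper synchronization. Types and names
here are illustrative, not nft_set_hash internals.

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct entry {
        int key;
        atomic_bool dead;
        struct entry *next;
};

/* Lookup ignores tombstoned entries, like the lookup path ignoring
 * elements with the _DEAD bit set. */
static struct entry *find(struct entry *head, int key)
{
        for (struct entry *e = head; e; e = e->next)
                if (e->key == key && !atomic_load(&e->dead))
                        return e;
        return NULL;
}

/* "delete" from the hot path: tombstone only; GC unlinks and frees
 * later, so no removal can race with an ongoing transaction. */
static bool del(struct entry *head, int key)
{
        struct entry *e = find(head, key);

        if (!e)
                return false;
        atomic_store(&e->dead, true);
        return true;
}

int main(void)
{
        struct entry b = { .key = 2 };
        struct entry a = { .key = 1, .next = &b };

        del(&a, 2);
        printf("2 visible: %d\n", find(&a, 2) != NULL); /* 0 after tombstone */
        return 0;
}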
--- /dev/null
+From 9657116a501a18cc09a45fc55605956adab4e392 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Sep 2023 18:30:28 +0200
+Subject: netfilter: nft_set_hash: try later when GC hits EAGAIN on iteration
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit b079155faae94e9b3ab9337e82100a914ebb4e8d upstream.
+
+Skip the GC run if the iterator rewinds to the beginning with EAGAIN,
+otherwise GC might collect the same element more than once.
+
+Fixes: f6c383b8c31a ("netfilter: nf_tables: adapt set backend to use GC transaction API")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nft_set_hash.c | 9 +++------
+ 1 file changed, 3 insertions(+), 6 deletions(-)
+
+diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
+index eca20dc601384..2013de934cef0 100644
+--- a/net/netfilter/nft_set_hash.c
++++ b/net/netfilter/nft_set_hash.c
+@@ -338,12 +338,9 @@ static void nft_rhash_gc(struct work_struct *work)
+
+ while ((he = rhashtable_walk_next(&hti))) {
+ if (IS_ERR(he)) {
+- if (PTR_ERR(he) != -EAGAIN) {
+- nft_trans_gc_destroy(gc);
+- gc = NULL;
+- goto try_later;
+- }
+- continue;
++ nft_trans_gc_destroy(gc);
++ gc = NULL;
++ goto try_later;
+ }
+
+ /* Ruleset has been updated, try later. */
+--
+2.40.1
+
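A sketch of why a rewinding iterator forces a full retry: anything batched
before the rewind would be visited, and batched, again. The walk_next()
stand-in below is illustrative; it models rhashtable_walk_next() returning
ERR_PTR(-EAGAIN) after a concurrent resize.

#include <stdbool.h>
#include <stdio.h>

enum walk_status { WALK_OK, WALK_DONE, WALK_RESTART };

static int pos, restart_at = 2;   /* simulate one mid-walk resize */

static enum walk_status walk_next(int *out)
{
        if (pos == restart_at) {
                pos = 0;
                restart_at = -1;
                return WALK_RESTART;   /* rewound to the beginning */
        }
        if (pos == 4)
                return WALK_DONE;
        *out = pos++;
        return WALK_OK;
}

static bool gc_run(void)
{
        int elem;

        for (;;) {
                switch (walk_next(&elem)) {
                case WALK_DONE:
                        return true;   /* batch complete, queue it */
                case WALK_RESTART:
                        return false;  /* drop the batch, try later */
                case WALK_OK:
                        /* ... batch elem if expired ... */
                        break;
                }
        }
}

int main(void)
{
        printf("first run %s\n", gc_run() ? "kept" : "dropped"); /* dropped */
        printf("retry %s\n", gc_run() ? "kept" : "dropped");     /* kept */
        return 0;
}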
--- /dev/null
+From 259b7cc3d0a6d719766b55fff35b491ed033677f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Sep 2023 18:30:26 +0200
+Subject: netfilter: nft_set_pipapo: call nft_trans_gc_queue_sync() in catchall
+ GC
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit 4a9e12ea7e70223555ec010bec9f711089ce96f6 upstream.
+
+pipapo needs to enqueue GC transactions for catchall elements through
+nft_trans_gc_queue_sync(). Add nft_trans_gc_catchall_sync() and
+nft_trans_gc_catchall_async() to handle GC transaction queueing
+accordingly.
+
+Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane")
+Fixes: f6c383b8c31a ("netfilter: nf_tables: adapt set backend to use GC transaction API")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/netfilter/nf_tables.h | 5 +++--
+ net/netfilter/nf_tables_api.c | 22 +++++++++++++++++++---
+ net/netfilter/nft_set_hash.c | 2 +-
+ net/netfilter/nft_set_pipapo.c | 2 +-
+ net/netfilter/nft_set_rbtree.c | 2 +-
+ 5 files changed, 25 insertions(+), 8 deletions(-)
+
+diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
+index 12777a5b60cd0..eb2103a9a7dd9 100644
+--- a/include/net/netfilter/nf_tables.h
++++ b/include/net/netfilter/nf_tables.h
+@@ -1675,8 +1675,9 @@ void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans);
+
+ void nft_trans_gc_elem_add(struct nft_trans_gc *gc, void *priv);
+
+-struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
+- unsigned int gc_seq);
++struct nft_trans_gc *nft_trans_gc_catchall_async(struct nft_trans_gc *gc,
++ unsigned int gc_seq);
++struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc);
+
+ void nft_setelem_data_deactivate(const struct net *net,
+ const struct nft_set *set,
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index 47f3632c78bfb..6e67fb999a256 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -9233,8 +9233,9 @@ void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans)
+ call_rcu(&trans->rcu, nft_trans_gc_trans_free);
+ }
+
+-struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
+- unsigned int gc_seq)
++static struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
++ unsigned int gc_seq,
++ bool sync)
+ {
+ struct nft_set_elem_catchall *catchall;
+ const struct nft_set *set = gc->set;
+@@ -9250,7 +9251,11 @@ struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
+
+ nft_set_elem_dead(ext);
+ dead_elem:
+- gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
++ if (sync)
++ gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
++ else
++ gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
++
+ if (!gc)
+ return NULL;
+
+@@ -9260,6 +9265,17 @@ struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
+ return gc;
+ }
+
++struct nft_trans_gc *nft_trans_gc_catchall_async(struct nft_trans_gc *gc,
++ unsigned int gc_seq)
++{
++ return nft_trans_gc_catchall(gc, gc_seq, false);
++}
++
++struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc)
++{
++ return nft_trans_gc_catchall(gc, 0, true);
++}
++
+ static void nf_tables_module_autoload_cleanup(struct net *net)
+ {
+ struct nftables_pernet *nft_net = nft_pernet(net);
+diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
+index 524763659f251..eca20dc601384 100644
+--- a/net/netfilter/nft_set_hash.c
++++ b/net/netfilter/nft_set_hash.c
+@@ -372,7 +372,7 @@ static void nft_rhash_gc(struct work_struct *work)
+ nft_trans_gc_elem_add(gc, he);
+ }
+
+- gc = nft_trans_gc_catchall(gc, gc_seq);
++ gc = nft_trans_gc_catchall_async(gc, gc_seq);
+
+ try_later:
+ /* catchall list iteration requires rcu read side lock. */
+diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
+index 58bd514260b90..7248a1737ee14 100644
+--- a/net/netfilter/nft_set_pipapo.c
++++ b/net/netfilter/nft_set_pipapo.c
+@@ -1611,7 +1611,7 @@ static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m)
+ }
+ }
+
+- gc = nft_trans_gc_catchall(gc, 0);
++ gc = nft_trans_gc_catchall_sync(gc);
+ if (gc) {
+ nft_trans_gc_queue_sync_done(gc);
+ priv->last_gc = jiffies;
+diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
+index 70491ba98decb..487572dcd6144 100644
+--- a/net/netfilter/nft_set_rbtree.c
++++ b/net/netfilter/nft_set_rbtree.c
+@@ -669,7 +669,7 @@ static void nft_rbtree_gc(struct work_struct *work)
+ nft_trans_gc_elem_add(gc, rbe);
+ }
+
+- gc = nft_trans_gc_catchall(gc, gc_seq);
++ gc = nft_trans_gc_catchall_async(gc, gc_seq);
+
+ try_later:
+ read_unlock_bh(&priv->lock);
+--
+2.40.1
+
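The patch folds both call sites into one static worker behind two thin
wrappers. A sketch of that wrapper pattern; the names echo the patch but the
types and bodies are illustrative.

#include <stdbool.h>
#include <stdio.h>

struct gc_batch { unsigned int seq; bool sync; };

static struct gc_batch *gc_catchall(struct gc_batch *gc, unsigned int seq,
                                    bool sync)
{
        gc->seq = seq;
        gc->sync = sync;
        /* ... walk catchall elements and queue expired ones on the
         * sync or async transaction path, as selected ... */
        return gc;
}

struct gc_batch *gc_catchall_async(struct gc_batch *gc, unsigned int seq)
{
        return gc_catchall(gc, seq, false);
}

struct gc_batch *gc_catchall_sync(struct gc_batch *gc)
{
        return gc_catchall(gc, 0, true);  /* seq is unused on the sync path */
}

int main(void)
{
        struct gc_batch gc;

        gc_catchall_sync(&gc);
        printf("sync=%d\n", gc.sync);
        return 0;
}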
--- /dev/null
+From 1a2f1ef1fe3846986059aabc11901239dce14481 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Sep 2023 18:30:27 +0200
+Subject: netfilter: nft_set_pipapo: stop GC iteration if GC transaction
+ allocation fails
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit 6d365eabce3c018a80f6e0379b17df2abb17405e upstream.
+
+nft_trans_gc_queue_sync() enqueues the GC transaction and allocates a
+new one. If this allocation fails, stop this GC sync run and retry
+later.
+
+Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nft_set_pipapo.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
+index 7248a1737ee14..83f5f276c3bff 100644
+--- a/net/netfilter/nft_set_pipapo.c
++++ b/net/netfilter/nft_set_pipapo.c
+@@ -1597,7 +1597,7 @@ static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m)
+
+ gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
+ if (!gc)
+- break;
++ return;
+
+ nft_pipapo_gc_deactivate(net, set, e);
+ pipapo_drop(m, rulemap);
+--
+2.40.1
+
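The subtlety behind this one-liner is that pipapo_gc() walks nested loops, so
a break after a failed batch allocation only left the inner loop and the walk
carried on with a NULL batch. An illustrative loop shape, not the pipapo code:

#include <stdio.h>

static int alloc_batch(int n) { return n < 5; }  /* fails on the 6th call */

static void gc_walk(void)
{
        int batches = 0;

        for (int field = 0; field < 4; field++) {
                for (int rule = 0; rule < 4; rule++) {
                        if (!alloc_batch(batches)) {
                                printf("alloc failed, stop run\n");
                                return; /* 'break' would resume at the
                                         * next field with no batch */
                        }
                        batches++;
                }
        }
        printf("walk complete, %d batches\n", batches);
}

int main(void) { gc_walk(); return 0; }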
--- /dev/null
+From fcc17d87147691671f8cb24ff3b8532fb40bedad Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Sep 2023 18:30:24 +0200
+Subject: netfilter: nft_set_rbtree: skip sync GC for new elements in this
+ transaction
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit 2ee52ae94baabf7ee09cf2a8d854b990dac5d0e4 upstream.
+
+New elements in this transaction might expire before the transaction
+ends. Skip sync GC for such elements, otherwise the commit path might
+walk over an already released object. Once the transaction is finished,
+async GC will collect such expired elements.
+
+Fixes: f6c383b8c31a ("netfilter: nf_tables: adapt set backend to use GC transaction API")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nft_set_rbtree.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
+index c6435e7092319..f250b5399344a 100644
+--- a/net/netfilter/nft_set_rbtree.c
++++ b/net/netfilter/nft_set_rbtree.c
+@@ -312,6 +312,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
+ struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL;
+ struct rb_node *node, *next, *parent, **p, *first = NULL;
+ struct nft_rbtree *priv = nft_set_priv(set);
++ u8 cur_genmask = nft_genmask_cur(net);
+ u8 genmask = nft_genmask_next(net);
+ int d, err;
+
+@@ -357,8 +358,11 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
+ if (!nft_set_elem_active(&rbe->ext, genmask))
+ continue;
+
+- /* perform garbage collection to avoid bogus overlap reports. */
+- if (nft_set_elem_expired(&rbe->ext)) {
++ /* perform garbage collection to avoid bogus overlap reports
++ * but skip new elements in this transaction.
++ */
++ if (nft_set_elem_expired(&rbe->ext) &&
++ nft_set_elem_active(&rbe->ext, cur_genmask)) {
+ err = nft_rbtree_gc_elem(set, priv, rbe, genmask);
+ if (err < 0)
+ return err;
+--
+2.40.1
+
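A sketch of the predicate being added: an element created by the still-open
transaction is active only in the next generation, so sync GC must skip it
even if it has expired. Note the kernel's genmask encoding is inverted (a set
bit marks the element inactive in that generation); the model below uses plain
active bits to keep the idea visible.

#include <stdbool.h>
#include <stdio.h>

#define GEN_CUR  0x1u  /* current (committed) generation */
#define GEN_NEXT 0x2u  /* next generation, built by the open transaction */

static bool active(unsigned int genmask, unsigned int gen)
{
        return genmask & gen;
}

int main(void)
{
        unsigned int committed = GEN_CUR | GEN_NEXT; /* from a past transaction */
        unsigned int pending   = GEN_NEXT;           /* added this transaction */

        /* sync GC collects only elements that are expired AND active in
         * the current generation; pending ones wait for async GC. */
        printf("collect committed: %d\n", active(committed, GEN_CUR)); /* 1 */
        printf("collect pending:   %d\n", active(pending, GEN_CUR));   /* 0 */
        return 0;
}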
--- /dev/null
+From 324aa507ead6f24b240aff5bda567ca473e56fc7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Sep 2023 18:30:25 +0200
+Subject: netfilter: nft_set_rbtree: use read spinlock to avoid datapath
+ contention
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit 96b33300fba880ec0eafcf3d82486f3463b4b6da upstream.
+
+rbtree GC does not modify the data structure; instead it collects
+expired elements and enqueues a GC transaction. Use a read spinlock to
+avoid datapath contention while the GC worker is running.
+
+Fixes: f6c383b8c31a ("netfilter: nf_tables: adapt set backend to use GC transaction API")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nft_set_rbtree.c | 6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
+index f250b5399344a..70491ba98decb 100644
+--- a/net/netfilter/nft_set_rbtree.c
++++ b/net/netfilter/nft_set_rbtree.c
+@@ -622,8 +622,7 @@ static void nft_rbtree_gc(struct work_struct *work)
+ if (!gc)
+ goto done;
+
+- write_lock_bh(&priv->lock);
+- write_seqcount_begin(&priv->count);
++ read_lock_bh(&priv->lock);
+ for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
+
+ /* Ruleset has been updated, try later. */
+@@ -673,8 +672,7 @@ static void nft_rbtree_gc(struct work_struct *work)
+ gc = nft_trans_gc_catchall(gc, gc_seq);
+
+ try_later:
+- write_seqcount_end(&priv->count);
+- write_unlock_bh(&priv->lock);
++ read_unlock_bh(&priv->lock);
+
+ if (gc)
+ nft_trans_gc_queue_async_done(gc);
+--
+2.40.1
+
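A sketch of the locking change: the GC walk only reads the tree, so a reader
lock lets datapath lookups proceed concurrently, while mutation paths keep
the write lock. pthread names stand in for the kernel's rwlock here; this is
a model of the design choice, not the rbtree backend itself.

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER;

static void gc_walk(void)
{
        pthread_rwlock_rdlock(&lock);  /* was: write lock + seqcount */
        /* ... collect expired elements, mark them dead, queue batch ... */
        pthread_rwlock_unlock(&lock);
}

static void insert_elem(void)
{
        pthread_rwlock_wrlock(&lock);  /* mutation still excludes everyone */
        /* ... rebalance the tree ... */
        pthread_rwlock_unlock(&lock);
}

int main(void)
{
        insert_elem();
        gc_walk();
        puts("done");
        return 0;
}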
ext4-replace-the-traditional-ternary-conditional-ope.patch
ext4-move-setting-of-trimmed-bit-into-ext4_try_to_tr.patch
ext4-do-not-let-fstrim-block-system-suspend.patch
+netfilter-nf_tables-don-t-skip-expired-elements-duri.patch
+netfilter-nf_tables-gc-transaction-api-to-avoid-race.patch
+netfilter-nf_tables-adapt-set-backend-to-use-gc-tran.patch
+netfilter-nft_set_hash-mark-set-element-as-dead-when.patch
+netfilter-nf_tables-remove-busy-mark-and-gc-batch-ap.patch
+netfilter-nf_tables-don-t-fail-inserts-if-duplicate-.patch
+netfilter-nf_tables-fix-gc-transaction-races-with-ne.patch
+netfilter-nf_tables-gc-transaction-race-with-netns-d.patch
+netfilter-nf_tables-gc-transaction-race-with-abort-p.patch
+netfilter-nf_tables-use-correct-lock-to-protect-gc_l.patch
+netfilter-nf_tables-defer-gc-run-if-previous-batch-i.patch
+netfilter-nft_set_rbtree-skip-sync-gc-for-new-elemen.patch
+netfilter-nft_set_rbtree-use-read-spinlock-to-avoid-.patch
+netfilter-nft_set_pipapo-call-nft_trans_gc_queue_syn.patch
+netfilter-nft_set_pipapo-stop-gc-iteration-if-gc-tra.patch
+netfilter-nft_set_hash-try-later-when-gc-hits-eagain.patch
+netfilter-nf_tables-fix-memleak-when-more-than-255-e.patch