]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.19-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 13 Jun 2024 07:08:52 +0000 (09:08 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 13 Jun 2024 07:08:52 +0000 (09:08 +0200)
added patches:
netfilter-nf_tables-adapt-set-backend-to-use-gc-transaction-api.patch
netfilter-nf_tables-allow-nfproto_inet-in-nft_-match-target-_validate.patch
netfilter-nf_tables-bogus-ebusy-when-deleting-flowtable-after-flush-for-4.19.patch
netfilter-nf_tables-defer-gc-run-if-previous-batch-is-still-pending.patch
netfilter-nf_tables-disable-toggling-dormant-table-state-more-than-once.patch
netfilter-nf_tables-discard-table-flag-update-with-pending-basechain-deletion.patch
netfilter-nf_tables-do-not-compare-internal-table-flags-on-updates.patch
netfilter-nf_tables-don-t-skip-expired-elements-during-walk.patch
netfilter-nf_tables-double-hook-unregistration-in-netns-path.patch
netfilter-nf_tables-drop-map-element-references-from-preparation-phase.patch
netfilter-nf_tables-fix-gc-transaction-races-with-netns-and-netlink-event-exit-path.patch
netfilter-nf_tables-fix-memleak-when-more-than-255-elements-expired.patch
netfilter-nf_tables-fix-table-flag-updates.patch
netfilter-nf_tables-gc-transaction-api-to-avoid-race-with-control-plane.patch
netfilter-nf_tables-gc-transaction-race-with-abort-path.patch
netfilter-nf_tables-gc-transaction-race-with-netns-dismantle.patch
netfilter-nf_tables-mark-newset-as-dead-on-transaction-abort.patch
netfilter-nf_tables-mark-set-as-dead-when-unbinding-anonymous-set-with-timeout.patch
netfilter-nf_tables-pass-context-to-nft_set_destroy.patch
netfilter-nf_tables-reject-new-basechain-after-table-flag-update.patch
netfilter-nf_tables-remove-busy-mark-and-gc-batch-api.patch
netfilter-nf_tables-set-dormant-flag-on-hook-register-failure.patch
netfilter-nf_tables-skip-dead-set-elements-in-netlink-dump.patch
netfilter-nf_tables-unregister-flowtable-hooks-on-netns-exit.patch
netfilter-nf_tables-validate-nfproto_-family.patch
netfilter-nft_dynset-fix-timeouts-later-than-23-days.patch
netfilter-nft_dynset-relax-superfluous-check-on-set-updates.patch
netfilter-nft_dynset-report-eopnotsupp-on-missing-set-feature.patch
netfilter-nft_set_hash-try-later-when-gc-hits-eagain-on-iteration.patch
netfilter-nft_set_rbtree-add-missing-expired-checks.patch
netfilter-nft_set_rbtree-allow-loose-matching-of-closing-element-in-interval.patch
netfilter-nft_set_rbtree-fix-null-deref-on-element-insertion.patch
netfilter-nft_set_rbtree-fix-overlap-expiration-walk.patch
netfilter-nft_set_rbtree-skip-end-interval-element-from-gc.patch
netfilter-nft_set_rbtree-skip-sync-gc-for-new-elements-in-this-transaction.patch
netfilter-nft_set_rbtree-switch-to-node-list-walk-for-overlap-detection.patch
netfilter-nft_set_rbtree-use-read-spinlock-to-avoid-datapath-contention.patch
netfilter-nftables-exthdr-fix-4-byte-stack-oob-write.patch
netfilter-nftables-rename-set-element-data-activation-deactivation-functions.patch
netfilter-nftables-update-table-flags-from-the-commit-phase.patch

41 files changed:
queue-4.19/netfilter-nf_tables-adapt-set-backend-to-use-gc-transaction-api.patch [new file with mode: 0644]
queue-4.19/netfilter-nf_tables-allow-nfproto_inet-in-nft_-match-target-_validate.patch [new file with mode: 0644]
queue-4.19/netfilter-nf_tables-bogus-ebusy-when-deleting-flowtable-after-flush-for-4.19.patch [new file with mode: 0644]
queue-4.19/netfilter-nf_tables-defer-gc-run-if-previous-batch-is-still-pending.patch [new file with mode: 0644]
queue-4.19/netfilter-nf_tables-disable-toggling-dormant-table-state-more-than-once.patch [new file with mode: 0644]
queue-4.19/netfilter-nf_tables-discard-table-flag-update-with-pending-basechain-deletion.patch [new file with mode: 0644]
queue-4.19/netfilter-nf_tables-do-not-compare-internal-table-flags-on-updates.patch [new file with mode: 0644]
queue-4.19/netfilter-nf_tables-don-t-skip-expired-elements-during-walk.patch [new file with mode: 0644]
queue-4.19/netfilter-nf_tables-double-hook-unregistration-in-netns-path.patch [new file with mode: 0644]
queue-4.19/netfilter-nf_tables-drop-map-element-references-from-preparation-phase.patch [new file with mode: 0644]
queue-4.19/netfilter-nf_tables-fix-gc-transaction-races-with-netns-and-netlink-event-exit-path.patch [new file with mode: 0644]
queue-4.19/netfilter-nf_tables-fix-memleak-when-more-than-255-elements-expired.patch [new file with mode: 0644]
queue-4.19/netfilter-nf_tables-fix-table-flag-updates.patch [new file with mode: 0644]
queue-4.19/netfilter-nf_tables-gc-transaction-api-to-avoid-race-with-control-plane.patch [new file with mode: 0644]
queue-4.19/netfilter-nf_tables-gc-transaction-race-with-abort-path.patch [new file with mode: 0644]
queue-4.19/netfilter-nf_tables-gc-transaction-race-with-netns-dismantle.patch [new file with mode: 0644]
queue-4.19/netfilter-nf_tables-mark-newset-as-dead-on-transaction-abort.patch [new file with mode: 0644]
queue-4.19/netfilter-nf_tables-mark-set-as-dead-when-unbinding-anonymous-set-with-timeout.patch [new file with mode: 0644]
queue-4.19/netfilter-nf_tables-pass-context-to-nft_set_destroy.patch [new file with mode: 0644]
queue-4.19/netfilter-nf_tables-reject-new-basechain-after-table-flag-update.patch [new file with mode: 0644]
queue-4.19/netfilter-nf_tables-remove-busy-mark-and-gc-batch-api.patch [new file with mode: 0644]
queue-4.19/netfilter-nf_tables-set-dormant-flag-on-hook-register-failure.patch [new file with mode: 0644]
queue-4.19/netfilter-nf_tables-skip-dead-set-elements-in-netlink-dump.patch [new file with mode: 0644]
queue-4.19/netfilter-nf_tables-unregister-flowtable-hooks-on-netns-exit.patch [new file with mode: 0644]
queue-4.19/netfilter-nf_tables-validate-nfproto_-family.patch [new file with mode: 0644]
queue-4.19/netfilter-nft_dynset-fix-timeouts-later-than-23-days.patch [new file with mode: 0644]
queue-4.19/netfilter-nft_dynset-relax-superfluous-check-on-set-updates.patch [new file with mode: 0644]
queue-4.19/netfilter-nft_dynset-report-eopnotsupp-on-missing-set-feature.patch [new file with mode: 0644]
queue-4.19/netfilter-nft_set_hash-try-later-when-gc-hits-eagain-on-iteration.patch [new file with mode: 0644]
queue-4.19/netfilter-nft_set_rbtree-add-missing-expired-checks.patch [new file with mode: 0644]
queue-4.19/netfilter-nft_set_rbtree-allow-loose-matching-of-closing-element-in-interval.patch [new file with mode: 0644]
queue-4.19/netfilter-nft_set_rbtree-fix-null-deref-on-element-insertion.patch [new file with mode: 0644]
queue-4.19/netfilter-nft_set_rbtree-fix-overlap-expiration-walk.patch [new file with mode: 0644]
queue-4.19/netfilter-nft_set_rbtree-skip-end-interval-element-from-gc.patch [new file with mode: 0644]
queue-4.19/netfilter-nft_set_rbtree-skip-sync-gc-for-new-elements-in-this-transaction.patch [new file with mode: 0644]
queue-4.19/netfilter-nft_set_rbtree-switch-to-node-list-walk-for-overlap-detection.patch [new file with mode: 0644]
queue-4.19/netfilter-nft_set_rbtree-use-read-spinlock-to-avoid-datapath-contention.patch [new file with mode: 0644]
queue-4.19/netfilter-nftables-exthdr-fix-4-byte-stack-oob-write.patch [new file with mode: 0644]
queue-4.19/netfilter-nftables-rename-set-element-data-activation-deactivation-functions.patch [new file with mode: 0644]
queue-4.19/netfilter-nftables-update-table-flags-from-the-commit-phase.patch [new file with mode: 0644]
queue-4.19/series

diff --git a/queue-4.19/netfilter-nf_tables-adapt-set-backend-to-use-gc-transaction-api.patch b/queue-4.19/netfilter-nf_tables-adapt-set-backend-to-use-gc-transaction-api.patch
new file mode 100644 (file)
index 0000000..05a7adf
--- /dev/null
@@ -0,0 +1,450 @@
+From stable+bounces-50350-greg=kroah.com@vger.kernel.org Thu Jun 13 03:02:56 2024
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 13 Jun 2024 03:01:40 +0200
+Subject: netfilter: nf_tables: adapt set backend to use GC transaction API
+To: netfilter-devel@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org
+Message-ID: <20240613010209.104423-12-pablo@netfilter.org>
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit f6c383b8c31a93752a52697f8430a71dcbc46adf upstream.
+
+Use the GC transaction API to replace the old and buggy gc API and the
+busy mark approach.
+
+No set elements are removed from async garbage collection anymore,
+instead the _DEAD bit is set so that the set element is not visible from
+the lookup path anymore. Async GC enqueues transaction work that might be
+aborted and retried later.
+
+The rbtree and pipapo set backends do not set the _DEAD bit from the sync
+GC path since this runs in the control plane path where the mutex is held.
+In this case, set elements are deactivated, removed and then released
+via RCU callback, sync GC never fails.
+
+Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges")
+Fixes: 8d8540c4f5e0 ("netfilter: nft_set_rbtree: add timeout support")
+Fixes: 9d0982927e79 ("netfilter: nft_hash: add support for timeouts")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nft_set_hash.c   |   82 +++++++++++++++--------
+ net/netfilter/nft_set_rbtree.c |  142 +++++++++++++++++++++++++----------------
+ 2 files changed, 143 insertions(+), 81 deletions(-)
+
+--- a/net/netfilter/nft_set_hash.c
++++ b/net/netfilter/nft_set_hash.c
+@@ -20,6 +20,9 @@
+ #include <linux/netfilter.h>
+ #include <linux/netfilter/nf_tables.h>
+ #include <net/netfilter/nf_tables.h>
++#include <net/netns/generic.h>
++
++extern unsigned int nf_tables_net_id;
+ /* We target a hash table size of 4, element hint is 75% of final size */
+ #define NFT_RHASH_ELEMENT_HINT 3
+@@ -62,6 +65,8 @@ static inline int nft_rhash_cmp(struct r
+       if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen))
+               return 1;
++      if (nft_set_elem_is_dead(&he->ext))
++              return 1;
+       if (nft_set_elem_expired(&he->ext))
+               return 1;
+       if (!nft_set_elem_active(&he->ext, x->genmask))
+@@ -190,7 +195,6 @@ static void nft_rhash_activate(const str
+       struct nft_rhash_elem *he = elem->priv;
+       nft_set_elem_change_active(net, set, &he->ext);
+-      nft_set_elem_clear_busy(&he->ext);
+ }
+ static bool nft_rhash_flush(const struct net *net,
+@@ -198,12 +202,9 @@ static bool nft_rhash_flush(const struct
+ {
+       struct nft_rhash_elem *he = priv;
+-      if (!nft_set_elem_mark_busy(&he->ext) ||
+-          !nft_is_active(net, &he->ext)) {
+-              nft_set_elem_change_active(net, set, &he->ext);
+-              return true;
+-      }
+-      return false;
++      nft_set_elem_change_active(net, set, &he->ext);
++
++      return true;
+ }
+ static void *nft_rhash_deactivate(const struct net *net,
+@@ -220,9 +221,8 @@ static void *nft_rhash_deactivate(const
+       rcu_read_lock();
+       he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params);
+-      if (he != NULL &&
+-          !nft_rhash_flush(net, set, he))
+-              he = NULL;
++      if (he)
++              nft_set_elem_change_active(net, set, &he->ext);
+       rcu_read_unlock();
+@@ -288,55 +288,80 @@ out:
+ static void nft_rhash_gc(struct work_struct *work)
+ {
++      struct nftables_pernet *nft_net;
+       struct nft_set *set;
+       struct nft_rhash_elem *he;
+       struct nft_rhash *priv;
+-      struct nft_set_gc_batch *gcb = NULL;
+       struct rhashtable_iter hti;
++      struct nft_trans_gc *gc;
++      struct net *net;
++      u32 gc_seq;
+       int err;
+       priv = container_of(work, struct nft_rhash, gc_work.work);
+       set  = nft_set_container_of(priv);
++      net  = read_pnet(&set->net);
++      nft_net = net_generic(net, nf_tables_net_id);
++      gc_seq = READ_ONCE(nft_net->gc_seq);
++
++      gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
++      if (!gc)
++              goto done;
+       err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL);
+-      if (err)
+-              goto schedule;
++      if (err) {
++              nft_trans_gc_destroy(gc);
++              goto done;
++      }
+       rhashtable_walk_start(&hti);
+       while ((he = rhashtable_walk_next(&hti))) {
+               if (IS_ERR(he)) {
+-                      if (PTR_ERR(he) != -EAGAIN)
+-                              goto out;
++                      if (PTR_ERR(he) != -EAGAIN) {
++                              nft_trans_gc_destroy(gc);
++                              gc = NULL;
++                              goto try_later;
++                      }
+                       continue;
+               }
++              /* Ruleset has been updated, try later. */
++              if (READ_ONCE(nft_net->gc_seq) != gc_seq) {
++                      nft_trans_gc_destroy(gc);
++                      gc = NULL;
++                      goto try_later;
++              }
++
++              if (nft_set_elem_is_dead(&he->ext))
++                      goto dead_elem;
++
+               if (nft_set_ext_exists(&he->ext, NFT_SET_EXT_EXPR)) {
+                       struct nft_expr *expr = nft_set_ext_expr(&he->ext);
+                       if (expr->ops->gc &&
+                           expr->ops->gc(read_pnet(&set->net), expr))
+-                              goto gc;
++                              goto needs_gc_run;
+               }
+               if (!nft_set_elem_expired(&he->ext))
+                       continue;
+-gc:
+-              if (nft_set_elem_mark_busy(&he->ext))
+-                      continue;
++needs_gc_run:
++              nft_set_elem_dead(&he->ext);
++dead_elem:
++              gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
++              if (!gc)
++                      goto try_later;
+-              gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
+-              if (gcb == NULL)
+-                      goto out;
+-              rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params);
+-              atomic_dec(&set->nelems);
+-              nft_set_gc_batch_add(gcb, he);
++              nft_trans_gc_elem_add(gc, he);
+       }
+-out:
++try_later:
+       rhashtable_walk_stop(&hti);
+       rhashtable_walk_exit(&hti);
+-      nft_set_gc_batch_complete(gcb);
+-schedule:
++      if (gc)
++              nft_trans_gc_queue_async_done(gc);
++
++done:
+       queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
+                          nft_set_gc_interval(set));
+ }
+@@ -399,7 +424,6 @@ static void nft_rhash_destroy(const stru
+       };
+       cancel_delayed_work_sync(&priv->gc_work);
+-      rcu_barrier();
+       rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy,
+                                   (void *)&rhash_ctx);
+ }
+--- a/net/netfilter/nft_set_rbtree.c
++++ b/net/netfilter/nft_set_rbtree.c
+@@ -17,6 +17,9 @@
+ #include <linux/netfilter.h>
+ #include <linux/netfilter/nf_tables.h>
+ #include <net/netfilter/nf_tables.h>
++#include <net/netns/generic.h>
++
++extern unsigned int nf_tables_net_id;
+ struct nft_rbtree {
+       struct rb_root          root;
+@@ -49,6 +52,12 @@ static int nft_rbtree_cmp(const struct n
+                     set->klen);
+ }
++static bool nft_rbtree_elem_expired(const struct nft_rbtree_elem *rbe)
++{
++      return nft_set_elem_expired(&rbe->ext) ||
++             nft_set_elem_is_dead(&rbe->ext);
++}
++
+ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
+                               const u32 *key, const struct nft_set_ext **ext,
+                               unsigned int seq)
+@@ -83,7 +92,7 @@ static bool __nft_rbtree_lookup(const st
+                               continue;
+                       }
+-                      if (nft_set_elem_expired(&rbe->ext))
++                      if (nft_rbtree_elem_expired(rbe))
+                               return false;
+                       if (nft_rbtree_interval_end(rbe)) {
+@@ -101,7 +110,7 @@ static bool __nft_rbtree_lookup(const st
+       if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
+           nft_set_elem_active(&interval->ext, genmask) &&
+-          !nft_set_elem_expired(&interval->ext) &&
++          !nft_rbtree_elem_expired(interval) &&
+           nft_rbtree_interval_start(interval)) {
+               *ext = &interval->ext;
+               return true;
+@@ -217,6 +226,18 @@ static void *nft_rbtree_get(const struct
+       return rbe;
+ }
++static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set,
++                               struct nft_rbtree *priv,
++                               struct nft_rbtree_elem *rbe)
++{
++      struct nft_set_elem elem = {
++              .priv   = rbe,
++      };
++
++      nft_setelem_data_deactivate(net, set, &elem);
++      rb_erase(&rbe->node, &priv->root);
++}
++
+ static int nft_rbtree_gc_elem(const struct nft_set *__set,
+                             struct nft_rbtree *priv,
+                             struct nft_rbtree_elem *rbe,
+@@ -224,11 +245,12 @@ static int nft_rbtree_gc_elem(const stru
+ {
+       struct nft_set *set = (struct nft_set *)__set;
+       struct rb_node *prev = rb_prev(&rbe->node);
++      struct net *net = read_pnet(&set->net);
+       struct nft_rbtree_elem *rbe_prev;
+-      struct nft_set_gc_batch *gcb;
++      struct nft_trans_gc *gc;
+-      gcb = nft_set_gc_batch_check(set, NULL, GFP_ATOMIC);
+-      if (!gcb)
++      gc = nft_trans_gc_alloc(set, 0, GFP_ATOMIC);
++      if (!gc)
+               return -ENOMEM;
+       /* search for end interval coming before this element.
+@@ -246,17 +268,28 @@ static int nft_rbtree_gc_elem(const stru
+       if (prev) {
+               rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
++              nft_rbtree_gc_remove(net, set, priv, rbe_prev);
+-              rb_erase(&rbe_prev->node, &priv->root);
+-              atomic_dec(&set->nelems);
+-              nft_set_gc_batch_add(gcb, rbe_prev);
++              /* There is always room in this trans gc for this element,
++               * memory allocation never actually happens, hence, the warning
++               * splat in such case. No need to set NFT_SET_ELEM_DEAD_BIT,
++               * this is synchronous gc which never fails.
++               */
++              gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
++              if (WARN_ON_ONCE(!gc))
++                      return -ENOMEM;
++
++              nft_trans_gc_elem_add(gc, rbe_prev);
+       }
+-      rb_erase(&rbe->node, &priv->root);
+-      atomic_dec(&set->nelems);
++      nft_rbtree_gc_remove(net, set, priv, rbe);
++      gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
++      if (WARN_ON_ONCE(!gc))
++              return -ENOMEM;
++
++      nft_trans_gc_elem_add(gc, rbe);
+-      nft_set_gc_batch_add(gcb, rbe);
+-      nft_set_gc_batch_complete(gcb);
++      nft_trans_gc_queue_sync_done(gc);
+       return 0;
+ }
+@@ -484,7 +517,6 @@ static void nft_rbtree_activate(const st
+       struct nft_rbtree_elem *rbe = elem->priv;
+       nft_set_elem_change_active(net, set, &rbe->ext);
+-      nft_set_elem_clear_busy(&rbe->ext);
+ }
+ static bool nft_rbtree_flush(const struct net *net,
+@@ -492,12 +524,9 @@ static bool nft_rbtree_flush(const struc
+ {
+       struct nft_rbtree_elem *rbe = priv;
+-      if (!nft_set_elem_mark_busy(&rbe->ext) ||
+-          !nft_is_active(net, &rbe->ext)) {
+-              nft_set_elem_change_active(net, set, &rbe->ext);
+-              return true;
+-      }
+-      return false;
++      nft_set_elem_change_active(net, set, &rbe->ext);
++
++      return true;
+ }
+ static void *nft_rbtree_deactivate(const struct net *net,
+@@ -574,26 +603,40 @@ cont:
+ static void nft_rbtree_gc(struct work_struct *work)
+ {
+-      struct nft_rbtree_elem *rbe, *rbe_end = NULL, *rbe_prev = NULL;
+-      struct nft_set_gc_batch *gcb = NULL;
++      struct nft_rbtree_elem *rbe, *rbe_end = NULL;
++      struct nftables_pernet *nft_net;
+       struct nft_rbtree *priv;
++      struct nft_trans_gc *gc;
+       struct rb_node *node;
+       struct nft_set *set;
++      unsigned int gc_seq;
+       struct net *net;
+-      u8 genmask;
+       priv = container_of(work, struct nft_rbtree, gc_work.work);
+       set  = nft_set_container_of(priv);
+       net  = read_pnet(&set->net);
+-      genmask = nft_genmask_cur(net);
++      nft_net = net_generic(net, nf_tables_net_id);
++      gc_seq  = READ_ONCE(nft_net->gc_seq);
++
++      gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
++      if (!gc)
++              goto done;
+       write_lock_bh(&priv->lock);
+       write_seqcount_begin(&priv->count);
+       for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
++
++              /* Ruleset has been updated, try later. */
++              if (READ_ONCE(nft_net->gc_seq) != gc_seq) {
++                      nft_trans_gc_destroy(gc);
++                      gc = NULL;
++                      goto try_later;
++              }
++
+               rbe = rb_entry(node, struct nft_rbtree_elem, node);
+-              if (!nft_set_elem_active(&rbe->ext, genmask))
+-                      continue;
++              if (nft_set_elem_is_dead(&rbe->ext))
++                      goto dead_elem;
+               /* elements are reversed in the rbtree for historical reasons,
+                * from highest to lowest value, that is why end element is
+@@ -603,43 +646,38 @@ static void nft_rbtree_gc(struct work_st
+                       rbe_end = rbe;
+                       continue;
+               }
++
+               if (!nft_set_elem_expired(&rbe->ext))
+                       continue;
+-              if (nft_set_elem_mark_busy(&rbe->ext)) {
+-                      rbe_end = NULL;
++              nft_set_elem_dead(&rbe->ext);
++
++              if (!rbe_end)
+                       continue;
+-              }
+-              if (rbe_prev) {
+-                      rb_erase(&rbe_prev->node, &priv->root);
+-                      rbe_prev = NULL;
+-              }
+-              gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
+-              if (!gcb)
+-                      break;
++              nft_set_elem_dead(&rbe_end->ext);
+-              atomic_dec(&set->nelems);
+-              nft_set_gc_batch_add(gcb, rbe);
+-              rbe_prev = rbe;
+-
+-              if (rbe_end) {
+-                      atomic_dec(&set->nelems);
+-                      nft_set_gc_batch_add(gcb, rbe_end);
+-                      rb_erase(&rbe_end->node, &priv->root);
+-                      rbe_end = NULL;
+-              }
+-              node = rb_next(node);
+-              if (!node)
+-                      break;
++              gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
++              if (!gc)
++                      goto try_later;
++
++              nft_trans_gc_elem_add(gc, rbe_end);
++              rbe_end = NULL;
++dead_elem:
++              gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
++              if (!gc)
++                      goto try_later;
++
++              nft_trans_gc_elem_add(gc, rbe);
+       }
+-      if (rbe_prev)
+-              rb_erase(&rbe_prev->node, &priv->root);
++
++try_later:
+       write_seqcount_end(&priv->count);
+       write_unlock_bh(&priv->lock);
+-      nft_set_gc_batch_complete(gcb);
+-
++      if (gc)
++              nft_trans_gc_queue_async_done(gc);
++done:
+       queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
+                          nft_set_gc_interval(set));
+ }
diff --git a/queue-4.19/netfilter-nf_tables-allow-nfproto_inet-in-nft_-match-target-_validate.patch b/queue-4.19/netfilter-nf_tables-allow-nfproto_inet-in-nft_-match-target-_validate.patch
new file mode 100644 (file)
index 0000000..d66a665
--- /dev/null
@@ -0,0 +1,106 @@
+From stable+bounces-50376-greg=kroah.com@vger.kernel.org Thu Jun 13 03:04:11 2024
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 13 Jun 2024 03:02:05 +0200
+Subject: netfilter: nf_tables: allow NFPROTO_INET in nft_(match/target)_validate()
+To: netfilter-devel@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org
+Message-ID: <20240613010209.104423-37-pablo@netfilter.org>
+
+From: Ignat Korchagin <ignat@cloudflare.com>
+
+[ Upstream commit 7e0f122c65912740327e4c54472acaa5f85868cb ]
+
+Commit d0009effa886 ("netfilter: nf_tables: validate NFPROTO_* family") added
+some validation of NFPROTO_* families in the nft_compat module, but it broke
+the ability to use legacy iptables modules in dual-stack nftables.
+
+While with legacy iptables one had to independently manage IPv4 and IPv6
+tables, with nftables it is possible to have dual-stack tables sharing the
+rules. Moreover, it was possible to use rules based on legacy iptables
+match/target modules in dual-stack nftables.
+
+As an example, the program from [2] creates an INET dual-stack family table
+using an xt_bpf based rule, which looks like the following (the actual output
+was generated with a patched nft tool as the current nft tool does not parse
+dual stack tables with legacy match rules, so consider it for illustrative
+purposes only):
+
+table inet testfw {
+  chain input {
+    type filter hook prerouting priority filter; policy accept;
+    bytecode counter packets 0 bytes 0 accept
+  }
+}
+
+After d0009effa886 ("netfilter: nf_tables: validate NFPROTO_* family") we get
+EOPNOTSUPP for the above program.
+
+Fix this by allowing NFPROTO_INET for nft_(match/target)_validate(), but also
+restrict the functions to classic iptables hooks.
+
+Changes in v3:
+  * clarify that upstream nft will not display such configuration properly and
+    that the output was generated with a patched nft tool
+  * remove example program from commit description and link to it instead
+  * no code changes otherwise
+
+Changes in v2:
+  * restrict nft_(match/target)_validate() to classic iptables hooks
+  * rewrite example program to use unmodified libnftnl
+
+Fixes: d0009effa886 ("netfilter: nf_tables: validate NFPROTO_* family")
+Link: https://lore.kernel.org/all/Zc1PfoWN38UuFJRI@calendula/T/#mc947262582c90fec044c7a3398cc92fac7afea72 [1]
+Link: https://lore.kernel.org/all/20240220145509.53357-1-ignat@cloudflare.com/ [2]
+Reported-by: Jordan Griege <jgriege@cloudflare.com>
+Signed-off-by: Ignat Korchagin <ignat@cloudflare.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nft_compat.c |   20 ++++++++++++++++++++
+ 1 file changed, 20 insertions(+)
+
+--- a/net/netfilter/nft_compat.c
++++ b/net/netfilter/nft_compat.c
+@@ -321,10 +321,20 @@ static int nft_target_validate(const str
+       if (ctx->family != NFPROTO_IPV4 &&
+           ctx->family != NFPROTO_IPV6 &&
++          ctx->family != NFPROTO_INET &&
+           ctx->family != NFPROTO_BRIDGE &&
+           ctx->family != NFPROTO_ARP)
+               return -EOPNOTSUPP;
++      ret = nft_chain_validate_hooks(ctx->chain,
++                                     (1 << NF_INET_PRE_ROUTING) |
++                                     (1 << NF_INET_LOCAL_IN) |
++                                     (1 << NF_INET_FORWARD) |
++                                     (1 << NF_INET_LOCAL_OUT) |
++                                     (1 << NF_INET_POST_ROUTING));
++      if (ret)
++              return ret;
++
+       if (nft_is_base_chain(ctx->chain)) {
+               const struct nft_base_chain *basechain =
+                                               nft_base_chain(ctx->chain);
+@@ -568,10 +578,20 @@ static int nft_match_validate(const stru
+       if (ctx->family != NFPROTO_IPV4 &&
+           ctx->family != NFPROTO_IPV6 &&
++          ctx->family != NFPROTO_INET &&
+           ctx->family != NFPROTO_BRIDGE &&
+           ctx->family != NFPROTO_ARP)
+               return -EOPNOTSUPP;
++      ret = nft_chain_validate_hooks(ctx->chain,
++                                     (1 << NF_INET_PRE_ROUTING) |
++                                     (1 << NF_INET_LOCAL_IN) |
++                                     (1 << NF_INET_FORWARD) |
++                                     (1 << NF_INET_LOCAL_OUT) |
++                                     (1 << NF_INET_POST_ROUTING));
++      if (ret)
++              return ret;
++
+       if (nft_is_base_chain(ctx->chain)) {
+               const struct nft_base_chain *basechain =
+                                               nft_base_chain(ctx->chain);
diff --git a/queue-4.19/netfilter-nf_tables-bogus-ebusy-when-deleting-flowtable-after-flush-for-4.19.patch b/queue-4.19/netfilter-nf_tables-bogus-ebusy-when-deleting-flowtable-after-flush-for-4.19.patch
new file mode 100644 (file)
index 0000000..75df797
--- /dev/null
@@ -0,0 +1,39 @@
+From stable+bounces-50366-greg=kroah.com@vger.kernel.org Thu Jun 13 03:03:37 2024
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 13 Jun 2024 03:01:55 +0200
+Subject: netfilter: nf_tables: bogus EBUSY when deleting flowtable after flush (for 4.19)
+To: netfilter-devel@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org
+Message-ID: <20240613010209.104423-27-pablo@netfilter.org>
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+3f0465a9ef02 ("netfilter: nf_tables: dynamically allocate hooks per
+net_device in flowtables") reworks flowtable support to allow for
+dynamic allocation of hooks, which implicitly fixes the following
+bogus EBUSY in transaction:
+
+  delete flowtable
+  add flowtable # same flowtable with same devices, it hits EBUSY
+
+This patch does not exist in any tree, but it fixes this issue for
+-stable Linux kernel 4.19
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nf_tables_api.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -5956,6 +5956,9 @@ static int nf_tables_newflowtable(struct
+                       continue;
+               list_for_each_entry(ft, &table->flowtables, list) {
++                      if (!nft_is_active_next(net, ft))
++                              continue;
++
+                       for (k = 0; k < ft->ops_len; k++) {
+                               if (!ft->ops[k].dev)
+                                       continue;
diff --git a/queue-4.19/netfilter-nf_tables-defer-gc-run-if-previous-batch-is-still-pending.patch b/queue-4.19/netfilter-nf_tables-defer-gc-run-if-previous-batch-is-still-pending.patch
new file mode 100644 (file)
index 0000000..231cddb
--- /dev/null
@@ -0,0 +1,74 @@
+From stable+bounces-50355-greg=kroah.com@vger.kernel.org Thu Jun 13 03:03:06 2024
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 13 Jun 2024 03:01:45 +0200
+Subject: netfilter: nf_tables: defer gc run if previous batch is still pending
+To: netfilter-devel@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org
+Message-ID: <20240613010209.104423-17-pablo@netfilter.org>
+
+From: Florian Westphal <fw@strlen.de>
+
+commit 8e51830e29e12670b4c10df070a4ea4c9593e961 upstream.
+
+Don't queue more gc work, else we may queue the same elements multiple
+times.
+
+If an element is flagged as dead, this can mean that either the previous
+gc request was invalidated/discarded by a transaction or that the previous
+request is still pending in the system work queue.
+
+The latter will happen if the gc interval is set to a very low value,
+e.g. 1ms, and system work queue is backlogged.
+
+The set's refcount is 1 if no previous gc requests are queued, so add
+a helper for this and skip gc run if old requests are pending.
+
+Add a helper for this and skip the gc run in this case.
+
+Fixes: f6c383b8c31a ("netfilter: nf_tables: adapt set backend to use GC transaction API")
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/netfilter/nf_tables.h |    5 +++++
+ net/netfilter/nft_set_hash.c      |    3 +++
+ net/netfilter/nft_set_rbtree.c    |    3 +++
+ 3 files changed, 11 insertions(+)
+
+--- a/include/net/netfilter/nf_tables.h
++++ b/include/net/netfilter/nf_tables.h
+@@ -445,6 +445,11 @@ static inline void *nft_set_priv(const s
+       return (void *)set->data;
+ }
++static inline bool nft_set_gc_is_pending(const struct nft_set *s)
++{
++      return refcount_read(&s->refs) != 1;
++}
++
+ static inline struct nft_set *nft_set_container_of(const void *priv)
+ {
+       return (void *)priv - offsetof(struct nft_set, data);
+--- a/net/netfilter/nft_set_hash.c
++++ b/net/netfilter/nft_set_hash.c
+@@ -304,6 +304,9 @@ static void nft_rhash_gc(struct work_str
+       nft_net = net_generic(net, nf_tables_net_id);
+       gc_seq = READ_ONCE(nft_net->gc_seq);
++      if (nft_set_gc_is_pending(set))
++              goto done;
++
+       gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
+       if (!gc)
+               goto done;
+--- a/net/netfilter/nft_set_rbtree.c
++++ b/net/netfilter/nft_set_rbtree.c
+@@ -618,6 +618,9 @@ static void nft_rbtree_gc(struct work_st
+       nft_net = net_generic(net, nf_tables_net_id);
+       gc_seq  = READ_ONCE(nft_net->gc_seq);
++      if (nft_set_gc_is_pending(set))
++              goto done;
++
+       gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
+       if (!gc)
+               goto done;
diff --git a/queue-4.19/netfilter-nf_tables-disable-toggling-dormant-table-state-more-than-once.patch b/queue-4.19/netfilter-nf_tables-disable-toggling-dormant-table-state-more-than-once.patch
new file mode 100644 (file)
index 0000000..71886af
--- /dev/null
@@ -0,0 +1,56 @@
+From stable+bounces-50365-greg=kroah.com@vger.kernel.org Thu Jun 13 03:03:36 2024
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 13 Jun 2024 03:01:54 +0200
+Subject: netfilter: nf_tables: disable toggling dormant table state more than once
+To: netfilter-devel@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org
+Message-ID: <20240613010209.104423-26-pablo@netfilter.org>
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit c9bd26513b3a11b3adb3c2ed8a31a01a87173ff1 upstream.
+
+nft -f -<<EOF
+add table ip t
+add table ip t { flags dormant; }
+add chain ip t c { type filter hook input priority 0; }
+add table ip t
+EOF
+
+Triggers a splat from nf core on next table delete because we lose
+track of right hook register state:
+
+WARNING: CPU: 2 PID: 1597 at net/netfilter/core.c:501 __nf_unregister_net_hook
+RIP: 0010:__nf_unregister_net_hook+0x41b/0x570
+ nf_unregister_net_hook+0xb4/0xf0
+ __nf_tables_unregister_hook+0x160/0x1d0
+[..]
+
+The above should have table in *active* state, but in fact no
+hooks were registered.
+
+Reject on/off/on games rather than attempting to fix this.
+
+Fixes: 179d9ba5559a ("netfilter: nf_tables: fix table flag updates")
+Reported-by: "Lee, Cherie-Anne" <cherie.lee@starlabs.sg>
+Cc: Bing-Jhong Billy Jheng <billy@starlabs.sg>
+Cc: info@starlabs.sg
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nf_tables_api.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -893,6 +893,10 @@ static int nf_tables_updtable(struct nft
+       if (flags == ctx->table->flags)
+               return 0;
++      /* No dormant off/on/off/on games in single transaction */
++      if (ctx->table->flags & __NFT_TABLE_F_UPDATE)
++              return -EINVAL;
++
+       trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE,
+                               sizeof(struct nft_trans_table));
+       if (trans == NULL)
diff --git a/queue-4.19/netfilter-nf_tables-discard-table-flag-update-with-pending-basechain-deletion.patch b/queue-4.19/netfilter-nf_tables-discard-table-flag-update-with-pending-basechain-deletion.patch
new file mode 100644 (file)
index 0000000..46962fe
--- /dev/null
@@ -0,0 +1,61 @@
+From stable+bounces-50380-greg=kroah.com@vger.kernel.org Thu Jun 13 03:04:22 2024
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 13 Jun 2024 03:02:09 +0200
+Subject: netfilter: nf_tables: discard table flag update with pending basechain deletion
+To: netfilter-devel@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org
+Message-ID: <20240613010209.104423-41-pablo@netfilter.org>
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit 1bc83a019bbe268be3526406245ec28c2458a518 upstream.
+
+Hook unregistration is deferred to the commit phase, same occurs with
+hook updates triggered by the table dormant flag. When both commands are
+combined, this results in deleting a basechain while leaving its hook
+still registered in the core.
+
+Fixes: 179d9ba5559a ("netfilter: nf_tables: fix table flag updates")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nf_tables_api.c |   20 +++++++++++++++++++-
+ 1 file changed, 19 insertions(+), 1 deletion(-)
+
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -877,6 +877,24 @@ static void nf_tables_table_disable(stru
+ #define __NFT_TABLE_F_UPDATE          (__NFT_TABLE_F_WAS_DORMANT | \
+                                        __NFT_TABLE_F_WAS_AWAKEN)
++static bool nft_table_pending_update(const struct nft_ctx *ctx)
++{
++      struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id);
++      struct nft_trans *trans;
++
++      if (ctx->table->flags & __NFT_TABLE_F_UPDATE)
++              return true;
++
++      list_for_each_entry(trans, &nft_net->commit_list, list) {
++              if (trans->ctx.table == ctx->table &&
++                  trans->msg_type == NFT_MSG_DELCHAIN &&
++                  nft_is_base_chain(trans->ctx.chain))
++                      return true;
++      }
++
++      return false;
++}
++
+ static int nf_tables_updtable(struct nft_ctx *ctx)
+ {
+       struct nft_trans *trans;
+@@ -894,7 +912,7 @@ static int nf_tables_updtable(struct nft
+               return 0;
+       /* No dormant off/on/off/on games in single transaction */
+-      if (ctx->table->flags & __NFT_TABLE_F_UPDATE)
++      if (nft_table_pending_update(ctx))
+               return -EINVAL;
+       trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE,
diff --git a/queue-4.19/netfilter-nf_tables-do-not-compare-internal-table-flags-on-updates.patch b/queue-4.19/netfilter-nf_tables-do-not-compare-internal-table-flags-on-updates.patch
new file mode 100644 (file)
index 0000000..d8fce0a
--- /dev/null
@@ -0,0 +1,33 @@
+From stable+bounces-50377-greg=kroah.com@vger.kernel.org Thu Jun 13 03:04:15 2024
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 13 Jun 2024 03:02:06 +0200
+Subject: netfilter: nf_tables: do not compare internal table flags on updates
+To: netfilter-devel@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org
+Message-ID: <20240613010209.104423-38-pablo@netfilter.org>
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+[ Upstream commit 4a0e7f2decbf9bd72461226f1f5f7dcc4b08f139 ]
+
+Restore skipping transaction if table update does not modify flags.
+
+Fixes: 179d9ba5559a ("netfilter: nf_tables: fix table flag updates")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nf_tables_api.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -890,7 +890,7 @@ static int nf_tables_updtable(struct nft
+       if (flags & ~NFT_TABLE_F_DORMANT)
+               return -EINVAL;
+-      if (flags == ctx->table->flags)
++      if (flags == (ctx->table->flags & NFT_TABLE_F_MASK))
+               return 0;
+       /* No dormant off/on/off/on games in single transaction */
diff --git a/queue-4.19/netfilter-nf_tables-don-t-skip-expired-elements-during-walk.patch b/queue-4.19/netfilter-nf_tables-don-t-skip-expired-elements-during-walk.patch
new file mode 100644 (file)
index 0000000..a36c7f7
--- /dev/null
@@ -0,0 +1,90 @@
+From stable+bounces-50349-greg=kroah.com@vger.kernel.org Thu Jun 13 03:02:50 2024
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 13 Jun 2024 03:01:38 +0200
+Subject: netfilter: nf_tables: don't skip expired elements during walk
+To: netfilter-devel@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org
+Message-ID: <20240613010209.104423-10-pablo@netfilter.org>
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit 24138933b97b055d486e8064b4a1721702442a9b upstream.
+
+There is an asymmetry between commit/abort and preparation phase if the
+following conditions are met:
+
+1. set is a verdict map ("1.2.3.4 : jump foo")
+2. timeouts are enabled
+
+In this case, the following sequence is problematic:
+
+1. element E in set S refers to chain C
+2. userspace requests removal of set S
+3. kernel does a set walk to decrement chain->use count for all elements
+   from preparation phase
+4. kernel does another set walk to remove elements from the commit phase
+   (or another walk to do a chain->use increment for all elements from
+    abort phase)
+
+If E has already expired in 1), it will be ignored during list walk, so its use count
+won't have been changed.
+
+Then, when set is culled, ->destroy callback will zap the element via
+nf_tables_set_elem_destroy(), but this function is only safe for
+elements that have been deactivated earlier from the preparation phase:
+lack of earlier deactivate removes the element but leaks the chain use
+count, which results in a WARN splat when the chain gets removed later,
+plus a leak of the nft_chain structure.
+
+Update pipapo_get() not to skip expired elements, otherwise flush
+command reports bogus ENOENT errors.
+
+Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges")
+Fixes: 8d8540c4f5e0 ("netfilter: nft_set_rbtree: add timeout support")
+Fixes: 9d0982927e79 ("netfilter: nft_hash: add support for timeouts")
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nf_tables_api.c  |    4 ++++
+ net/netfilter/nft_set_hash.c   |    2 --
+ net/netfilter/nft_set_rbtree.c |    2 --
+ 3 files changed, 4 insertions(+), 4 deletions(-)
+
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -4159,8 +4159,12 @@ static int nf_tables_dump_setelem(const
+                                 const struct nft_set_iter *iter,
+                                 struct nft_set_elem *elem)
+ {
++      const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+       struct nft_set_dump_args *args;
++      if (nft_set_elem_expired(ext))
++              return 0;
++
+       args = container_of(iter, struct nft_set_dump_args, iter);
+       return nf_tables_fill_setelem(args->skb, set, elem);
+ }
+--- a/net/netfilter/nft_set_hash.c
++++ b/net/netfilter/nft_set_hash.c
+@@ -268,8 +268,6 @@ static void nft_rhash_walk(const struct
+               if (iter->count < iter->skip)
+                       goto cont;
+-              if (nft_set_elem_expired(&he->ext))
+-                      goto cont;
+               if (!nft_set_elem_active(&he->ext, iter->genmask))
+                       goto cont;
+--- a/net/netfilter/nft_set_rbtree.c
++++ b/net/netfilter/nft_set_rbtree.c
+@@ -556,8 +556,6 @@ static void nft_rbtree_walk(const struct
+               if (iter->count < iter->skip)
+                       goto cont;
+-              if (nft_set_elem_expired(&rbe->ext))
+-                      goto cont;
+               if (!nft_set_elem_active(&rbe->ext, iter->genmask))
+                       goto cont;
diff --git a/queue-4.19/netfilter-nf_tables-double-hook-unregistration-in-netns-path.patch b/queue-4.19/netfilter-nf_tables-double-hook-unregistration-in-netns-path.patch
new file mode 100644 (file)
index 0000000..16a5b97
--- /dev/null
@@ -0,0 +1,126 @@
+From stable+bounces-50362-greg=kroah.com@vger.kernel.org Thu Jun 13 03:03:28 2024
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 13 Jun 2024 03:01:51 +0200
+Subject: netfilter: nf_tables: double hook unregistration in netns path
+To: netfilter-devel@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org
+Message-ID: <20240613010209.104423-23-pablo@netfilter.org>
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit f9a43007d3f7ba76d5e7f9421094f00f2ef202f8 upstream.
+
+__nft_release_hooks() is called from pre_netns exit path which
+unregisters the hooks, then the NETDEV_UNREGISTER event is triggered
+which unregisters the hooks again.
+
+[  565.221461] WARNING: CPU: 18 PID: 193 at net/netfilter/core.c:495 __nf_unregister_net_hook+0x247/0x270
+[...]
+[  565.246890] CPU: 18 PID: 193 Comm: kworker/u64:1 Tainted: G            E     5.18.0-rc7+ #27
+[  565.253682] Workqueue: netns cleanup_net
+[  565.257059] RIP: 0010:__nf_unregister_net_hook+0x247/0x270
+[...]
+[  565.297120] Call Trace:
+[  565.300900]  <TASK>
+[  565.304683]  nf_tables_flowtable_event+0x16a/0x220 [nf_tables]
+[  565.308518]  raw_notifier_call_chain+0x63/0x80
+[  565.312386]  unregister_netdevice_many+0x54f/0xb50
+
+Unregister and destroy netdev hook from netns pre_exit via kfree_rcu
+so the NETDEV_UNREGISTER path sees unregistered hooks.
+
+Fixes: 767d1216bff8 ("netfilter: nftables: fix possible UAF over chains from packet path in netns")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nf_tables_api.c    |   34 +++++++++++++++++++++++++++-------
+ net/netfilter/nft_chain_filter.c |    3 +++
+ 2 files changed, 30 insertions(+), 7 deletions(-)
+
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -204,9 +204,10 @@ static int nf_tables_register_hook(struc
+       return nf_register_net_hook(net, ops);
+ }
+-static void nf_tables_unregister_hook(struct net *net,
+-                                    const struct nft_table *table,
+-                                    struct nft_chain *chain)
++static void __nf_tables_unregister_hook(struct net *net,
++                                      const struct nft_table *table,
++                                      struct nft_chain *chain,
++                                      bool release_netdev)
+ {
+       const struct nft_base_chain *basechain;
+       const struct nf_hook_ops *ops;
+@@ -221,6 +222,16 @@ static void nf_tables_unregister_hook(st
+               return basechain->type->ops_unregister(net, ops);
+       nf_unregister_net_hook(net, ops);
++      if (release_netdev &&
++          table->family == NFPROTO_NETDEV)
++              nft_base_chain(chain)->ops.dev = NULL;
++}
++
++static void nf_tables_unregister_hook(struct net *net,
++                                    const struct nft_table *table,
++                                    struct nft_chain *chain)
++{
++      __nf_tables_unregister_hook(net, table, chain, false);
+ }
+ static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
+@@ -5821,8 +5832,9 @@ nft_flowtable_type_get(struct net *net,
+       return ERR_PTR(-ENOENT);
+ }
+-static void nft_unregister_flowtable_net_hooks(struct net *net,
+-                                             struct nft_flowtable *flowtable)
++static void __nft_unregister_flowtable_net_hooks(struct net *net,
++                                               struct nft_flowtable *flowtable,
++                                               bool release_netdev)
+ {
+       int i;
+@@ -5831,9 +5843,17 @@ static void nft_unregister_flowtable_net
+                       continue;
+               nf_unregister_net_hook(net, &flowtable->ops[i]);
++              if (release_netdev)
++                      flowtable->ops[i].dev = NULL;
+       }
+ }
++static void nft_unregister_flowtable_net_hooks(struct net *net,
++                                             struct nft_flowtable *flowtable)
++{
++      __nft_unregister_flowtable_net_hooks(net, flowtable, false);
++}
++
+ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
+                                 struct sk_buff *skb,
+                                 const struct nlmsghdr *nlh,
+@@ -7862,9 +7882,9 @@ static void __nft_release_table(struct n
+       ctx.family = table->family;
+       list_for_each_entry(chain, &table->chains, list)
+-              nf_tables_unregister_hook(net, table, chain);
++              __nf_tables_unregister_hook(net, table, chain, true);
+       list_for_each_entry(flowtable, &table->flowtables, list)
+-              nft_unregister_flowtable_net_hooks(net, flowtable);
++              __nft_unregister_flowtable_net_hooks(net, flowtable, true);
+       /* No packets are walking on these chains anymore. */
+       ctx.table = table;
+       list_for_each_entry(chain, &table->chains, list) {
+--- a/net/netfilter/nft_chain_filter.c
++++ b/net/netfilter/nft_chain_filter.c
+@@ -296,6 +296,9 @@ static void nft_netdev_event(unsigned lo
+               if (strcmp(basechain->dev_name, dev->name) != 0)
+                       return;
++              if (!basechain->ops.dev)
++                      return;
++
+               /* UNREGISTER events are also happpening on netns exit.
+                *
+                * Altough nf_tables core releases all tables/chains, only
diff --git a/queue-4.19/netfilter-nf_tables-drop-map-element-references-from-preparation-phase.patch b/queue-4.19/netfilter-nf_tables-drop-map-element-references-from-preparation-phase.patch
new file mode 100644 (file)
index 0000000..22057eb
--- /dev/null
@@ -0,0 +1,328 @@
+From stable+bounces-50344-greg=kroah.com@vger.kernel.org Thu Jun 13 03:02:39 2024
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 13 Jun 2024 03:01:32 +0200
+Subject: netfilter: nf_tables: drop map element references from preparation phase
+To: netfilter-devel@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org
+Message-ID: <20240613010209.104423-4-pablo@netfilter.org>
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+[ Upstream commit 628bd3e49cba1c066228e23d71a852c23e26da73 ]
+
+set .destroy callback releases the references to other objects in maps.
+This is very late and it results in spurious EBUSY errors. Drop refcount
+from the preparation phase instead, update set backend not to drop
+reference counter from set .destroy path.
+
+Exceptions: NFT_TRANS_PREPARE_ERROR does not require to drop the
+reference counter because the transaction abort path releases the map
+references for each element since the set is unbound. The abort path
+also deals with releasing reference counter for new elements added to
+unbound sets.
+
+Fixes: 591054469b3e ("netfilter: nf_tables: revisit chain/object refcounting from elements")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/netfilter/nf_tables.h |    5 +-
+ net/netfilter/nf_tables_api.c     |   89 ++++++++++++++++++++++++++++++++++----
+ net/netfilter/nft_set_bitmap.c    |    5 +-
+ net/netfilter/nft_set_hash.c      |   23 +++++++--
+ net/netfilter/nft_set_rbtree.c    |    5 +-
+ 5 files changed, 108 insertions(+), 19 deletions(-)
+
+--- a/include/net/netfilter/nf_tables.h
++++ b/include/net/netfilter/nf_tables.h
+@@ -349,7 +349,8 @@ struct nft_set_ops {
+       int                             (*init)(const struct nft_set *set,
+                                               const struct nft_set_desc *desc,
+                                               const struct nlattr * const nla[]);
+-      void                            (*destroy)(const struct nft_set *set);
++      void                            (*destroy)(const struct nft_ctx *ctx,
++                                                 const struct nft_set *set);
+       void                            (*gc_init)(const struct nft_set *set);
+       unsigned int                    elemsize;
+@@ -645,6 +646,8 @@ void *nft_set_elem_init(const struct nft
+                       u64 timeout, gfp_t gfp);
+ void nft_set_elem_destroy(const struct nft_set *set, void *elem,
+                         bool destroy_expr);
++void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
++                              const struct nft_set *set, void *elem);
+ /**
+  *    struct nft_set_gc_batch_head - nf_tables set garbage collection batch
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -388,6 +388,31 @@ static int nft_trans_set_add(const struc
+       return 0;
+ }
++static void nft_setelem_data_deactivate(const struct net *net,
++                                      const struct nft_set *set,
++                                      struct nft_set_elem *elem);
++
++static int nft_mapelem_deactivate(const struct nft_ctx *ctx,
++                                struct nft_set *set,
++                                const struct nft_set_iter *iter,
++                                struct nft_set_elem *elem)
++{
++      nft_setelem_data_deactivate(ctx->net, set, elem);
++
++      return 0;
++}
++
++static void nft_map_deactivate(const struct nft_ctx *ctx, struct nft_set *set)
++{
++      struct nft_set_iter iter = {
++              .genmask        = nft_genmask_next(ctx->net),
++              .fn             = nft_mapelem_deactivate,
++      };
++
++      set->ops->walk(ctx, set, &iter);
++      WARN_ON_ONCE(iter.err);
++}
++
+ static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set)
+ {
+       int err;
+@@ -396,6 +421,9 @@ static int nft_delset(const struct nft_c
+       if (err < 0)
+               return err;
++      if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
++              nft_map_deactivate(ctx, set);
++
+       nft_deactivate_next(ctx->net, set);
+       nft_use_dec(&ctx->table->use);
+@@ -3741,7 +3769,7 @@ static int nf_tables_newset(struct net *
+       return 0;
+ err4:
+-      ops->destroy(set);
++      ops->destroy(&ctx, set);
+ err3:
+       kfree(set->name);
+ err2:
+@@ -3758,7 +3786,7 @@ static void nft_set_destroy(const struct
+       if (WARN_ON(set->use > 0))
+               return;
+-      set->ops->destroy(set);
++      set->ops->destroy(ctx, set);
+       module_put(to_set_type(set->ops)->owner);
+       kfree(set->name);
+       kvfree(set);
+@@ -3883,10 +3911,39 @@ void nf_tables_unbind_set(const struct n
+ }
+ EXPORT_SYMBOL_GPL(nf_tables_unbind_set);
++static void nft_setelem_data_activate(const struct net *net,
++                                    const struct nft_set *set,
++                                    struct nft_set_elem *elem);
++
++static int nft_mapelem_activate(const struct nft_ctx *ctx,
++                              struct nft_set *set,
++                              const struct nft_set_iter *iter,
++                              struct nft_set_elem *elem)
++{
++      nft_setelem_data_activate(ctx->net, set, elem);
++
++      return 0;
++}
++
++static void nft_map_activate(const struct nft_ctx *ctx, struct nft_set *set)
++{
++      struct nft_set_iter iter = {
++              .genmask        = nft_genmask_next(ctx->net),
++              .fn             = nft_mapelem_activate,
++      };
++
++      set->ops->walk(ctx, set, &iter);
++      WARN_ON_ONCE(iter.err);
++}
++
+ void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set)
+ {
+-      if (nft_set_is_anonymous(set))
++      if (nft_set_is_anonymous(set)) {
++              if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
++                      nft_map_activate(ctx, set);
++
+               nft_clear(ctx->net, set);
++      }
+       nft_use_inc_restore(&set->use);
+ }
+@@ -3907,13 +3964,20 @@ void nf_tables_deactivate_set(const stru
+               nft_use_dec(&set->use);
+               break;
+       case NFT_TRANS_PREPARE:
+-              if (nft_set_is_anonymous(set))
+-                      nft_deactivate_next(ctx->net, set);
++              if (nft_set_is_anonymous(set)) {
++                      if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
++                              nft_map_deactivate(ctx, set);
++                      nft_deactivate_next(ctx->net, set);
++              }
+               nft_use_dec(&set->use);
+               return;
+       case NFT_TRANS_ABORT:
+       case NFT_TRANS_RELEASE:
++              if (nft_set_is_anonymous(set) &&
++                  set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
++                      nft_map_deactivate(ctx, set);
++
+               nft_use_dec(&set->use);
+               /* fall through */
+       default:
+@@ -4473,6 +4537,7 @@ void *nft_set_elem_init(const struct nft
+       return elem;
+ }
++/* Drop references and destroy. Called from gc, dynset and abort path. */
+ void nft_set_elem_destroy(const struct nft_set *set, void *elem,
+                         bool destroy_expr)
+ {
+@@ -4501,11 +4566,11 @@ void nft_set_elem_destroy(const struct n
+ }
+ EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
+-/* Only called from commit path, nft_setelem_data_deactivate() already deals
+- * with the refcounting from the preparation phase.
++/* Destroy element. References have been already dropped in the preparation
++ * path via nft_setelem_data_deactivate().
+  */
+-static void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
+-                                     const struct nft_set *set, void *elem)
++void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
++                              const struct nft_set *set, void *elem)
+ {
+       struct nft_set_ext *ext = nft_set_elem_ext(set, elem);
+@@ -4513,6 +4578,7 @@ static void nf_tables_set_elem_destroy(c
+               nf_tables_expr_destroy(ctx, nft_set_ext_expr(ext));
+       kfree(elem);
+ }
++EXPORT_SYMBOL_GPL(nf_tables_set_elem_destroy);
+ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
+                           const struct nlattr *attr, u32 nlmsg_flags)
+@@ -6940,6 +7006,8 @@ static int __nf_tables_abort(struct net
+               case NFT_MSG_DELSET:
+                       nft_use_inc_restore(&trans->ctx.table->use);
+                       nft_clear(trans->ctx.net, nft_trans_set(trans));
++                      if (nft_trans_set(trans)->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
++                              nft_map_activate(&trans->ctx, nft_trans_set(trans));
+                       nft_trans_destroy(trans);
+                       break;
+               case NFT_MSG_NEWSETELEM:
+@@ -7604,6 +7672,9 @@ static void __nft_release_table(struct n
+       list_for_each_entry_safe(set, ns, &table->sets, list) {
+               list_del(&set->list);
+               nft_use_dec(&table->use);
++              if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
++                      nft_map_deactivate(&ctx, set);
++
+               nft_set_destroy(&ctx, set);
+       }
+       list_for_each_entry_safe(obj, ne, &table->objects, list) {
+--- a/net/netfilter/nft_set_bitmap.c
++++ b/net/netfilter/nft_set_bitmap.c
+@@ -273,13 +273,14 @@ static int nft_bitmap_init(const struct
+       return 0;
+ }
+-static void nft_bitmap_destroy(const struct nft_set *set)
++static void nft_bitmap_destroy(const struct nft_ctx *ctx,
++                             const struct nft_set *set)
+ {
+       struct nft_bitmap *priv = nft_set_priv(set);
+       struct nft_bitmap_elem *be, *n;
+       list_for_each_entry_safe(be, n, &priv->list, head)
+-              nft_set_elem_destroy(set, be, true);
++              nf_tables_set_elem_destroy(ctx, set, be);
+ }
+ static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features,
+--- a/net/netfilter/nft_set_hash.c
++++ b/net/netfilter/nft_set_hash.c
+@@ -379,19 +379,31 @@ static int nft_rhash_init(const struct n
+       return 0;
+ }
++struct nft_rhash_ctx {
++      const struct nft_ctx    ctx;
++      const struct nft_set    *set;
++};
++
+ static void nft_rhash_elem_destroy(void *ptr, void *arg)
+ {
+-      nft_set_elem_destroy(arg, ptr, true);
++      struct nft_rhash_ctx *rhash_ctx = arg;
++
++      nf_tables_set_elem_destroy(&rhash_ctx->ctx, rhash_ctx->set, ptr);
+ }
+-static void nft_rhash_destroy(const struct nft_set *set)
++static void nft_rhash_destroy(const struct nft_ctx *ctx,
++                            const struct nft_set *set)
+ {
+       struct nft_rhash *priv = nft_set_priv(set);
++      struct nft_rhash_ctx rhash_ctx = {
++              .ctx    = *ctx,
++              .set    = set,
++      };
+       cancel_delayed_work_sync(&priv->gc_work);
+       rcu_barrier();
+       rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy,
+-                                  (void *)set);
++                                  (void *)&rhash_ctx);
+ }
+ /* Number of buckets is stored in u32, so cap our result to 1U<<31 */
+@@ -629,7 +641,8 @@ static int nft_hash_init(const struct nf
+       return 0;
+ }
+-static void nft_hash_destroy(const struct nft_set *set)
++static void nft_hash_destroy(const struct nft_ctx *ctx,
++                           const struct nft_set *set)
+ {
+       struct nft_hash *priv = nft_set_priv(set);
+       struct nft_hash_elem *he;
+@@ -639,7 +652,7 @@ static void nft_hash_destroy(const struc
+       for (i = 0; i < priv->buckets; i++) {
+               hlist_for_each_entry_safe(he, next, &priv->table[i], node) {
+                       hlist_del_rcu(&he->node);
+-                      nft_set_elem_destroy(set, he, true);
++                      nf_tables_set_elem_destroy(ctx, set, he);
+               }
+       }
+ }
+--- a/net/netfilter/nft_set_rbtree.c
++++ b/net/netfilter/nft_set_rbtree.c
+@@ -466,7 +466,8 @@ static int nft_rbtree_init(const struct
+       return 0;
+ }
+-static void nft_rbtree_destroy(const struct nft_set *set)
++static void nft_rbtree_destroy(const struct nft_ctx *ctx,
++                             const struct nft_set *set)
+ {
+       struct nft_rbtree *priv = nft_set_priv(set);
+       struct nft_rbtree_elem *rbe;
+@@ -477,7 +478,7 @@ static void nft_rbtree_destroy(const str
+       while ((node = priv->root.rb_node) != NULL) {
+               rb_erase(node, &priv->root);
+               rbe = rb_entry(node, struct nft_rbtree_elem, node);
+-              nft_set_elem_destroy(set, rbe, true);
++              nf_tables_set_elem_destroy(ctx, set, rbe);
+       }
+ }
diff --git a/queue-4.19/netfilter-nf_tables-fix-gc-transaction-races-with-netns-and-netlink-event-exit-path.patch b/queue-4.19/netfilter-nf_tables-fix-gc-transaction-races-with-netns-and-netlink-event-exit-path.patch
new file mode 100644 (file)
index 0000000..1709ef9
--- /dev/null
@@ -0,0 +1,89 @@
+From stable+bounces-50352-greg=kroah.com@vger.kernel.org Thu Jun 13 03:02:58 2024
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 13 Jun 2024 03:01:42 +0200
+Subject: netfilter: nf_tables: fix GC transaction races with netns and netlink event exit path
+To: netfilter-devel@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org
+Message-ID: <20240613010209.104423-14-pablo@netfilter.org>
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit 6a33d8b73dfac0a41f3877894b38082bd0c9a5bc upstream.
+
+Netlink event path is missing a synchronization point with GC
+transactions. Add GC sequence number update to netns release path and
+netlink event path, any GC transaction losing race will be discarded.
+
+Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nf_tables_api.c |   30 ++++++++++++++++++++++++++----
+ 1 file changed, 26 insertions(+), 4 deletions(-)
+
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -6899,6 +6899,22 @@ void nft_trans_gc_queue_sync_done(struct
+ }
+ EXPORT_SYMBOL_GPL(nft_trans_gc_queue_sync_done);
++static unsigned int nft_gc_seq_begin(struct nftables_pernet *nft_net)
++{
++      unsigned int gc_seq;
++
++      /* Bump gc counter, it becomes odd, this is the busy mark. */
++      gc_seq = READ_ONCE(nft_net->gc_seq);
++      WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
++
++      return gc_seq;
++}
++
++static void nft_gc_seq_end(struct nftables_pernet *nft_net, unsigned int gc_seq)
++{
++      WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
++}
++
+ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
+ {
+       struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+@@ -6953,9 +6969,7 @@ static int nf_tables_commit(struct net *
+       while (++nft_net->base_seq == 0)
+               ;
+-      /* Bump gc counter, it becomes odd, this is the busy mark. */
+-      gc_seq = READ_ONCE(nft_net->gc_seq);
+-      WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
++      gc_seq = nft_gc_seq_begin(nft_net);
+       /* step 3. Start new generation, rules_gen_X now in use. */
+       net->nft.gencursor = nft_gencursor_next(net);
+@@ -7083,7 +7097,7 @@ static int nf_tables_commit(struct net *
+       nf_tables_commit_release(net);
+       nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
+-      WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
++      nft_gc_seq_end(nft_net, gc_seq);
+       mutex_unlock(&nft_net->commit_mutex);
+       return 0;
+@@ -7898,11 +7912,19 @@ static int __net_init nf_tables_init_net
+ static void __net_exit nf_tables_exit_net(struct net *net)
+ {
+       struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
++      unsigned int gc_seq;
+       mutex_lock(&nft_net->commit_mutex);
++
++      gc_seq = nft_gc_seq_begin(nft_net);
++
+       if (!list_empty(&nft_net->commit_list))
+               __nf_tables_abort(net);
++
+       __nft_release_tables(net);
++
++      nft_gc_seq_end(nft_net, gc_seq);
++
+       mutex_unlock(&nft_net->commit_mutex);
+       WARN_ON_ONCE(!list_empty(&nft_net->tables));
+ }
diff --git a/queue-4.19/netfilter-nf_tables-fix-memleak-when-more-than-255-elements-expired.patch b/queue-4.19/netfilter-nf_tables-fix-memleak-when-more-than-255-elements-expired.patch
new file mode 100644 (file)
index 0000000..cdd59ec
--- /dev/null
@@ -0,0 +1,83 @@
+From stable+bounces-50360-greg=kroah.com@vger.kernel.org Thu Jun 13 03:03:20 2024
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 13 Jun 2024 03:01:49 +0200
+Subject: netfilter: nf_tables: fix memleak when more than 255 elements expired
+To: netfilter-devel@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org
+Message-ID: <20240613010209.104423-21-pablo@netfilter.org>
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit cf5000a7787cbc10341091d37245a42c119d26c5 upstream.
+
+When more than 255 elements expired we're supposed to switch to a new gc
+container structure.
+
+This never happens: u8 type will wrap before reaching the boundary
+and nft_trans_gc_space() always returns true.
+
+This means we recycle the initial gc container structure and
+lose track of the elements that came before.
+
+While at it, don't deref 'gc' after we've passed it to call_rcu.
+
+Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane")
+Reported-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/netfilter/nf_tables.h |    2 +-
+ net/netfilter/nf_tables_api.c     |   10 ++++++++--
+ 2 files changed, 9 insertions(+), 3 deletions(-)
+
+--- a/include/net/netfilter/nf_tables.h
++++ b/include/net/netfilter/nf_tables.h
+@@ -1389,7 +1389,7 @@ struct nft_trans_gc {
+       struct net              *net;
+       struct nft_set          *set;
+       u32                     seq;
+-      u8                      count;
++      u16                     count;
+       void                    *priv[NFT_TRANS_GC_BATCHCOUNT];
+       struct rcu_head         rcu;
+ };
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -6857,12 +6857,15 @@ static int nft_trans_gc_space(struct nft
+ struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc,
+                                             unsigned int gc_seq, gfp_t gfp)
+ {
++      struct nft_set *set;
++
+       if (nft_trans_gc_space(gc))
+               return gc;
++      set = gc->set;
+       nft_trans_gc_queue_work(gc);
+-      return nft_trans_gc_alloc(gc->set, gc_seq, gfp);
++      return nft_trans_gc_alloc(set, gc_seq, gfp);
+ }
+ EXPORT_SYMBOL_GPL(nft_trans_gc_queue_async);
+@@ -6879,15 +6882,18 @@ EXPORT_SYMBOL_GPL(nft_trans_gc_queue_asy
+ struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp)
+ {
++      struct nft_set *set;
++
+       if (WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net)))
+               return NULL;
+       if (nft_trans_gc_space(gc))
+               return gc;
++      set = gc->set;
+       call_rcu(&gc->rcu, nft_trans_gc_trans_free);
+-      return nft_trans_gc_alloc(gc->set, 0, gfp);
++      return nft_trans_gc_alloc(set, 0, gfp);
+ }
+ EXPORT_SYMBOL_GPL(nft_trans_gc_queue_sync);
diff --git a/queue-4.19/netfilter-nf_tables-fix-table-flag-updates.patch b/queue-4.19/netfilter-nf_tables-fix-table-flag-updates.patch
new file mode 100644 (file)
index 0000000..4caf0ad
--- /dev/null
@@ -0,0 +1,194 @@
+From stable+bounces-50364-greg=kroah.com@vger.kernel.org Thu Jun 13 03:03:31 2024
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 13 Jun 2024 03:01:53 +0200
+Subject: netfilter: nf_tables: fix table flag updates
+To: netfilter-devel@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org
+Message-ID: <20240613010209.104423-25-pablo@netfilter.org>
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit 179d9ba5559a756f4322583388b3213fe4e391b0 upstream.
+
+The dormant flag needs to be updated from the preparation phase,
+otherwise, two consecutive requests to dorm a table in the same batch
+might try to remove the same hooks twice, resulting in the following
+warning:
+
+ hook not found, pf 3 num 0
+ WARNING: CPU: 0 PID: 334 at net/netfilter/core.c:480 __nf_unregister_net_hook+0x1eb/0x610 net/netfilter/core.c:480
+ Modules linked in:
+ CPU: 0 PID: 334 Comm: kworker/u4:5 Not tainted 5.12.0-syzkaller #0
+ Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+ Workqueue: netns cleanup_net
+ RIP: 0010:__nf_unregister_net_hook+0x1eb/0x610 net/netfilter/core.c:480
+
+This patch is a partial revert of 0ce7cf4127f1 ("netfilter: nftables:
+update table flags from the commit phase") to restore the previous
+behaviour.
+
+However, there is still another problem: a batch containing a series of
+dorm-wakeup-dorm table updates, or vice versa, also triggers the warning above
+since hook unregistration happens from the preparation phase, while hook
+registration occurs from the commit phase.
+
+To fix this problem, this patch adds two internal flags to annotate the
+original dormant flag status which are __NFT_TABLE_F_WAS_DORMANT and
+__NFT_TABLE_F_WAS_AWAKEN, to restore it from the abort path.
+
+The __NFT_TABLE_F_UPDATE bitmask allows to handle the dormant flag update
+with one single transaction.
+
+Reported-by: syzbot+7ad5cd1615f2d89c6e7e@syzkaller.appspotmail.com
+Fixes: 0ce7cf4127f1 ("netfilter: nftables: update table flags from the commit phase")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/netfilter/nf_tables.h        |    6 ---
+ include/uapi/linux/netfilter/nf_tables.h |    1 
+ net/netfilter/nf_tables_api.c            |   59 +++++++++++++++++++++----------
+ 3 files changed, 41 insertions(+), 25 deletions(-)
+
+--- a/include/net/netfilter/nf_tables.h
++++ b/include/net/netfilter/nf_tables.h
+@@ -1347,16 +1347,10 @@ struct nft_trans_chain {
+ struct nft_trans_table {
+       bool                            update;
+-      u8                              state;
+-      u32                             flags;
+ };
+ #define nft_trans_table_update(trans) \
+       (((struct nft_trans_table *)trans->data)->update)
+-#define nft_trans_table_state(trans)  \
+-      (((struct nft_trans_table *)trans->data)->state)
+-#define nft_trans_table_flags(trans)  \
+-      (((struct nft_trans_table *)trans->data)->flags)
+ struct nft_trans_elem {
+       struct nft_set                  *set;
+--- a/include/uapi/linux/netfilter/nf_tables.h
++++ b/include/uapi/linux/netfilter/nf_tables.h
+@@ -162,6 +162,7 @@ enum nft_hook_attributes {
+ enum nft_table_flags {
+       NFT_TABLE_F_DORMANT     = 0x1,
+ };
++#define NFT_TABLE_F_MASK       (NFT_TABLE_F_DORMANT)
+ /**
+  * enum nft_table_attributes - nf_tables table netlink attributes
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -676,7 +676,8 @@ static int nf_tables_fill_table_info(str
+               goto nla_put_failure;
+       if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) ||
+-          nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)) ||
++          nla_put_be32(skb, NFTA_TABLE_FLAGS,
++                       htonl(table->flags & NFT_TABLE_F_MASK)) ||
+           nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use)) ||
+           nla_put_be64(skb, NFTA_TABLE_HANDLE, cpu_to_be64(table->handle),
+                        NFTA_TABLE_PAD))
+@@ -865,20 +866,22 @@ err:
+ static void nf_tables_table_disable(struct net *net, struct nft_table *table)
+ {
++      table->flags &= ~NFT_TABLE_F_DORMANT;
+       nft_table_disable(net, table, 0);
++      table->flags |= NFT_TABLE_F_DORMANT;
+ }
+-enum {
+-      NFT_TABLE_STATE_UNCHANGED       = 0,
+-      NFT_TABLE_STATE_DORMANT,
+-      NFT_TABLE_STATE_WAKEUP
+-};
++#define __NFT_TABLE_F_INTERNAL                (NFT_TABLE_F_MASK + 1)
++#define __NFT_TABLE_F_WAS_DORMANT     (__NFT_TABLE_F_INTERNAL << 0)
++#define __NFT_TABLE_F_WAS_AWAKEN      (__NFT_TABLE_F_INTERNAL << 1)
++#define __NFT_TABLE_F_UPDATE          (__NFT_TABLE_F_WAS_DORMANT | \
++                                       __NFT_TABLE_F_WAS_AWAKEN)
+ static int nf_tables_updtable(struct nft_ctx *ctx)
+ {
+       struct nft_trans *trans;
+       u32 flags;
+-      int ret = 0;
++      int ret;
+       if (!ctx->nla[NFTA_TABLE_FLAGS])
+               return 0;
+@@ -897,21 +900,27 @@ static int nf_tables_updtable(struct nft
+       if ((flags & NFT_TABLE_F_DORMANT) &&
+           !(ctx->table->flags & NFT_TABLE_F_DORMANT)) {
+-              nft_trans_table_state(trans) = NFT_TABLE_STATE_DORMANT;
++              ctx->table->flags |= NFT_TABLE_F_DORMANT;
++              if (!(ctx->table->flags & __NFT_TABLE_F_UPDATE))
++                      ctx->table->flags |= __NFT_TABLE_F_WAS_AWAKEN;
+       } else if (!(flags & NFT_TABLE_F_DORMANT) &&
+                  ctx->table->flags & NFT_TABLE_F_DORMANT) {
+-              ret = nf_tables_table_enable(ctx->net, ctx->table);
+-              if (ret >= 0)
+-                      nft_trans_table_state(trans) = NFT_TABLE_STATE_WAKEUP;
++              ctx->table->flags &= ~NFT_TABLE_F_DORMANT;
++              if (!(ctx->table->flags & __NFT_TABLE_F_UPDATE)) {
++                      ret = nf_tables_table_enable(ctx->net, ctx->table);
++                      if (ret < 0)
++                              goto err_register_hooks;
++
++                      ctx->table->flags |= __NFT_TABLE_F_WAS_DORMANT;
++              }
+       }
+-      if (ret < 0)
+-              goto err;
+-      nft_trans_table_flags(trans) = flags;
+       nft_trans_table_update(trans) = true;
+       nft_trans_commit_list_add_tail(ctx->net, trans);
++
+       return 0;
+-err:
++
++err_register_hooks:
+       nft_trans_destroy(trans);
+       return ret;
+ }
+@@ -7013,10 +7022,14 @@ static int nf_tables_commit(struct net *
+               switch (trans->msg_type) {
+               case NFT_MSG_NEWTABLE:
+                       if (nft_trans_table_update(trans)) {
+-                              if (nft_trans_table_state(trans) == NFT_TABLE_STATE_DORMANT)
++                              if (!(trans->ctx.table->flags & __NFT_TABLE_F_UPDATE)) {
++                                      nft_trans_destroy(trans);
++                                      break;
++                              }
++                              if (trans->ctx.table->flags & NFT_TABLE_F_DORMANT)
+                                       nf_tables_table_disable(net, trans->ctx.table);
+-                              trans->ctx.table->flags = nft_trans_table_flags(trans);
++                              trans->ctx.table->flags &= ~__NFT_TABLE_F_UPDATE;
+                       } else {
+                               nft_clear(net, trans->ctx.table);
+                       }
+@@ -7177,9 +7190,17 @@ static int __nf_tables_abort(struct net
+               switch (trans->msg_type) {
+               case NFT_MSG_NEWTABLE:
+                       if (nft_trans_table_update(trans)) {
+-                              if (nft_trans_table_state(trans) == NFT_TABLE_STATE_WAKEUP)
++                              if (!(trans->ctx.table->flags & __NFT_TABLE_F_UPDATE)) {
++                                      nft_trans_destroy(trans);
++                                      break;
++                              }
++                              if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_DORMANT) {
+                                       nf_tables_table_disable(net, trans->ctx.table);
+-
++                                      trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
++                              } else if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_AWAKEN) {
++                                      trans->ctx.table->flags &= ~NFT_TABLE_F_DORMANT;
++                              }
++                              trans->ctx.table->flags &= ~__NFT_TABLE_F_UPDATE;
+                               nft_trans_destroy(trans);
+                       } else {
+                               list_del_rcu(&trans->ctx.table->list);
diff --git a/queue-4.19/netfilter-nf_tables-gc-transaction-api-to-avoid-race-with-control-plane.patch b/queue-4.19/netfilter-nf_tables-gc-transaction-api-to-avoid-race-with-control-plane.patch
new file mode 100644 (file)
index 0000000..a1c156b
--- /dev/null
@@ -0,0 +1,544 @@
+From stable+bounces-50357-greg=kroah.com@vger.kernel.org Thu Jun 13 03:03:13 2024
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 13 Jun 2024 03:01:39 +0200
+Subject: netfilter: nf_tables: GC transaction API to avoid race with control plane
+To: netfilter-devel@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org
+Message-ID: <20240613010209.104423-11-pablo@netfilter.org>
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit 5f68718b34a531a556f2f50300ead2862278da26 upstream.
+
+[ this includes
+  8357bc946a2a ("netfilter: nf_tables: use correct lock to protect gc_list") ]
+
+The set types rhashtable and rbtree use a GC worker to reclaim memory.
+From system work queue, in periodic intervals, a scan of the table is
+done.
+
+The major caveat here is that the nft transaction mutex is not held.
+This causes a race between control plane and GC when they attempt to
+delete the same element.
+
+We cannot grab the netlink mutex from the work queue, because the
+control plane has to wait for the GC work queue in case the set is to be
+removed, so we get the following deadlock:
+
+   cpu 1                                cpu2
+     GC work                            transaction comes in , lock nft mutex
+       `acquire nft mutex // BLOCKS
+                                        transaction asks to remove the set
+                                        set destruction calls cancel_work_sync()
+
+cancel_work_sync will now block forever, because it is waiting for the
+mutex the caller already owns.
+
+This patch adds a new API that deals with garbage collection in two
+steps:
+
+1) Lockless GC of expired elements sets on the NFT_SET_ELEM_DEAD_BIT
+   so they are not visible via lookup. Annotate current GC sequence in
+   the GC transaction. Enqueue GC transaction work as soon as it is
+   full. If ruleset is updated, then GC transaction is aborted and
+   retried later.
+
+2) GC work grabs the mutex. If GC sequence has changed then this GC
+   transaction lost race with control plane, abort it as it contains
+   stale references to objects and let GC try again later. If the
+   ruleset is intact, then this GC transaction deactivates and removes
+   the elements and it uses call_rcu() to destroy elements.
+
+Note that no elements are removed from GC lockless path, the _DEAD bit
+is set and pointers are collected. GC catchall does not remove the
+elements anymore too. There is a new set->dead flag that is set on to
+abort the GC transaction to deal with set->ops->destroy() path which
+removes the remaining elements in the set from commit_release, where no
+mutex is held.
+
+To deal with GC when mutex is held, which allows safe deactivate and
+removal, add sync GC API which releases the set element object via
+call_rcu(). This is used by rbtree and pipapo backends which also
+perform garbage collection from control plane path.
+
+Since element removal from sets can happen from control plane and
+element garbage collection/timeout, it is necessary to keep the set
+structure alive until all elements have been deactivated and destroyed.
+
+We cannot do a cancel_work_sync or flush_work in nft_set_destroy because
+it's called with the transaction mutex held, but the aforementioned async
+work queue might be blocked on the very mutex that nft_set_destroy()
+callchain is sitting on.
+
+This gives us the choice of ABBA deadlock or UaF.
+
+To avoid both, add set->refs refcount_t member. The GC API can then
+increment the set refcount and release it once the elements have been
+free'd.
+
+Set backends are adapted to use the GC transaction API in a follow up
+patch entitled:
+
+  ("netfilter: nf_tables: use gc transaction API in set backends")
+
+This is joint work with Florian Westphal.
+
+Fixes: cfed7e1b1f8e ("netfilter: nf_tables: add set garbage collection helpers")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/netfilter/nf_tables.h |   61 ++++++++++
+ net/netfilter/nf_tables_api.c     |  225 ++++++++++++++++++++++++++++++++++++--
+ 2 files changed, 276 insertions(+), 10 deletions(-)
+
+--- a/include/net/netfilter/nf_tables.h
++++ b/include/net/netfilter/nf_tables.h
+@@ -380,6 +380,7 @@ void nft_unregister_set(struct nft_set_t
+  *
+  *    @list: table set list node
+  *    @bindings: list of set bindings
++ *    @refs: internal refcounting for async set destruction
+  *    @table: table this set belongs to
+  *    @net: netnamespace this set belongs to
+  *    @name: name of the set
+@@ -406,6 +407,7 @@ void nft_unregister_set(struct nft_set_t
+ struct nft_set {
+       struct list_head                list;
+       struct list_head                bindings;
++      refcount_t                      refs;
+       struct nft_table                *table;
+       possible_net_t                  net;
+       char                            *name;
+@@ -424,7 +426,8 @@ struct nft_set {
+       unsigned char                   *udata;
+       /* runtime data below here */
+       const struct nft_set_ops        *ops ____cacheline_aligned;
+-      u16                             flags:14,
++      u16                             flags:13,
++                                      dead:1,
+                                       genmask:2;
+       u8                              klen;
+       u8                              dlen;
+@@ -1346,6 +1349,32 @@ static inline void nft_set_elem_clear_bu
+       clear_bit(NFT_SET_ELEM_BUSY_BIT, word);
+ }
++#define NFT_SET_ELEM_DEAD_MASK (1 << 3)
++
++#if defined(__LITTLE_ENDIAN_BITFIELD)
++#define NFT_SET_ELEM_DEAD_BIT 3
++#elif defined(__BIG_ENDIAN_BITFIELD)
++#define NFT_SET_ELEM_DEAD_BIT (BITS_PER_LONG - BITS_PER_BYTE + 3)
++#else
++#error
++#endif
++
++static inline void nft_set_elem_dead(struct nft_set_ext *ext)
++{
++      unsigned long *word = (unsigned long *)ext;
++
++      BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0);
++      set_bit(NFT_SET_ELEM_DEAD_BIT, word);
++}
++
++static inline int nft_set_elem_is_dead(const struct nft_set_ext *ext)
++{
++      unsigned long *word = (unsigned long *)ext;
++
++      BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0);
++      return test_bit(NFT_SET_ELEM_DEAD_BIT, word);
++}
++
+ /**
+  *    struct nft_trans - nf_tables object update in transaction
+  *
+@@ -1439,6 +1468,35 @@ struct nft_trans_flowtable {
+ #define nft_trans_flowtable(trans)    \
+       (((struct nft_trans_flowtable *)trans->data)->flowtable)
++#define NFT_TRANS_GC_BATCHCOUNT                256
++
++struct nft_trans_gc {
++      struct list_head        list;
++      struct net              *net;
++      struct nft_set          *set;
++      u32                     seq;
++      u8                      count;
++      void                    *priv[NFT_TRANS_GC_BATCHCOUNT];
++      struct rcu_head         rcu;
++};
++
++struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set,
++                                      unsigned int gc_seq, gfp_t gfp);
++void nft_trans_gc_destroy(struct nft_trans_gc *trans);
++
++struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc,
++                                            unsigned int gc_seq, gfp_t gfp);
++void nft_trans_gc_queue_async_done(struct nft_trans_gc *gc);
++
++struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp);
++void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans);
++
++void nft_trans_gc_elem_add(struct nft_trans_gc *gc, void *priv);
++
++void nft_setelem_data_deactivate(const struct net *net,
++                               const struct nft_set *set,
++                               struct nft_set_elem *elem);
++
+ int __init nft_chain_filter_init(void);
+ void nft_chain_filter_fini(void);
+@@ -1451,6 +1509,7 @@ struct nftables_pernet {
+       struct mutex            commit_mutex;
+       unsigned int            base_seq;
+       u8                      validate_state;
++      unsigned int            gc_seq;
+ };
+ #endif /* _NET_NF_TABLES_H */
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -29,10 +29,13 @@
+ #define NFT_SET_MAX_ANONLEN 16
+ unsigned int nf_tables_net_id __read_mostly;
++EXPORT_SYMBOL_GPL(nf_tables_net_id);
+ static LIST_HEAD(nf_tables_expressions);
+ static LIST_HEAD(nf_tables_objects);
+ static LIST_HEAD(nf_tables_flowtables);
++static LIST_HEAD(nf_tables_gc_list);
++static DEFINE_SPINLOCK(nf_tables_gc_list_lock);
+ static u64 table_handle;
+ enum {
+@@ -73,6 +76,9 @@ static void nft_validate_state_update(st
+       nft_net->validate_state = new_validate_state;
+ }
++static void nft_trans_gc_work(struct work_struct *work);
++static DECLARE_WORK(trans_gc_work, nft_trans_gc_work);
++
+ static void nft_ctx_init(struct nft_ctx *ctx,
+                        struct net *net,
+                        const struct sk_buff *skb,
+@@ -388,10 +394,6 @@ static int nft_trans_set_add(const struc
+       return 0;
+ }
+-static void nft_setelem_data_deactivate(const struct net *net,
+-                                      const struct nft_set *set,
+-                                      struct nft_set_elem *elem);
+-
+ static int nft_mapelem_deactivate(const struct nft_ctx *ctx,
+                                 struct nft_set *set,
+                                 const struct nft_set_iter *iter,
+@@ -3739,6 +3741,7 @@ static int nf_tables_newset(struct net *
+       }
+       INIT_LIST_HEAD(&set->bindings);
++      refcount_set(&set->refs, 1);
+       set->table = table;
+       write_pnet(&set->net, net);
+       set->ops   = ops;
+@@ -3781,6 +3784,14 @@ err1:
+       return err;
+ }
++static void nft_set_put(struct nft_set *set)
++{
++      if (refcount_dec_and_test(&set->refs)) {
++              kfree(set->name);
++              kvfree(set);
++      }
++}
++
+ static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
+ {
+       if (WARN_ON(set->use > 0))
+@@ -3788,8 +3799,7 @@ static void nft_set_destroy(const struct
+       set->ops->destroy(ctx, set);
+       module_put(to_set_type(set->ops)->owner);
+-      kfree(set->name);
+-      kvfree(set);
++      nft_set_put(set);
+ }
+ static int nf_tables_delset(struct net *net, struct sock *nlsk,
+@@ -4888,9 +4898,9 @@ static void nft_setelem_data_activate(co
+               nft_use_inc_restore(&(*nft_set_ext_obj(ext))->use);
+ }
+-static void nft_setelem_data_deactivate(const struct net *net,
+-                                      const struct nft_set *set,
+-                                      struct nft_set_elem *elem)
++void nft_setelem_data_deactivate(const struct net *net,
++                               const struct nft_set *set,
++                               struct nft_set_elem *elem)
+ {
+       const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+@@ -4899,6 +4909,7 @@ static void nft_setelem_data_deactivate(
+       if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
+               nft_use_dec(&(*nft_set_ext_obj(ext))->use);
+ }
++EXPORT_SYMBOL_GPL(nft_setelem_data_deactivate);
+ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
+                          const struct nlattr *attr)
+@@ -6732,6 +6743,186 @@ static void nft_chain_del(struct nft_cha
+       list_del_rcu(&chain->list);
+ }
++static void nft_trans_gc_setelem_remove(struct nft_ctx *ctx,
++                                      struct nft_trans_gc *trans)
++{
++      void **priv = trans->priv;
++      unsigned int i;
++
++      for (i = 0; i < trans->count; i++) {
++              struct nft_set_elem elem = {
++                      .priv = priv[i],
++              };
++
++              nft_setelem_data_deactivate(ctx->net, trans->set, &elem);
++              trans->set->ops->remove(trans->net, trans->set, &elem);
++      }
++}
++
++void nft_trans_gc_destroy(struct nft_trans_gc *trans)
++{
++      nft_set_put(trans->set);
++      put_net(trans->net);
++      kfree(trans);
++}
++EXPORT_SYMBOL_GPL(nft_trans_gc_destroy);
++
++static void nft_trans_gc_trans_free(struct rcu_head *rcu)
++{
++      struct nft_set_elem elem = {};
++      struct nft_trans_gc *trans;
++      struct nft_ctx ctx = {};
++      unsigned int i;
++
++      trans = container_of(rcu, struct nft_trans_gc, rcu);
++      ctx.net = read_pnet(&trans->set->net);
++
++      for (i = 0; i < trans->count; i++) {
++              elem.priv = trans->priv[i];
++              atomic_dec(&trans->set->nelems);
++
++              nf_tables_set_elem_destroy(&ctx, trans->set, elem.priv);
++       }
++
++      nft_trans_gc_destroy(trans);
++}
++
++static bool nft_trans_gc_work_done(struct nft_trans_gc *trans)
++{
++      struct nftables_pernet *nft_net;
++      struct nft_ctx ctx = {};
++
++      nft_net = net_generic(trans->net, nf_tables_net_id);
++
++      mutex_lock(&nft_net->commit_mutex);
++
++      /* Check for race with transaction, otherwise this batch refers to
++       * stale objects that might not be there anymore. Skip transaction if
++       * set has been destroyed from control plane transaction in case gc
++       * worker loses race.
++       */
++      if (READ_ONCE(nft_net->gc_seq) != trans->seq || trans->set->dead) {
++              mutex_unlock(&nft_net->commit_mutex);
++              return false;
++      }
++
++      ctx.net = trans->net;
++      ctx.table = trans->set->table;
++
++      nft_trans_gc_setelem_remove(&ctx, trans);
++      mutex_unlock(&nft_net->commit_mutex);
++
++      return true;
++}
++
++static void nft_trans_gc_work(struct work_struct *work)
++{
++      struct nft_trans_gc *trans, *next;
++      LIST_HEAD(trans_gc_list);
++
++      spin_lock(&nf_tables_gc_list_lock);
++      list_splice_init(&nf_tables_gc_list, &trans_gc_list);
++      spin_unlock(&nf_tables_gc_list_lock);
++
++      list_for_each_entry_safe(trans, next, &trans_gc_list, list) {
++              list_del(&trans->list);
++              if (!nft_trans_gc_work_done(trans)) {
++                      nft_trans_gc_destroy(trans);
++                      continue;
++              }
++              call_rcu(&trans->rcu, nft_trans_gc_trans_free);
++      }
++}
++
++struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set,
++                                      unsigned int gc_seq, gfp_t gfp)
++{
++      struct net *net = read_pnet(&set->net);
++      struct nft_trans_gc *trans;
++
++      trans = kzalloc(sizeof(*trans), gfp);
++      if (!trans)
++              return NULL;
++
++      refcount_inc(&set->refs);
++      trans->set = set;
++      trans->net = get_net(net);
++      trans->seq = gc_seq;
++
++      return trans;
++}
++EXPORT_SYMBOL_GPL(nft_trans_gc_alloc);
++
++void nft_trans_gc_elem_add(struct nft_trans_gc *trans, void *priv)
++{
++      trans->priv[trans->count++] = priv;
++}
++EXPORT_SYMBOL_GPL(nft_trans_gc_elem_add);
++
++static void nft_trans_gc_queue_work(struct nft_trans_gc *trans)
++{
++      spin_lock(&nf_tables_gc_list_lock);
++      list_add_tail(&trans->list, &nf_tables_gc_list);
++      spin_unlock(&nf_tables_gc_list_lock);
++
++      schedule_work(&trans_gc_work);
++}
++
++static int nft_trans_gc_space(struct nft_trans_gc *trans)
++{
++      return NFT_TRANS_GC_BATCHCOUNT - trans->count;
++}
++
++struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc,
++                                            unsigned int gc_seq, gfp_t gfp)
++{
++      if (nft_trans_gc_space(gc))
++              return gc;
++
++      nft_trans_gc_queue_work(gc);
++
++      return nft_trans_gc_alloc(gc->set, gc_seq, gfp);
++}
++EXPORT_SYMBOL_GPL(nft_trans_gc_queue_async);
++
++void nft_trans_gc_queue_async_done(struct nft_trans_gc *trans)
++{
++      if (trans->count == 0) {
++              nft_trans_gc_destroy(trans);
++              return;
++      }
++
++      nft_trans_gc_queue_work(trans);
++}
++EXPORT_SYMBOL_GPL(nft_trans_gc_queue_async_done);
++
++struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp)
++{
++      if (WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net)))
++              return NULL;
++
++      if (nft_trans_gc_space(gc))
++              return gc;
++
++      call_rcu(&gc->rcu, nft_trans_gc_trans_free);
++
++      return nft_trans_gc_alloc(gc->set, 0, gfp);
++}
++EXPORT_SYMBOL_GPL(nft_trans_gc_queue_sync);
++
++void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans)
++{
++      WARN_ON_ONCE(!lockdep_commit_lock_is_held(trans->net));
++
++      if (trans->count == 0) {
++              nft_trans_gc_destroy(trans);
++              return;
++      }
++
++      call_rcu(&trans->rcu, nft_trans_gc_trans_free);
++}
++EXPORT_SYMBOL_GPL(nft_trans_gc_queue_sync_done);
++
+ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
+ {
+       struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+@@ -6739,6 +6930,7 @@ static int nf_tables_commit(struct net *
+       struct nft_trans_elem *te;
+       struct nft_chain *chain;
+       struct nft_table *table;
++      unsigned int gc_seq;
+       list_for_each_entry(trans, &nft_net->binding_list, binding_list) {
+               switch (trans->msg_type) {
+@@ -6785,6 +6977,10 @@ static int nf_tables_commit(struct net *
+       while (++nft_net->base_seq == 0)
+               ;
++      /* Bump gc counter, it becomes odd, this is the busy mark. */
++      gc_seq = READ_ONCE(nft_net->gc_seq);
++      WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
++
+       /* step 3. Start new generation, rules_gen_X now in use. */
+       net->nft.gencursor = nft_gencursor_next(net);
+@@ -6855,6 +7051,7 @@ static int nf_tables_commit(struct net *
+                       nft_trans_destroy(trans);
+                       break;
+               case NFT_MSG_DELSET:
++                      nft_trans_set(trans)->dead = 1;
+                       list_del_rcu(&nft_trans_set(trans)->list);
+                       nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
+                                            NFT_MSG_DELSET, GFP_KERNEL);
+@@ -6909,6 +7106,8 @@ static int nf_tables_commit(struct net *
+       nf_tables_commit_release(net);
+       nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
++
++      WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
+       mutex_unlock(&nft_net->commit_mutex);
+       return 0;
+@@ -7715,6 +7914,7 @@ static int __net_init nf_tables_init_net
+       mutex_init(&nft_net->commit_mutex);
+       nft_net->base_seq = 1;
+       nft_net->validate_state = NFT_VALIDATE_SKIP;
++      nft_net->gc_seq = 0;
+       return 0;
+ }
+@@ -7731,9 +7931,15 @@ static void __net_exit nf_tables_exit_ne
+       WARN_ON_ONCE(!list_empty(&nft_net->tables));
+ }
++static void nf_tables_exit_batch(struct list_head *net_exit_list)
++{
++      flush_work(&trans_gc_work);
++}
++
+ static struct pernet_operations nf_tables_net_ops = {
+       .init   = nf_tables_init_net,
+       .exit   = nf_tables_exit_net,
++      .exit_batch = nf_tables_exit_batch,
+       .id     = &nf_tables_net_id,
+       .size   = sizeof(struct nftables_pernet),
+ };
+@@ -7781,6 +7987,7 @@ static void __exit nf_tables_module_exit
+       unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
+       nft_chain_filter_fini();
+       unregister_pernet_subsys(&nf_tables_net_ops);
++      cancel_work_sync(&trans_gc_work);
+       rcu_barrier();
+       nf_tables_core_module_exit();
+ }
diff --git a/queue-4.19/netfilter-nf_tables-gc-transaction-race-with-abort-path.patch b/queue-4.19/netfilter-nf_tables-gc-transaction-race-with-abort-path.patch
new file mode 100644 (file)
index 0000000..6fad4b3
--- /dev/null
@@ -0,0 +1,39 @@
+From stable+bounces-50354-greg=kroah.com@vger.kernel.org Thu Jun 13 03:03:05 2024
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 13 Jun 2024 03:01:44 +0200
+Subject: netfilter: nf_tables: GC transaction race with abort path
+To: netfilter-devel@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org
+Message-ID: <20240613010209.104423-16-pablo@netfilter.org>
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit 720344340fb9be2765bbaab7b292ece0a4570eae upstream.
+
+Abort path is missing a synchronization point with GC transactions. Add
+GC sequence number hence any GC transaction losing race will be
+discarded.
+
+Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nf_tables_api.c |    7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -7263,7 +7263,12 @@ static int __nf_tables_abort(struct net
+ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
+ {
+       struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+-      int ret = __nf_tables_abort(net);
++      unsigned int gc_seq;
++      int ret;
++
++      gc_seq = nft_gc_seq_begin(nft_net);
++      ret = __nf_tables_abort(net);
++      nft_gc_seq_end(nft_net, gc_seq);
+       mutex_unlock(&nft_net->commit_mutex);
diff --git a/queue-4.19/netfilter-nf_tables-gc-transaction-race-with-netns-dismantle.patch b/queue-4.19/netfilter-nf_tables-gc-transaction-race-with-netns-dismantle.patch
new file mode 100644 (file)
index 0000000..4b04f74
--- /dev/null
@@ -0,0 +1,40 @@
+From stable+bounces-50353-greg=kroah.com@vger.kernel.org Thu Jun 13 03:03:01 2024
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 13 Jun 2024 03:01:43 +0200
+Subject: netfilter: nf_tables: GC transaction race with netns dismantle
+To: netfilter-devel@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org
+Message-ID: <20240613010209.104423-15-pablo@netfilter.org>
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit 02c6c24402bf1c1e986899c14ba22a10b510916b upstream.
+
+Use maybe_get_net() since GC workqueue might race with netns exit path.
+
+Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nf_tables_api.c |    7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -6820,9 +6820,14 @@ struct nft_trans_gc *nft_trans_gc_alloc(
+       if (!trans)
+               return NULL;
++      trans->net = maybe_get_net(net);
++      if (!trans->net) {
++              kfree(trans);
++              return NULL;
++      }
++
+       refcount_inc(&set->refs);
+       trans->set = set;
+-      trans->net = get_net(net);
+       trans->seq = gc_seq;
+       return trans;
diff --git a/queue-4.19/netfilter-nf_tables-mark-newset-as-dead-on-transaction-abort.patch b/queue-4.19/netfilter-nf_tables-mark-newset-as-dead-on-transaction-abort.patch
new file mode 100644 (file)
index 0000000..f1e49be
--- /dev/null
@@ -0,0 +1,49 @@
+From stable+bounces-50371-greg=kroah.com@vger.kernel.org Thu Jun 13 03:03:52 2024
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 13 Jun 2024 03:02:00 +0200
+Subject: netfilter: nf_tables: mark newset as dead on transaction abort
+To: netfilter-devel@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org
+Message-ID: <20240613010209.104423-32-pablo@netfilter.org>
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit 08e4c8c5919fd405a4d709b4ba43d836894a26eb ]
+
+If a transaction is aborted, we should mark the to-be-released NEWSET dead,
+just like commit path does for DEL and DESTROYSET commands.
+
+In both cases all remaining elements will be released via
+set->ops->destroy().
+
+The existing abort code does NOT post the actual release to the work queue.
+Also the entire __nf_tables_abort() function is wrapped in gc_seq
+begin/end pair.
+
+Therefore, async gc worker will never try to release the pending set
+elements, as gc sequence is always stale.
+
+It might be possible to speed up transaction aborts via work queue too,
+this would result in a race and a possible use-after-free.
+
+So fix this before it becomes an issue.
+
+Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane")
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nf_tables_api.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -7254,6 +7254,7 @@ static int __nf_tables_abort(struct net
+                               nft_trans_destroy(trans);
+                               break;
+                       }
++                      nft_trans_set(trans)->dead = 1;
+                       list_del_rcu(&nft_trans_set(trans)->list);
+                       break;
+               case NFT_MSG_DELSET:
diff --git a/queue-4.19/netfilter-nf_tables-mark-set-as-dead-when-unbinding-anonymous-set-with-timeout.patch b/queue-4.19/netfilter-nf_tables-mark-set-as-dead-when-unbinding-anonymous-set-with-timeout.patch
new file mode 100644 (file)
index 0000000..12ab507
--- /dev/null
@@ -0,0 +1,49 @@
+From stable+bounces-50378-greg=kroah.com@vger.kernel.org Thu Jun 13 03:04:16 2024
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 13 Jun 2024 03:02:07 +0200
+Subject: netfilter: nf_tables: mark set as dead when unbinding anonymous set with timeout
+To: netfilter-devel@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org
+Message-ID: <20240613010209.104423-39-pablo@netfilter.org>
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit 552705a3650bbf46a22b1adedc1b04181490fc36 upstream.
+
+While the rhashtable set gc runs asynchronously, a race allows it to
+collect elements from anonymous sets with timeouts while it is being
+released from the commit path.
+
+Mingi Cho originally reported this issue in a different path in 6.1.x
+with a pipapo set with low timeouts which is not possible upstream since
+7395dfacfff6 ("netfilter: nf_tables: use timestamp to check for set
+element timeout").
+
+Fix this by setting the dead flag for anonymous sets to skip async gc
+in this case.
+
+According to 08e4c8c5919f ("netfilter: nf_tables: mark newset as dead on
+transaction abort"), Florian plans to accelerate abort path by releasing
+objects via workqueue, therefore, this sets the dead flag for abort
+path too.
+
+Cc: stable@vger.kernel.org
+Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane")
+Reported-by: Mingi Cho <mgcho.minic@gmail.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nf_tables_api.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -3943,6 +3943,7 @@ void nf_tables_unbind_set(const struct n
+       if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) {
+               list_del_rcu(&set->list);
++              set->dead = 1;
+               if (event)
+                       nf_tables_set_notify(ctx, set, NFT_MSG_DELSET,
+                                            GFP_KERNEL);
diff --git a/queue-4.19/netfilter-nf_tables-pass-context-to-nft_set_destroy.patch b/queue-4.19/netfilter-nf_tables-pass-context-to-nft_set_destroy.patch
new file mode 100644 (file)
index 0000000..2257a36
--- /dev/null
@@ -0,0 +1,68 @@
+From stable+bounces-50341-greg=kroah.com@vger.kernel.org Thu Jun 13 03:02:29 2024
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 13 Jun 2024 03:01:30 +0200
+Subject: netfilter: nf_tables: pass context to nft_set_destroy()
+To: netfilter-devel@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org
+Message-ID: <20240613010209.104423-2-pablo@netfilter.org>
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit 0c2a85edd143162b3a698f31e94bf8cdc041da87 upstream.
+
+The patch that adds support for stateful expressions in set definitions
+requires this.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nf_tables_api.c |   10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -3753,7 +3753,7 @@ err1:
+       return err;
+ }
+-static void nft_set_destroy(struct nft_set *set)
++static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
+ {
+       if (WARN_ON(set->use > 0))
+               return;
+@@ -3926,7 +3926,7 @@ EXPORT_SYMBOL_GPL(nf_tables_deactivate_s
+ void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set)
+ {
+       if (list_empty(&set->bindings) && nft_set_is_anonymous(set))
+-              nft_set_destroy(set);
++              nft_set_destroy(ctx, set);
+ }
+ EXPORT_SYMBOL_GPL(nf_tables_destroy_set);
+@@ -6503,7 +6503,7 @@ static void nft_commit_release(struct nf
+               nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
+               break;
+       case NFT_MSG_DELSET:
+-              nft_set_destroy(nft_trans_set(trans));
++              nft_set_destroy(&trans->ctx, nft_trans_set(trans));
+               break;
+       case NFT_MSG_DELSETELEM:
+               nf_tables_set_elem_destroy(&trans->ctx,
+@@ -6857,7 +6857,7 @@ static void nf_tables_abort_release(stru
+               nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
+               break;
+       case NFT_MSG_NEWSET:
+-              nft_set_destroy(nft_trans_set(trans));
++              nft_set_destroy(&trans->ctx, nft_trans_set(trans));
+               break;
+       case NFT_MSG_NEWSETELEM:
+               nft_set_elem_destroy(nft_trans_elem_set(trans),
+@@ -7604,7 +7604,7 @@ static void __nft_release_table(struct n
+       list_for_each_entry_safe(set, ns, &table->sets, list) {
+               list_del(&set->list);
+               nft_use_dec(&table->use);
+-              nft_set_destroy(set);
++              nft_set_destroy(&ctx, set);
+       }
+       list_for_each_entry_safe(obj, ne, &table->objects, list) {
+               list_del(&obj->list);
diff --git a/queue-4.19/netfilter-nf_tables-reject-new-basechain-after-table-flag-update.patch b/queue-4.19/netfilter-nf_tables-reject-new-basechain-after-table-flag-update.patch
new file mode 100644 (file)
index 0000000..b5fb330
--- /dev/null
@@ -0,0 +1,63 @@
+From stable+bounces-50379-greg=kroah.com@vger.kernel.org Thu Jun 13 03:04:20 2024
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 13 Jun 2024 03:02:08 +0200
+Subject: netfilter: nf_tables: reject new basechain after table flag update
+To: netfilter-devel@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org
+Message-ID: <20240613010209.104423-40-pablo@netfilter.org>
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit 994209ddf4f430946f6247616b2e33d179243769 upstream.
+
+When dormant flag is toggled, hooks are disabled in the commit phase by
+iterating over current chains in table (existing and new).
+
+The following configuration allows for an inconsistent state:
+
+  add table x
+  add chain x y { type filter hook input priority 0; }
+  add table x { flags dormant; }
+  add chain x w { type filter hook input priority 1; }
+
+which triggers the following warning when trying to unregister chain w
+which is already unregistered.
+
+[  127.322252] WARNING: CPU: 7 PID: 1211 at net/netfilter/core.c:501 __nf_unregister_net_hook+0x21a/0x260
+[...]
+[  127.322519] Call Trace:
+[  127.322521]  <TASK>
+[  127.322524]  ? __warn+0x9f/0x1a0
+[  127.322531]  ? __nf_unregister_net_hook+0x21a/0x260
+[  127.322537]  ? report_bug+0x1b1/0x1e0
+[  127.322545]  ? handle_bug+0x3c/0x70
+[  127.322552]  ? exc_invalid_op+0x17/0x40
+[  127.322556]  ? asm_exc_invalid_op+0x1a/0x20
+[  127.322563]  ? kasan_save_free_info+0x3b/0x60
+[  127.322570]  ? __nf_unregister_net_hook+0x6a/0x260
+[  127.322577]  ? __nf_unregister_net_hook+0x21a/0x260
+[  127.322583]  ? __nf_unregister_net_hook+0x6a/0x260
+[  127.322590]  ? __nf_tables_unregister_hook+0x8a/0xe0 [nf_tables]
+[  127.322655]  nft_table_disable+0x75/0xf0 [nf_tables]
+[  127.322717]  nf_tables_commit+0x2571/0x2620 [nf_tables]
+
+Fixes: 179d9ba5559a ("netfilter: nf_tables: fix table flag updates")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nf_tables_api.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -1750,6 +1750,9 @@ static int nf_tables_addchain(struct nft
+               struct nft_chain_hook hook;
+               struct nf_hook_ops *ops;
++              if (table->flags & __NFT_TABLE_F_UPDATE)
++                      return -EINVAL;
++
+               err = nft_chain_parse_hook(net, nla, &hook, family, true);
+               if (err < 0)
+                       return err;
diff --git a/queue-4.19/netfilter-nf_tables-remove-busy-mark-and-gc-batch-api.patch b/queue-4.19/netfilter-nf_tables-remove-busy-mark-and-gc-batch-api.patch
new file mode 100644 (file)
index 0000000..3fc19b9
--- /dev/null
@@ -0,0 +1,182 @@
+From stable+bounces-50351-greg=kroah.com@vger.kernel.org Thu Jun 13 03:02:57 2024
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 13 Jun 2024 03:01:41 +0200
+Subject: netfilter: nf_tables: remove busy mark and gc batch API
+To: netfilter-devel@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org
+Message-ID: <20240613010209.104423-13-pablo@netfilter.org>
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit a2dd0233cbc4d8a0abb5f64487487ffc9265beb5 upstream.
+
+Ditch it, it has been replaced by the GC transaction API and it has no
+clients anymore.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/netfilter/nf_tables.h |   97 +-------------------------------------
+ net/netfilter/nf_tables_api.c     |   28 ----------
+ 2 files changed, 5 insertions(+), 120 deletions(-)
+
+--- a/include/net/netfilter/nf_tables.h
++++ b/include/net/netfilter/nf_tables.h
+@@ -652,62 +652,6 @@ void nft_set_elem_destroy(const struct n
+ void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
+                               const struct nft_set *set, void *elem);
+-/**
+- *    struct nft_set_gc_batch_head - nf_tables set garbage collection batch
+- *
+- *    @rcu: rcu head
+- *    @set: set the elements belong to
+- *    @cnt: count of elements
+- */
+-struct nft_set_gc_batch_head {
+-      struct rcu_head                 rcu;
+-      const struct nft_set            *set;
+-      unsigned int                    cnt;
+-};
+-
+-#define NFT_SET_GC_BATCH_SIZE ((PAGE_SIZE -                             \
+-                                sizeof(struct nft_set_gc_batch_head)) / \
+-                               sizeof(void *))
+-
+-/**
+- *    struct nft_set_gc_batch - nf_tables set garbage collection batch
+- *
+- *    @head: GC batch head
+- *    @elems: garbage collection elements
+- */
+-struct nft_set_gc_batch {
+-      struct nft_set_gc_batch_head    head;
+-      void                            *elems[NFT_SET_GC_BATCH_SIZE];
+-};
+-
+-struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set,
+-                                              gfp_t gfp);
+-void nft_set_gc_batch_release(struct rcu_head *rcu);
+-
+-static inline void nft_set_gc_batch_complete(struct nft_set_gc_batch *gcb)
+-{
+-      if (gcb != NULL)
+-              call_rcu(&gcb->head.rcu, nft_set_gc_batch_release);
+-}
+-
+-static inline struct nft_set_gc_batch *
+-nft_set_gc_batch_check(const struct nft_set *set, struct nft_set_gc_batch *gcb,
+-                     gfp_t gfp)
+-{
+-      if (gcb != NULL) {
+-              if (gcb->head.cnt + 1 < ARRAY_SIZE(gcb->elems))
+-                      return gcb;
+-              nft_set_gc_batch_complete(gcb);
+-      }
+-      return nft_set_gc_batch_alloc(set, gfp);
+-}
+-
+-static inline void nft_set_gc_batch_add(struct nft_set_gc_batch *gcb,
+-                                      void *elem)
+-{
+-      gcb->elems[gcb->head.cnt++] = elem;
+-}
+-
+ struct nft_expr_ops;
+ /**
+  *    struct nft_expr_type - nf_tables expression type
+@@ -1314,47 +1258,12 @@ static inline void nft_set_elem_change_a
+       ext->genmask ^= nft_genmask_next(net);
+ }
+-/*
+- * We use a free bit in the genmask field to indicate the element
+- * is busy, meaning it is currently being processed either by
+- * the netlink API or GC.
+- *
+- * Even though the genmask is only a single byte wide, this works
+- * because the extension structure if fully constant once initialized,
+- * so there are no non-atomic write accesses unless it is already
+- * marked busy.
+- */
+-#define NFT_SET_ELEM_BUSY_MASK        (1 << 2)
+-
+-#if defined(__LITTLE_ENDIAN_BITFIELD)
+-#define NFT_SET_ELEM_BUSY_BIT 2
+-#elif defined(__BIG_ENDIAN_BITFIELD)
+-#define NFT_SET_ELEM_BUSY_BIT (BITS_PER_LONG - BITS_PER_BYTE + 2)
+-#else
+-#error
+-#endif
+-
+-static inline int nft_set_elem_mark_busy(struct nft_set_ext *ext)
+-{
+-      unsigned long *word = (unsigned long *)ext;
+-
+-      BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0);
+-      return test_and_set_bit(NFT_SET_ELEM_BUSY_BIT, word);
+-}
+-
+-static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext)
+-{
+-      unsigned long *word = (unsigned long *)ext;
+-
+-      clear_bit(NFT_SET_ELEM_BUSY_BIT, word);
+-}
+-
+-#define NFT_SET_ELEM_DEAD_MASK (1 << 3)
++#define NFT_SET_ELEM_DEAD_MASK (1 << 2)
+ #if defined(__LITTLE_ENDIAN_BITFIELD)
+-#define NFT_SET_ELEM_DEAD_BIT 3
++#define NFT_SET_ELEM_DEAD_BIT 2
+ #elif defined(__BIG_ENDIAN_BITFIELD)
+-#define NFT_SET_ELEM_DEAD_BIT (BITS_PER_LONG - BITS_PER_BYTE + 3)
++#define NFT_SET_ELEM_DEAD_BIT (BITS_PER_LONG - BITS_PER_BYTE + 2)
+ #else
+ #error
+ #endif
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -4766,7 +4766,8 @@ static int nft_add_set_elem(struct nft_c
+       if (trans == NULL)
+               goto err4;
+-      ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK;
++      ext->genmask = nft_genmask_cur(ctx->net);
++
+       err = set->ops->insert(ctx->net, set, &elem, &ext2);
+       if (err) {
+               if (err == -EEXIST) {
+@@ -5059,31 +5060,6 @@ static int nf_tables_delsetelem(struct n
+       return err;
+ }
+-void nft_set_gc_batch_release(struct rcu_head *rcu)
+-{
+-      struct nft_set_gc_batch *gcb;
+-      unsigned int i;
+-
+-      gcb = container_of(rcu, struct nft_set_gc_batch, head.rcu);
+-      for (i = 0; i < gcb->head.cnt; i++)
+-              nft_set_elem_destroy(gcb->head.set, gcb->elems[i], true);
+-      kfree(gcb);
+-}
+-EXPORT_SYMBOL_GPL(nft_set_gc_batch_release);
+-
+-struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set,
+-                                              gfp_t gfp)
+-{
+-      struct nft_set_gc_batch *gcb;
+-
+-      gcb = kzalloc(sizeof(*gcb), gfp);
+-      if (gcb == NULL)
+-              return gcb;
+-      gcb->head.set = set;
+-      return gcb;
+-}
+-EXPORT_SYMBOL_GPL(nft_set_gc_batch_alloc);
+-
+ /*
+  * Stateful objects
+  */
diff --git a/queue-4.19/netfilter-nf_tables-set-dormant-flag-on-hook-register-failure.patch b/queue-4.19/netfilter-nf_tables-set-dormant-flag-on-hook-register-failure.patch
new file mode 100644 (file)
index 0000000..59cd79f
--- /dev/null
@@ -0,0 +1,41 @@
+From stable+bounces-50375-greg=kroah.com@vger.kernel.org Thu Jun 13 03:04:07 2024
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 13 Jun 2024 03:02:04 +0200
+Subject: netfilter: nf_tables: set dormant flag on hook register failure
+To: netfilter-devel@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org
+Message-ID: <20240613010209.104423-36-pablo@netfilter.org>
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit bccebf64701735533c8db37773eeacc6566cc8ec ]
+
+We need to set the dormant flag again if we fail to register
+the hooks.
+
+During memory pressure hook registration can fail and we end up
+with a table marked as active but no registered hooks.
+
+On table/base chain deletion, nf_tables will attempt to unregister
+the hook again which yields a warn splat from the nftables core.
+
+Reported-and-tested-by: syzbot+de4025c006ec68ac56fc@syzkaller.appspotmail.com
+Fixes: 179d9ba5559a ("netfilter: nf_tables: fix table flag updates")
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nf_tables_api.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -925,6 +925,7 @@ static int nf_tables_updtable(struct nft
+       return 0;
+ err_register_hooks:
++      ctx->table->flags |= NFT_TABLE_F_DORMANT;
+       nft_trans_destroy(trans);
+       return ret;
+ }
diff --git a/queue-4.19/netfilter-nf_tables-skip-dead-set-elements-in-netlink-dump.patch b/queue-4.19/netfilter-nf_tables-skip-dead-set-elements-in-netlink-dump.patch
new file mode 100644 (file)
index 0000000..29ffa9d
--- /dev/null
@@ -0,0 +1,46 @@
+From stable+bounces-50372-greg=kroah.com@vger.kernel.org Thu Jun 13 03:03:55 2024
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 13 Jun 2024 03:02:01 +0200
+Subject: netfilter: nf_tables: skip dead set elements in netlink dump
+To: netfilter-devel@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org
+Message-ID: <20240613010209.104423-33-pablo@netfilter.org>
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+[ Upstream commit 6b1ca88e4bb63673dc9f9c7f23c899f22c3cb17a ]
+
+Delete from packet path relies on the garbage collector to purge
+elements with NFT_SET_ELEM_DEAD_BIT on.
+
+Skip these dead elements from nf_tables_dump_setelem() path, I very
+rarely see tests/shell/testcases/maps/typeof_maps_add_delete reports
+[DUMP FAILED] showing a mismatch in the expected output with an element
+that should not be there.
+
+If the netlink dump happens before GC worker run, it might show dead
+elements in the ruleset listing.
+
+nft_rhash_get() already skips dead elements in nft_rhash_cmp(),
+therefore, it already does not show the element when getting a single
+element via netlink control plane.
+
+Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nf_tables_api.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -4200,7 +4200,7 @@ static int nf_tables_dump_setelem(const
+       const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+       struct nft_set_dump_args *args;
+-      if (nft_set_elem_expired(ext))
++      if (nft_set_elem_expired(ext) || nft_set_elem_is_dead(ext))
+               return 0;
+       args = container_of(iter, struct nft_set_dump_args, iter);
diff --git a/queue-4.19/netfilter-nf_tables-unregister-flowtable-hooks-on-netns-exit.patch b/queue-4.19/netfilter-nf_tables-unregister-flowtable-hooks-on-netns-exit.patch
new file mode 100644 (file)
index 0000000..6bda4ac
--- /dev/null
@@ -0,0 +1,67 @@
+From stable+bounces-50361-greg=kroah.com@vger.kernel.org Thu Jun 13 03:03:24 2024
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 13 Jun 2024 03:01:50 +0200
+Subject: netfilter: nf_tables: unregister flowtable hooks on netns exit
+To: netfilter-devel@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org
+Message-ID: <20240613010209.104423-22-pablo@netfilter.org>
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit 6069da443bf65f513bb507bb21e2f87cfb1ad0b6 upstream.
+
+Unregister flowtable hooks before they are released via
+nf_tables_flowtable_destroy() otherwise hook core reports UAF.
+
+BUG: KASAN: use-after-free in nf_hook_entries_grow+0x5a7/0x700 net/netfilter/core.c:142 net/netfilter/core.c:142
+Read of size 4 at addr ffff8880736f7438 by task syz-executor579/3666
+
+CPU: 0 PID: 3666 Comm: syz-executor579 Not tainted 5.16.0-rc5-syzkaller #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Call Trace:
+ <TASK>
+ __dump_stack lib/dump_stack.c:88 [inline]
+ __dump_stack lib/dump_stack.c:88 [inline] lib/dump_stack.c:106
+ dump_stack_lvl+0x1dc/0x2d8 lib/dump_stack.c:106 lib/dump_stack.c:106
+ print_address_description+0x65/0x380 mm/kasan/report.c:247 mm/kasan/report.c:247
+ __kasan_report mm/kasan/report.c:433 [inline]
+ __kasan_report mm/kasan/report.c:433 [inline] mm/kasan/report.c:450
+ kasan_report+0x19a/0x1f0 mm/kasan/report.c:450 mm/kasan/report.c:450
+ nf_hook_entries_grow+0x5a7/0x700 net/netfilter/core.c:142 net/netfilter/core.c:142
+ __nf_register_net_hook+0x27e/0x8d0 net/netfilter/core.c:429 net/netfilter/core.c:429
+ nf_register_net_hook+0xaa/0x180 net/netfilter/core.c:571 net/netfilter/core.c:571
+ nft_register_flowtable_net_hooks+0x3c5/0x730 net/netfilter/nf_tables_api.c:7232 net/netfilter/nf_tables_api.c:7232
+ nf_tables_newflowtable+0x2022/0x2cf0 net/netfilter/nf_tables_api.c:7430 net/netfilter/nf_tables_api.c:7430
+ nfnetlink_rcv_batch net/netfilter/nfnetlink.c:513 [inline]
+ nfnetlink_rcv_skb_batch net/netfilter/nfnetlink.c:634 [inline]
+ nfnetlink_rcv_batch net/netfilter/nfnetlink.c:513 [inline] net/netfilter/nfnetlink.c:652
+ nfnetlink_rcv_skb_batch net/netfilter/nfnetlink.c:634 [inline] net/netfilter/nfnetlink.c:652
+ nfnetlink_rcv+0x10e6/0x2550 net/netfilter/nfnetlink.c:652 net/netfilter/nfnetlink.c:652
+
+__nft_release_hook() calls nft_unregister_flowtable_net_hooks() which
+only unregisters the hooks, then after RCU grace period, it is
+guaranteed that no packets add new entries to the flowtable (no flow
+offload rules and flowtable hooks are reachable from packet path), so it
+is safe to call nf_flow_table_free() which cleans up the remaining
+entries from the flowtable (both software and hardware) and it unbinds
+the flow_block.
+
+Fixes: ff4bf2f42a40 ("netfilter: nf_tables: add nft_unregister_flowtable_hook()")
+Reported-by: syzbot+e918523f77e62790d6d9@syzkaller.appspotmail.com
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nf_tables_api.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -7863,6 +7863,8 @@ static void __nft_release_table(struct n
+       list_for_each_entry(chain, &table->chains, list)
+               nf_tables_unregister_hook(net, table, chain);
++      list_for_each_entry(flowtable, &table->flowtables, list)
++              nft_unregister_flowtable_net_hooks(net, flowtable);
+       /* No packets are walking on these chains anymore. */
+       ctx.table = table;
+       list_for_each_entry(chain, &table->chains, list) {
diff --git a/queue-4.19/netfilter-nf_tables-validate-nfproto_-family.patch b/queue-4.19/netfilter-nf_tables-validate-nfproto_-family.patch
new file mode 100644 (file)
index 0000000..afe00ef
--- /dev/null
@@ -0,0 +1,135 @@
+From stable+bounces-50373-greg=kroah.com@vger.kernel.org Thu Jun 13 03:04:00 2024
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 13 Jun 2024 03:02:02 +0200
+Subject: netfilter: nf_tables: validate NFPROTO_* family
+To: netfilter-devel@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org
+Message-ID: <20240613010209.104423-34-pablo@netfilter.org>
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+[ Upstream commit d0009effa8862c20a13af4cb7475d9771b905693 ]
+
+Several expressions explicitly refer to NF_INET_* hook definitions
+from expr->ops->validate, however, family is not validated.
+
+Bail out with EOPNOTSUPP in case they are used from unsupported
+families.
+
+Fixes: 0ca743a55991 ("netfilter: nf_tables: add compatibility layer for x_tables")
+Fixes: a3c90f7a2323 ("netfilter: nf_tables: flow offload expression")
+Fixes: 2fa841938c64 ("netfilter: nf_tables: introduce routing expression")
+Fixes: 554ced0a6e29 ("netfilter: nf_tables: add support for native socket matching")
+Fixes: ad49d86e07a4 ("netfilter: nf_tables: Add synproxy support")
+Fixes: 4ed8eb6570a4 ("netfilter: nf_tables: Add native tproxy support")
+Fixes: 6c47260250fc ("netfilter: nf_tables: add xfrm expression")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nft_compat.c       |   12 ++++++++++++
+ net/netfilter/nft_flow_offload.c |    5 +++++
+ net/netfilter/nft_nat.c          |    5 +++++
+ net/netfilter/nft_rt.c           |    5 +++++
+ net/netfilter/nft_socket.c       |    5 +++++
+ net/netfilter/nft_tproxy.c       |    5 +++++
+ 6 files changed, 37 insertions(+)
+
+--- a/net/netfilter/nft_compat.c
++++ b/net/netfilter/nft_compat.c
+@@ -319,6 +319,12 @@ static int nft_target_validate(const str
+       unsigned int hook_mask = 0;
+       int ret;
++      if (ctx->family != NFPROTO_IPV4 &&
++          ctx->family != NFPROTO_IPV6 &&
++          ctx->family != NFPROTO_BRIDGE &&
++          ctx->family != NFPROTO_ARP)
++              return -EOPNOTSUPP;
++
+       if (nft_is_base_chain(ctx->chain)) {
+               const struct nft_base_chain *basechain =
+                                               nft_base_chain(ctx->chain);
+@@ -560,6 +566,12 @@ static int nft_match_validate(const stru
+       unsigned int hook_mask = 0;
+       int ret;
++      if (ctx->family != NFPROTO_IPV4 &&
++          ctx->family != NFPROTO_IPV6 &&
++          ctx->family != NFPROTO_BRIDGE &&
++          ctx->family != NFPROTO_ARP)
++              return -EOPNOTSUPP;
++
+       if (nft_is_base_chain(ctx->chain)) {
+               const struct nft_base_chain *basechain =
+                                               nft_base_chain(ctx->chain);
+--- a/net/netfilter/nft_flow_offload.c
++++ b/net/netfilter/nft_flow_offload.c
+@@ -145,6 +145,11 @@ static int nft_flow_offload_validate(con
+ {
+       unsigned int hook_mask = (1 << NF_INET_FORWARD);
++      if (ctx->family != NFPROTO_IPV4 &&
++          ctx->family != NFPROTO_IPV6 &&
++          ctx->family != NFPROTO_INET)
++              return -EOPNOTSUPP;
++
+       return nft_chain_validate_hooks(ctx->chain, hook_mask);
+ }
+--- a/net/netfilter/nft_nat.c
++++ b/net/netfilter/nft_nat.c
+@@ -94,6 +94,11 @@ static int nft_nat_validate(const struct
+       struct nft_nat *priv = nft_expr_priv(expr);
+       int err;
++      if (ctx->family != NFPROTO_IPV4 &&
++          ctx->family != NFPROTO_IPV6 &&
++          ctx->family != NFPROTO_INET)
++              return -EOPNOTSUPP;
++
+       err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT);
+       if (err < 0)
+               return err;
+--- a/net/netfilter/nft_rt.c
++++ b/net/netfilter/nft_rt.c
+@@ -159,6 +159,11 @@ static int nft_rt_validate(const struct
+       const struct nft_rt *priv = nft_expr_priv(expr);
+       unsigned int hooks;
++      if (ctx->family != NFPROTO_IPV4 &&
++          ctx->family != NFPROTO_IPV6 &&
++          ctx->family != NFPROTO_INET)
++              return -EOPNOTSUPP;
++
+       switch (priv->key) {
+       case NFT_RT_NEXTHOP4:
+       case NFT_RT_NEXTHOP6:
+--- a/net/netfilter/nft_socket.c
++++ b/net/netfilter/nft_socket.c
+@@ -139,6 +139,11 @@ static int nft_socket_validate(const str
+                              const struct nft_expr *expr,
+                              const struct nft_data **data)
+ {
++      if (ctx->family != NFPROTO_IPV4 &&
++          ctx->family != NFPROTO_IPV6 &&
++          ctx->family != NFPROTO_INET)
++              return -EOPNOTSUPP;
++
+       return nft_chain_validate_hooks(ctx->chain,
+                                       (1 << NF_INET_PRE_ROUTING) |
+                                       (1 << NF_INET_LOCAL_IN) |
+--- a/net/netfilter/nft_tproxy.c
++++ b/net/netfilter/nft_tproxy.c
+@@ -293,6 +293,11 @@ static int nft_tproxy_validate(const str
+                              const struct nft_expr *expr,
+                              const struct nft_data **data)
+ {
++      if (ctx->family != NFPROTO_IPV4 &&
++          ctx->family != NFPROTO_IPV6 &&
++          ctx->family != NFPROTO_INET)
++              return -EOPNOTSUPP;
++
+       return nft_chain_validate_hooks(ctx->chain, 1 << NF_INET_PRE_ROUTING);
+ }
diff --git a/queue-4.19/netfilter-nft_dynset-fix-timeouts-later-than-23-days.patch b/queue-4.19/netfilter-nft_dynset-fix-timeouts-later-than-23-days.patch
new file mode 100644 (file)
index 0000000..5825b92
--- /dev/null
@@ -0,0 +1,80 @@
+From stable+bounces-50367-greg=kroah.com@vger.kernel.org Thu Jun 13 03:03:41 2024
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 13 Jun 2024 03:01:56 +0200
+Subject: netfilter: nft_dynset: fix timeouts later than 23 days
+To: netfilter-devel@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org
+Message-ID: <20240613010209.104423-28-pablo@netfilter.org>
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit 917d80d376ffbaa9725fde9e3c0282f63643f278 upstream.
+
+Use nf_msecs_to_jiffies64 and nf_jiffies64_to_msecs as provided by
+8e1102d5a159 ("netfilter: nf_tables: support timeouts larger than 23
+days"), otherwise ruleset listing breaks.
+
+Fixes: a8b1e36d0d1d ("netfilter: nft_dynset: fix element timeout for HZ != 1000")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/netfilter/nf_tables.h |    3 +++
+ net/netfilter/nf_tables_api.c     |    4 ++--
+ net/netfilter/nft_dynset.c        |    8 +++++---
+ 3 files changed, 10 insertions(+), 5 deletions(-)
+
+--- a/include/net/netfilter/nf_tables.h
++++ b/include/net/netfilter/nf_tables.h
+@@ -1423,4 +1423,7 @@ struct nftables_pernet {
+       unsigned int            gc_seq;
+ };
++int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result);
++__be64 nf_jiffies64_to_msecs(u64 input);
++
+ #endif /* _NET_NF_TABLES_H */
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -3294,7 +3294,7 @@ cont:
+       return 0;
+ }
+-static int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result)
++int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result)
+ {
+       u64 ms = be64_to_cpu(nla_get_be64(nla));
+       u64 max = (u64)(~((u64)0));
+@@ -3308,7 +3308,7 @@ static int nf_msecs_to_jiffies64(const s
+       return 0;
+ }
+-static __be64 nf_jiffies64_to_msecs(u64 input)
++__be64 nf_jiffies64_to_msecs(u64 input)
+ {
+       u64 ms = jiffies64_to_nsecs(input);
+--- a/net/netfilter/nft_dynset.c
++++ b/net/netfilter/nft_dynset.c
+@@ -169,8 +169,10 @@ static int nft_dynset_init(const struct
+       if (tb[NFTA_DYNSET_TIMEOUT] != NULL) {
+               if (!(set->flags & NFT_SET_TIMEOUT))
+                       return -EINVAL;
+-              timeout = msecs_to_jiffies(be64_to_cpu(nla_get_be64(
+-                                              tb[NFTA_DYNSET_TIMEOUT])));
++
++              err = nf_msecs_to_jiffies64(tb[NFTA_DYNSET_TIMEOUT], &timeout);
++              if (err)
++                      return err;
+       }
+       err = nft_parse_register_load(tb[NFTA_DYNSET_SREG_KEY], &priv->sreg_key,
+@@ -284,7 +286,7 @@ static int nft_dynset_dump(struct sk_buf
+       if (nla_put_string(skb, NFTA_DYNSET_SET_NAME, priv->set->name))
+               goto nla_put_failure;
+       if (nla_put_be64(skb, NFTA_DYNSET_TIMEOUT,
+-                       cpu_to_be64(jiffies_to_msecs(priv->timeout)),
++                       nf_jiffies64_to_msecs(priv->timeout),
+                        NFTA_DYNSET_PAD))
+               goto nla_put_failure;
+       if (priv->expr && nft_expr_dump(skb, NFTA_DYNSET_EXPR, priv->expr))
diff --git a/queue-4.19/netfilter-nft_dynset-relax-superfluous-check-on-set-updates.patch b/queue-4.19/netfilter-nft_dynset-relax-superfluous-check-on-set-updates.patch
new file mode 100644 (file)
index 0000000..c13b840
--- /dev/null
@@ -0,0 +1,44 @@
+From stable+bounces-50370-greg=kroah.com@vger.kernel.org Thu Jun 13 03:03:50 2024
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 13 Jun 2024 03:01:59 +0200
+Subject: netfilter: nft_dynset: relax superfluous check on set updates
+To: netfilter-devel@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org
+Message-ID: <20240613010209.104423-31-pablo@netfilter.org>
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit 7b1394892de8d95748d05e3ee41e85edb4abbfa1 upstream.
+
+Relax this condition to make add and update commands idempotent for sets
+with no timeout. The eval function already checks if the set element
+timeout is available and updates it if the update command is used.
+
+Fixes: 22fe54d5fefc ("netfilter: nf_tables: add support for dynamic set updates")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nft_dynset.c |   10 +---------
+ 1 file changed, 1 insertion(+), 9 deletions(-)
+
+--- a/net/netfilter/nft_dynset.c
++++ b/net/netfilter/nft_dynset.c
+@@ -154,16 +154,8 @@ static int nft_dynset_init(const struct
+               return -EBUSY;
+       priv->op = ntohl(nla_get_be32(tb[NFTA_DYNSET_OP]));
+-      switch (priv->op) {
+-      case NFT_DYNSET_OP_ADD:
+-              break;
+-      case NFT_DYNSET_OP_UPDATE:
+-              if (!(set->flags & NFT_SET_TIMEOUT))
+-                      return -EOPNOTSUPP;
+-              break;
+-      default:
++      if (priv->op > NFT_DYNSET_OP_UPDATE)
+               return -EOPNOTSUPP;
+-      }
+       timeout = 0;
+       if (tb[NFTA_DYNSET_TIMEOUT] != NULL) {
diff --git a/queue-4.19/netfilter-nft_dynset-report-eopnotsupp-on-missing-set-feature.patch b/queue-4.19/netfilter-nft_dynset-report-eopnotsupp-on-missing-set-feature.patch
new file mode 100644 (file)
index 0000000..4466187
--- /dev/null
@@ -0,0 +1,55 @@
+From stable+bounces-50369-greg=kroah.com@vger.kernel.org Thu Jun 13 03:03:46 2024
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 13 Jun 2024 03:01:58 +0200
+Subject: netfilter: nft_dynset: report EOPNOTSUPP on missing set feature
+To: netfilter-devel@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org
+Message-ID: <20240613010209.104423-30-pablo@netfilter.org>
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit 95cd4bca7b1f4a25810f3ddfc5e767fb46931789 upstream.
+
+If userspace requests a feature which is not available in the original set
+definition, then bail out with EOPNOTSUPP. If userspace sends
+unsupported dynset flags (new feature not supported by this kernel),
+then report EOPNOTSUPP to userspace. EINVAL should be only used to
+report malformed netlink messages from userspace.
+
+Fixes: 22fe54d5fefc ("netfilter: nf_tables: add support for dynamic set updates")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nft_dynset.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/net/netfilter/nft_dynset.c
++++ b/net/netfilter/nft_dynset.c
+@@ -133,7 +133,7 @@ static int nft_dynset_init(const struct
+               u32 flags = ntohl(nla_get_be32(tb[NFTA_DYNSET_FLAGS]));
+               if (flags & ~NFT_DYNSET_F_INV)
+-                      return -EINVAL;
++                      return -EOPNOTSUPP;
+               if (flags & NFT_DYNSET_F_INV)
+                       priv->invert = true;
+       }
+@@ -168,7 +168,7 @@ static int nft_dynset_init(const struct
+       timeout = 0;
+       if (tb[NFTA_DYNSET_TIMEOUT] != NULL) {
+               if (!(set->flags & NFT_SET_TIMEOUT))
+-                      return -EINVAL;
++                      return -EOPNOTSUPP;
+               err = nf_msecs_to_jiffies64(tb[NFTA_DYNSET_TIMEOUT], &timeout);
+               if (err)
+@@ -182,7 +182,7 @@ static int nft_dynset_init(const struct
+       if (tb[NFTA_DYNSET_SREG_DATA] != NULL) {
+               if (!(set->flags & NFT_SET_MAP))
+-                      return -EINVAL;
++                      return -EOPNOTSUPP;
+               if (set->dtype == NFT_DATA_VERDICT)
+                       return -EOPNOTSUPP;
diff --git a/queue-4.19/netfilter-nft_set_hash-try-later-when-gc-hits-eagain-on-iteration.patch b/queue-4.19/netfilter-nft_set_hash-try-later-when-gc-hits-eagain-on-iteration.patch
new file mode 100644 (file)
index 0000000..29744a6
--- /dev/null
@@ -0,0 +1,40 @@
+From stable+bounces-50358-greg=kroah.com@vger.kernel.org Thu Jun 13 03:03:14 2024
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 13 Jun 2024 03:01:48 +0200
+Subject: netfilter: nft_set_hash: try later when GC hits EAGAIN on iteration
+To: netfilter-devel@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org
+Message-ID: <20240613010209.104423-20-pablo@netfilter.org>
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit b079155faae94e9b3ab9337e82100a914ebb4e8d upstream.
+
+Skip GC run if iterator rewinds to the beginning with EAGAIN, otherwise GC
+might collect the same element more than once.
+
+Fixes: f6c383b8c31a ("netfilter: nf_tables: adapt set backend to use GC transaction API")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nft_set_hash.c |    9 +++------
+ 1 file changed, 3 insertions(+), 6 deletions(-)
+
+--- a/net/netfilter/nft_set_hash.c
++++ b/net/netfilter/nft_set_hash.c
+@@ -321,12 +321,9 @@ static void nft_rhash_gc(struct work_str
+       while ((he = rhashtable_walk_next(&hti))) {
+               if (IS_ERR(he)) {
+-                      if (PTR_ERR(he) != -EAGAIN) {
+-                              nft_trans_gc_destroy(gc);
+-                              gc = NULL;
+-                              goto try_later;
+-                      }
+-                      continue;
++                      nft_trans_gc_destroy(gc);
++                      gc = NULL;
++                      goto try_later;
+               }
+               /* Ruleset has been updated, try later. */
diff --git a/queue-4.19/netfilter-nft_set_rbtree-add-missing-expired-checks.patch b/queue-4.19/netfilter-nft_set_rbtree-add-missing-expired-checks.patch
new file mode 100644 (file)
index 0000000..0bf69ee
--- /dev/null
@@ -0,0 +1,73 @@
+From stable+bounces-50345-greg=kroah.com@vger.kernel.org Thu Jun 13 03:02:41 2024
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 13 Jun 2024 03:01:34 +0200
+Subject: netfilter: nft_set_rbtree: Add missing expired checks
+To: netfilter-devel@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org
+Message-ID: <20240613010209.104423-6-pablo@netfilter.org>
+
+From: Phil Sutter <phil@nwl.cc>
+
+commit 340eaff651160234bdbce07ef34b92a8e45cd540 upstream.
+
+Expired intervals would still match and be dumped to user space until
+garbage collection wiped them out. Make sure they stop matching and
+disappear (from users' perspective) as soon as they expire.
+
+Fixes: 8d8540c4f5e03 ("netfilter: nft_set_rbtree: add timeout support")
+Signed-off-by: Phil Sutter <phil@nwl.cc>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nft_set_rbtree.c |   11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+--- a/net/netfilter/nft_set_rbtree.c
++++ b/net/netfilter/nft_set_rbtree.c
+@@ -82,6 +82,10 @@ static bool __nft_rbtree_lookup(const st
+                               parent = rcu_dereference_raw(parent->rb_left);
+                               continue;
+                       }
++
++                      if (nft_set_elem_expired(&rbe->ext))
++                              return false;
++
+                       if (nft_rbtree_interval_end(rbe)) {
+                               if (nft_set_is_anonymous(set))
+                                       return false;
+@@ -97,6 +101,7 @@ static bool __nft_rbtree_lookup(const st
+       if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
+           nft_set_elem_active(&interval->ext, genmask) &&
++          !nft_set_elem_expired(&interval->ext) &&
+           nft_rbtree_interval_start(interval)) {
+               *ext = &interval->ext;
+               return true;
+@@ -157,6 +162,9 @@ static bool __nft_rbtree_get(const struc
+                               continue;
+                       }
++                      if (nft_set_elem_expired(&rbe->ext))
++                              return false;
++
+                       if (!nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) ||
+                           (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END) ==
+                           (flags & NFT_SET_ELEM_INTERVAL_END)) {
+@@ -173,6 +181,7 @@ static bool __nft_rbtree_get(const struc
+       if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
+           nft_set_elem_active(&interval->ext, genmask) &&
++          !nft_set_elem_expired(&interval->ext) &&
+           ((!nft_rbtree_interval_end(interval) &&
+             !(flags & NFT_SET_ELEM_INTERVAL_END)) ||
+            (nft_rbtree_interval_end(interval) &&
+@@ -360,6 +369,8 @@ static void nft_rbtree_walk(const struct
+               if (iter->count < iter->skip)
+                       goto cont;
++              if (nft_set_elem_expired(&rbe->ext))
++                      goto cont;
+               if (!nft_set_elem_active(&rbe->ext, iter->genmask))
+                       goto cont;
diff --git a/queue-4.19/netfilter-nft_set_rbtree-allow-loose-matching-of-closing-element-in-interval.patch b/queue-4.19/netfilter-nft_set_rbtree-allow-loose-matching-of-closing-element-in-interval.patch
new file mode 100644 (file)
index 0000000..a8f92ed
--- /dev/null
@@ -0,0 +1,51 @@
+From stable+bounces-50343-greg=kroah.com@vger.kernel.org Thu Jun 13 03:02:36 2024
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 13 Jun 2024 03:01:33 +0200
+Subject: netfilter: nft_set_rbtree: allow loose matching of closing element in interval
+To: netfilter-devel@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org
+Message-ID: <20240613010209.104423-5-pablo@netfilter.org>
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit 3b18d5eba491b2328b31efa4235724a2354af010 upstream.
+
+Allow finding the closest match for the right side of an interval (end
+flag set) so that lookups in inner ranges work, e.g. 10-20 in 5-25.
+
+Fixes: ba0e4d9917b4 ("netfilter: nf_tables: get set elements via netlink")
+Reported-by: Phil Sutter <phil@nwl.cc>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nft_set_rbtree.c |   10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+--- a/net/netfilter/nft_set_rbtree.c
++++ b/net/netfilter/nft_set_rbtree.c
+@@ -145,9 +145,12 @@ static bool __nft_rbtree_get(const struc
+               d = memcmp(this, key, set->klen);
+               if (d < 0) {
+                       parent = rcu_dereference_raw(parent->rb_left);
+-                      interval = rbe;
++                      if (!(flags & NFT_SET_ELEM_INTERVAL_END))
++                              interval = rbe;
+               } else if (d > 0) {
+                       parent = rcu_dereference_raw(parent->rb_right);
++                      if (flags & NFT_SET_ELEM_INTERVAL_END)
++                              interval = rbe;
+               } else {
+                       if (!nft_set_elem_active(&rbe->ext, genmask)) {
+                               parent = rcu_dereference_raw(parent->rb_left);
+@@ -170,7 +173,10 @@ static bool __nft_rbtree_get(const struc
+       if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
+           nft_set_elem_active(&interval->ext, genmask) &&
+-          !nft_rbtree_interval_end(interval)) {
++          ((!nft_rbtree_interval_end(interval) &&
++            !(flags & NFT_SET_ELEM_INTERVAL_END)) ||
++           (nft_rbtree_interval_end(interval) &&
++            (flags & NFT_SET_ELEM_INTERVAL_END)))) {
+               *elem = interval;
+               return true;
+       }
diff --git a/queue-4.19/netfilter-nft_set_rbtree-fix-null-deref-on-element-insertion.patch b/queue-4.19/netfilter-nft_set_rbtree-fix-null-deref-on-element-insertion.patch
new file mode 100644 (file)
index 0000000..47996a8
--- /dev/null
@@ -0,0 +1,87 @@
+From stable+bounces-50347-greg=kroah.com@vger.kernel.org Thu Jun 13 03:02:45 2024
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 13 Jun 2024 03:01:36 +0200
+Subject: netfilter: nft_set_rbtree: fix null deref on element insertion
+To: netfilter-devel@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org
+Message-ID: <20240613010209.104423-8-pablo@netfilter.org>
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit 61ae320a29b0540c16931816299eb86bf2b66c08 upstream.
+
+There is no guarantee that rb_prev() will not return NULL in nft_rbtree_gc_elem():
+
+general protection fault, probably for non-canonical address 0xdffffc0000000003: 0000 [#1] PREEMPT SMP KASAN
+KASAN: null-ptr-deref in range [0x0000000000000018-0x000000000000001f]
+ nft_add_set_elem+0x14b0/0x2990
+  nf_tables_newsetelem+0x528/0xb30
+
+Furthermore, there is a possible use-after-free while iterating,
+'node' can be free'd so we need to cache the next value to use.
+
+Fixes: c9e6978e2725 ("netfilter: nft_set_rbtree: Switch to node list walk for overlap detection")
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nft_set_rbtree.c |   20 +++++++++++++-------
+ 1 file changed, 13 insertions(+), 7 deletions(-)
+
+--- a/net/netfilter/nft_set_rbtree.c
++++ b/net/netfilter/nft_set_rbtree.c
+@@ -223,7 +223,7 @@ static int nft_rbtree_gc_elem(const stru
+ {
+       struct nft_set *set = (struct nft_set *)__set;
+       struct rb_node *prev = rb_prev(&rbe->node);
+-      struct nft_rbtree_elem *rbe_prev;
++      struct nft_rbtree_elem *rbe_prev = NULL;
+       struct nft_set_gc_batch *gcb;
+       gcb = nft_set_gc_batch_check(set, NULL, GFP_ATOMIC);
+@@ -231,17 +231,21 @@ static int nft_rbtree_gc_elem(const stru
+               return -ENOMEM;
+       /* search for expired end interval coming before this element. */
+-      do {
++      while (prev) {
+               rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
+               if (nft_rbtree_interval_end(rbe_prev))
+                       break;
+               prev = rb_prev(prev);
+-      } while (prev != NULL);
++      }
++
++      if (rbe_prev) {
++              rb_erase(&rbe_prev->node, &priv->root);
++              atomic_dec(&set->nelems);
++      }
+-      rb_erase(&rbe_prev->node, &priv->root);
+       rb_erase(&rbe->node, &priv->root);
+-      atomic_sub(2, &set->nelems);
++      atomic_dec(&set->nelems);
+       nft_set_gc_batch_add(gcb, rbe);
+       nft_set_gc_batch_complete(gcb);
+@@ -270,7 +274,7 @@ static int __nft_rbtree_insert(const str
+                              struct nft_set_ext **ext)
+ {
+       struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL;
+-      struct rb_node *node, *parent, **p, *first = NULL;
++      struct rb_node *node, *next, *parent, **p, *first = NULL;
+       struct nft_rbtree *priv = nft_set_priv(set);
+       u8 genmask = nft_genmask_next(net);
+       int d, err;
+@@ -309,7 +313,9 @@ static int __nft_rbtree_insert(const str
+        * Values stored in the tree are in reversed order, starting from
+        * highest to lowest value.
+        */
+-      for (node = first; node != NULL; node = rb_next(node)) {
++      for (node = first; node != NULL; node = next) {
++              next = rb_next(node);
++
+               rbe = rb_entry(node, struct nft_rbtree_elem, node);
+               if (!nft_set_elem_active(&rbe->ext, genmask))
diff --git a/queue-4.19/netfilter-nft_set_rbtree-fix-overlap-expiration-walk.patch b/queue-4.19/netfilter-nft_set_rbtree-fix-overlap-expiration-walk.patch
new file mode 100644 (file)
index 0000000..90911a0
--- /dev/null
@@ -0,0 +1,87 @@
+From stable+bounces-50348-greg=kroah.com@vger.kernel.org Thu Jun 13 03:02:49 2024
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 13 Jun 2024 03:01:37 +0200
+Subject: netfilter: nft_set_rbtree: fix overlap expiration walk
+To: netfilter-devel@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org
+Message-ID: <20240613010209.104423-9-pablo@netfilter.org>
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit f718863aca469a109895cb855e6b81fff4827d71 upstream.
+
+The lazy gc on insert that should remove timed-out entries fails to release
+the other half of the interval, if any.
+
+Can be reproduced with tests/shell/testcases/sets/0044interval_overlap_0
+in nftables.git and kmemleak enabled kernel.
+
+Second bug is the use of rbe_prev vs. prev pointer.
+If rb_prev() returns NULL after at least one iteration, rbe_prev points
+to element that is not an end interval, hence it should not be removed.
+
+Lastly, check the genmask of the end interval if this is active in the
+current generation.
+
+Fixes: c9e6978e2725 ("netfilter: nft_set_rbtree: Switch to node list walk for overlap detection")
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nft_set_rbtree.c |   20 ++++++++++++++------
+ 1 file changed, 14 insertions(+), 6 deletions(-)
+
+--- a/net/netfilter/nft_set_rbtree.c
++++ b/net/netfilter/nft_set_rbtree.c
+@@ -219,29 +219,37 @@ static void *nft_rbtree_get(const struct
+ static int nft_rbtree_gc_elem(const struct nft_set *__set,
+                             struct nft_rbtree *priv,
+-                            struct nft_rbtree_elem *rbe)
++                            struct nft_rbtree_elem *rbe,
++                            u8 genmask)
+ {
+       struct nft_set *set = (struct nft_set *)__set;
+       struct rb_node *prev = rb_prev(&rbe->node);
+-      struct nft_rbtree_elem *rbe_prev = NULL;
++      struct nft_rbtree_elem *rbe_prev;
+       struct nft_set_gc_batch *gcb;
+       gcb = nft_set_gc_batch_check(set, NULL, GFP_ATOMIC);
+       if (!gcb)
+               return -ENOMEM;
+-      /* search for expired end interval coming before this element. */
++      /* search for end interval coming before this element.
++       * end intervals don't carry a timeout extension, they
++       * are coupled with the interval start element.
++       */
+       while (prev) {
+               rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
+-              if (nft_rbtree_interval_end(rbe_prev))
++              if (nft_rbtree_interval_end(rbe_prev) &&
++                  nft_set_elem_active(&rbe_prev->ext, genmask))
+                       break;
+               prev = rb_prev(prev);
+       }
+-      if (rbe_prev) {
++      if (prev) {
++              rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
++
+               rb_erase(&rbe_prev->node, &priv->root);
+               atomic_dec(&set->nelems);
++              nft_set_gc_batch_add(gcb, rbe_prev);
+       }
+       rb_erase(&rbe->node, &priv->root);
+@@ -323,7 +331,7 @@ static int __nft_rbtree_insert(const str
+               /* perform garbage collection to avoid bogus overlap reports. */
+               if (nft_set_elem_expired(&rbe->ext)) {
+-                      err = nft_rbtree_gc_elem(set, priv, rbe);
++                      err = nft_rbtree_gc_elem(set, priv, rbe, genmask);
+                       if (err < 0)
+                               return err;
diff --git a/queue-4.19/netfilter-nft_set_rbtree-skip-end-interval-element-from-gc.patch b/queue-4.19/netfilter-nft_set_rbtree-skip-end-interval-element-from-gc.patch
new file mode 100644 (file)
index 0000000..f0aa260
--- /dev/null
@@ -0,0 +1,56 @@
+From stable+bounces-50374-greg=kroah.com@vger.kernel.org Thu Jun 13 03:04:02 2024
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 13 Jun 2024 03:02:03 +0200
+Subject: netfilter: nft_set_rbtree: skip end interval element from gc
+To: netfilter-devel@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org
+Message-ID: <20240613010209.104423-35-pablo@netfilter.org>
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit 60c0c230c6f046da536d3df8b39a20b9a9fd6af0 upstream.
+
+rbtree lazy gc on insert might collect an end interval element that has
+been just added in this transaction, skip end interval elements that
+are not yet active.
+
+Fixes: f718863aca46 ("netfilter: nft_set_rbtree: fix overlap expiration walk")
+Cc: stable@vger.kernel.org
+Reported-by: lonial con <kongln9170@gmail.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nft_set_rbtree.c |    7 +++----
+ 1 file changed, 3 insertions(+), 4 deletions(-)
+
+--- a/net/netfilter/nft_set_rbtree.c
++++ b/net/netfilter/nft_set_rbtree.c
+@@ -240,8 +240,7 @@ static void nft_rbtree_gc_remove(struct
+ static int nft_rbtree_gc_elem(const struct nft_set *__set,
+                             struct nft_rbtree *priv,
+-                            struct nft_rbtree_elem *rbe,
+-                            u8 genmask)
++                            struct nft_rbtree_elem *rbe)
+ {
+       struct nft_set *set = (struct nft_set *)__set;
+       struct rb_node *prev = rb_prev(&rbe->node);
+@@ -260,7 +259,7 @@ static int nft_rbtree_gc_elem(const stru
+       while (prev) {
+               rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
+               if (nft_rbtree_interval_end(rbe_prev) &&
+-                  nft_set_elem_active(&rbe_prev->ext, genmask))
++                  nft_set_elem_active(&rbe_prev->ext, NFT_GENMASK_ANY))
+                       break;
+               prev = rb_prev(prev);
+@@ -368,7 +367,7 @@ static int __nft_rbtree_insert(const str
+                */
+               if (nft_set_elem_expired(&rbe->ext) &&
+                   nft_set_elem_active(&rbe->ext, cur_genmask)) {
+-                      err = nft_rbtree_gc_elem(set, priv, rbe, genmask);
++                      err = nft_rbtree_gc_elem(set, priv, rbe);
+                       if (err < 0)
+                               return err;
diff --git a/queue-4.19/netfilter-nft_set_rbtree-skip-sync-gc-for-new-elements-in-this-transaction.patch b/queue-4.19/netfilter-nft_set_rbtree-skip-sync-gc-for-new-elements-in-this-transaction.patch
new file mode 100644 (file)
index 0000000..6d8bb85
--- /dev/null
@@ -0,0 +1,50 @@
+From stable+bounces-50356-greg=kroah.com@vger.kernel.org Thu Jun 13 03:03:08 2024
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 13 Jun 2024 03:01:46 +0200
+Subject: netfilter: nft_set_rbtree: skip sync GC for new elements in this transaction
+To: netfilter-devel@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org
+Message-ID: <20240613010209.104423-18-pablo@netfilter.org>
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit 2ee52ae94baabf7ee09cf2a8d854b990dac5d0e4 upstream.
+
+New elements in this transaction might expire before such transaction
+ends. Skip sync GC for such elements otherwise commit path might walk
+over an already released object. Once transaction is finished, async GC
+will collect such expired element.
+
+Fixes: f6c383b8c31a ("netfilter: nf_tables: adapt set backend to use GC transaction API")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nft_set_rbtree.c |    8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/net/netfilter/nft_set_rbtree.c
++++ b/net/netfilter/nft_set_rbtree.c
+@@ -317,6 +317,7 @@ static int __nft_rbtree_insert(const str
+       struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL;
+       struct rb_node *node, *next, *parent, **p, *first = NULL;
+       struct nft_rbtree *priv = nft_set_priv(set);
++      u8 cur_genmask = nft_genmask_cur(net);
+       u8 genmask = nft_genmask_next(net);
+       int d, err;
+@@ -362,8 +363,11 @@ static int __nft_rbtree_insert(const str
+               if (!nft_set_elem_active(&rbe->ext, genmask))
+                       continue;
+-              /* perform garbage collection to avoid bogus overlap reports. */
+-              if (nft_set_elem_expired(&rbe->ext)) {
++              /* perform garbage collection to avoid bogus overlap reports
++               * but skip new elements in this transaction.
++               */
++              if (nft_set_elem_expired(&rbe->ext) &&
++                  nft_set_elem_active(&rbe->ext, cur_genmask)) {
+                       err = nft_rbtree_gc_elem(set, priv, rbe, genmask);
+                       if (err < 0)
+                               return err;
diff --git a/queue-4.19/netfilter-nft_set_rbtree-switch-to-node-list-walk-for-overlap-detection.patch b/queue-4.19/netfilter-nft_set_rbtree-switch-to-node-list-walk-for-overlap-detection.patch
new file mode 100644 (file)
index 0000000..1349b00
--- /dev/null
@@ -0,0 +1,323 @@
+From stable+bounces-50346-greg=kroah.com@vger.kernel.org Thu Jun 13 03:02:42 2024
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 13 Jun 2024 03:01:35 +0200
+Subject: netfilter: nft_set_rbtree: Switch to node list walk for overlap detection
+To: netfilter-devel@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org
+Message-ID: <20240613010209.104423-7-pablo@netfilter.org>
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit c9e6978e2725a7d4b6cd23b2facd3f11422c0643 upstream.
+
+...instead of a tree descent, which became overly complicated in an
+attempt to cover cases where expired or inactive elements would affect
+comparisons with the new element being inserted.
+
+Further, it turned out that it's probably impossible to cover all those
+cases, as inactive nodes might entirely hide subtrees consisting of a
+complete interval plus a node that makes the current insertion not
+overlap.
+
+To speed up the overlap check, descend the tree to find a greater
+element that is closer to the key value to insert. Then walk down the
+node list for overlap detection. Starting the overlap check from
+rb_first() unconditionally is slow, it takes 10 times longer due to the
+full linear traversal of the list.
+
+Moreover, perform garbage collection of expired elements when walking
+down the node list to avoid bogus overlap reports.
+
+For the insertion operation itself, this essentially reverts back to the
+implementation before commit 7c84d41416d8 ("netfilter: nft_set_rbtree:
+Detect partial overlaps on insertion"), except that cases of complete
+overlap are already handled in the overlap detection phase itself, which
+slightly simplifies the loop to find the insertion point.
+
+Based on initial patch from Stefano Brivio, including text from the
+original patch description too.
+
+Fixes: 7c84d41416d8 ("netfilter: nft_set_rbtree: Detect partial overlaps on insertion")
+Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nft_set_rbtree.c |  223 ++++++++++++++++++++++++++++++++++++-----
+ 1 file changed, 198 insertions(+), 25 deletions(-)
+
+--- a/net/netfilter/nft_set_rbtree.c
++++ b/net/netfilter/nft_set_rbtree.c
+@@ -41,10 +41,12 @@ static bool nft_rbtree_interval_start(co
+       return !nft_rbtree_interval_end(rbe);
+ }
+-static bool nft_rbtree_equal(const struct nft_set *set, const void *this,
+-                           const struct nft_rbtree_elem *interval)
++static int nft_rbtree_cmp(const struct nft_set *set,
++                        const struct nft_rbtree_elem *e1,
++                        const struct nft_rbtree_elem *e2)
+ {
+-      return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0;
++      return memcmp(nft_set_ext_key(&e1->ext), nft_set_ext_key(&e2->ext),
++                    set->klen);
+ }
+ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
+@@ -55,7 +57,6 @@ static bool __nft_rbtree_lookup(const st
+       const struct nft_rbtree_elem *rbe, *interval = NULL;
+       u8 genmask = nft_genmask_cur(net);
+       const struct rb_node *parent;
+-      const void *this;
+       int d;
+       parent = rcu_dereference_raw(priv->root.rb_node);
+@@ -65,12 +66,11 @@ static bool __nft_rbtree_lookup(const st
+               rbe = rb_entry(parent, struct nft_rbtree_elem, node);
+-              this = nft_set_ext_key(&rbe->ext);
+-              d = memcmp(this, key, set->klen);
++              d = memcmp(nft_set_ext_key(&rbe->ext), key, set->klen);
+               if (d < 0) {
+                       parent = rcu_dereference_raw(parent->rb_left);
+                       if (interval &&
+-                          nft_rbtree_equal(set, this, interval) &&
++                          !nft_rbtree_cmp(set, rbe, interval) &&
+                           nft_rbtree_interval_end(rbe) &&
+                           nft_rbtree_interval_start(interval))
+                               continue;
+@@ -217,43 +217,216 @@ static void *nft_rbtree_get(const struct
+       return rbe;
+ }
++static int nft_rbtree_gc_elem(const struct nft_set *__set,
++                            struct nft_rbtree *priv,
++                            struct nft_rbtree_elem *rbe)
++{
++      struct nft_set *set = (struct nft_set *)__set;
++      struct rb_node *prev = rb_prev(&rbe->node);
++      struct nft_rbtree_elem *rbe_prev;
++      struct nft_set_gc_batch *gcb;
++
++      gcb = nft_set_gc_batch_check(set, NULL, GFP_ATOMIC);
++      if (!gcb)
++              return -ENOMEM;
++
++      /* search for expired end interval coming before this element. */
++      do {
++              rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
++              if (nft_rbtree_interval_end(rbe_prev))
++                      break;
++
++              prev = rb_prev(prev);
++      } while (prev != NULL);
++
++      rb_erase(&rbe_prev->node, &priv->root);
++      rb_erase(&rbe->node, &priv->root);
++      atomic_sub(2, &set->nelems);
++
++      nft_set_gc_batch_add(gcb, rbe);
++      nft_set_gc_batch_complete(gcb);
++
++      return 0;
++}
++
++static bool nft_rbtree_update_first(const struct nft_set *set,
++                                  struct nft_rbtree_elem *rbe,
++                                  struct rb_node *first)
++{
++      struct nft_rbtree_elem *first_elem;
++
++      first_elem = rb_entry(first, struct nft_rbtree_elem, node);
++      /* this element is closest to where the new element is to be inserted:
++       * update the first element for the node list path.
++       */
++      if (nft_rbtree_cmp(set, rbe, first_elem) < 0)
++              return true;
++
++      return false;
++}
++
+ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
+                              struct nft_rbtree_elem *new,
+                              struct nft_set_ext **ext)
+ {
++      struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL;
++      struct rb_node *node, *parent, **p, *first = NULL;
+       struct nft_rbtree *priv = nft_set_priv(set);
+       u8 genmask = nft_genmask_next(net);
+-      struct nft_rbtree_elem *rbe;
+-      struct rb_node *parent, **p;
+-      int d;
++      int d, err;
++      /* Descend the tree to search for an existing element greater than the
++       * key value to insert that is greater than the new element. This is the
++       * first element to walk the ordered elements to find possible overlap.
++       */
+       parent = NULL;
+       p = &priv->root.rb_node;
+       while (*p != NULL) {
+               parent = *p;
+               rbe = rb_entry(parent, struct nft_rbtree_elem, node);
+-              d = memcmp(nft_set_ext_key(&rbe->ext),
+-                         nft_set_ext_key(&new->ext),
+-                         set->klen);
+-              if (d < 0)
++              d = nft_rbtree_cmp(set, rbe, new);
++
++              if (d < 0) {
+                       p = &parent->rb_left;
+-              else if (d > 0)
++              } else if (d > 0) {
++                      if (!first ||
++                          nft_rbtree_update_first(set, rbe, first))
++                              first = &rbe->node;
++
+                       p = &parent->rb_right;
+-              else {
+-                      if (nft_rbtree_interval_end(rbe) &&
+-                          nft_rbtree_interval_start(new)) {
++              } else {
++                      if (nft_rbtree_interval_end(rbe))
+                               p = &parent->rb_left;
+-                      } else if (nft_rbtree_interval_start(rbe) &&
+-                                 nft_rbtree_interval_end(new)) {
++                      else
+                               p = &parent->rb_right;
+-                      } else if (nft_set_elem_active(&rbe->ext, genmask)) {
+-                              *ext = &rbe->ext;
+-                              return -EEXIST;
+-                      } else {
+-                              p = &parent->rb_left;
++              }
++      }
++
++      if (!first)
++              first = rb_first(&priv->root);
++
++      /* Detect overlap by going through the list of valid tree nodes.
++       * Values stored in the tree are in reversed order, starting from
++       * highest to lowest value.
++       */
++      for (node = first; node != NULL; node = rb_next(node)) {
++              rbe = rb_entry(node, struct nft_rbtree_elem, node);
++
++              if (!nft_set_elem_active(&rbe->ext, genmask))
++                      continue;
++
++              /* perform garbage collection to avoid bogus overlap reports. */
++              if (nft_set_elem_expired(&rbe->ext)) {
++                      err = nft_rbtree_gc_elem(set, priv, rbe);
++                      if (err < 0)
++                              return err;
++
++                      continue;
++              }
++
++              d = nft_rbtree_cmp(set, rbe, new);
++              if (d == 0) {
++                      /* Matching end element: no need to look for an
++                       * overlapping greater or equal element.
++                       */
++                      if (nft_rbtree_interval_end(rbe)) {
++                              rbe_le = rbe;
++                              break;
++                      }
++
++                      /* first element that is greater or equal to key value. */
++                      if (!rbe_ge) {
++                              rbe_ge = rbe;
++                              continue;
++                      }
++
++                      /* this is a closer more or equal element, update it. */
++                      if (nft_rbtree_cmp(set, rbe_ge, new) != 0) {
++                              rbe_ge = rbe;
++                              continue;
+                       }
++
++                      /* element is equal to key value, make sure flags are
++                       * the same, an existing more or equal start element
++                       * must not be replaced by more or equal end element.
++                       */
++                      if ((nft_rbtree_interval_start(new) &&
++                           nft_rbtree_interval_start(rbe_ge)) ||
++                          (nft_rbtree_interval_end(new) &&
++                           nft_rbtree_interval_end(rbe_ge))) {
++                              rbe_ge = rbe;
++                              continue;
++                      }
++              } else if (d > 0) {
++                      /* annotate element greater than the new element. */
++                      rbe_ge = rbe;
++                      continue;
++              } else if (d < 0) {
++                      /* annotate element less than the new element. */
++                      rbe_le = rbe;
++                      break;
+               }
+       }
++
++      /* - new start element matching existing start element: full overlap
++       *   reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given.
++       */
++      if (rbe_ge && !nft_rbtree_cmp(set, new, rbe_ge) &&
++          nft_rbtree_interval_start(rbe_ge) == nft_rbtree_interval_start(new)) {
++              *ext = &rbe_ge->ext;
++              return -EEXIST;
++      }
++
++      /* - new end element matching existing end element: full overlap
++       *   reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given.
++       */
++      if (rbe_le && !nft_rbtree_cmp(set, new, rbe_le) &&
++          nft_rbtree_interval_end(rbe_le) == nft_rbtree_interval_end(new)) {
++              *ext = &rbe_le->ext;
++              return -EEXIST;
++      }
++
++      /* - new start element with existing closest, less or equal key value
++       *   being a start element: partial overlap, reported as -ENOTEMPTY.
++       *   Anonymous sets allow for two consecutive start element since they
++       *   are constant, skip them to avoid bogus overlap reports.
++       */
++      if (!nft_set_is_anonymous(set) && rbe_le &&
++          nft_rbtree_interval_start(rbe_le) && nft_rbtree_interval_start(new))
++              return -ENOTEMPTY;
++
++      /* - new end element with existing closest, less or equal key value
++       *   being a end element: partial overlap, reported as -ENOTEMPTY.
++       */
++      if (rbe_le &&
++          nft_rbtree_interval_end(rbe_le) && nft_rbtree_interval_end(new))
++              return -ENOTEMPTY;
++
++      /* - new end element with existing closest, greater or equal key value
++       *   being an end element: partial overlap, reported as -ENOTEMPTY
++       */
++      if (rbe_ge &&
++          nft_rbtree_interval_end(rbe_ge) && nft_rbtree_interval_end(new))
++              return -ENOTEMPTY;
++
++      /* Accepted element: pick insertion point depending on key value */
++      parent = NULL;
++      p = &priv->root.rb_node;
++      while (*p != NULL) {
++              parent = *p;
++              rbe = rb_entry(parent, struct nft_rbtree_elem, node);
++              d = nft_rbtree_cmp(set, rbe, new);
++
++              if (d < 0)
++                      p = &parent->rb_left;
++              else if (d > 0)
++                      p = &parent->rb_right;
++              else if (nft_rbtree_interval_end(rbe))
++                      p = &parent->rb_left;
++              else
++                      p = &parent->rb_right;
++      }
++
+       rb_link_node_rcu(&new->node, parent, p);
+       rb_insert_color(&new->node, &priv->root);
+       return 0;
diff --git a/queue-4.19/netfilter-nft_set_rbtree-use-read-spinlock-to-avoid-datapath-contention.patch b/queue-4.19/netfilter-nft_set_rbtree-use-read-spinlock-to-avoid-datapath-contention.patch
new file mode 100644 (file)
index 0000000..607a972
--- /dev/null
@@ -0,0 +1,45 @@
+From stable+bounces-50359-greg=kroah.com@vger.kernel.org Thu Jun 13 03:03:15 2024
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 13 Jun 2024 03:01:47 +0200
+Subject: netfilter: nft_set_rbtree: use read spinlock to avoid datapath contention
+To: netfilter-devel@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org
+Message-ID: <20240613010209.104423-19-pablo@netfilter.org>
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit 96b33300fba880ec0eafcf3d82486f3463b4b6da upstream.
+
+rbtree GC does not modify the data structure; instead, it collects expired
+elements and enqueues a GC transaction. Use a read spinlock instead
+to avoid data contention while GC worker is running.
+
+Fixes: f6c383b8c31a ("netfilter: nf_tables: adapt set backend to use GC transaction API")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nft_set_rbtree.c |    6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+--- a/net/netfilter/nft_set_rbtree.c
++++ b/net/netfilter/nft_set_rbtree.c
+@@ -629,8 +629,7 @@ static void nft_rbtree_gc(struct work_st
+       if (!gc)
+               goto done;
+-      write_lock_bh(&priv->lock);
+-      write_seqcount_begin(&priv->count);
++      read_lock_bh(&priv->lock);
+       for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
+               /* Ruleset has been updated, try later. */
+@@ -679,8 +678,7 @@ dead_elem:
+       }
+ try_later:
+-      write_seqcount_end(&priv->count);
+-      write_unlock_bh(&priv->lock);
++      read_unlock_bh(&priv->lock);
+       if (gc)
+               nft_trans_gc_queue_async_done(gc);
diff --git a/queue-4.19/netfilter-nftables-exthdr-fix-4-byte-stack-oob-write.patch b/queue-4.19/netfilter-nftables-exthdr-fix-4-byte-stack-oob-write.patch
new file mode 100644 (file)
index 0000000..3b95090
--- /dev/null
@@ -0,0 +1,72 @@
+From stable+bounces-50368-greg=kroah.com@vger.kernel.org Thu Jun 13 03:03:44 2024
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 13 Jun 2024 03:01:57 +0200
+Subject: netfilter: nftables: exthdr: fix 4-byte stack OOB write
+To: netfilter-devel@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org
+Message-ID: <20240613010209.104423-29-pablo@netfilter.org>
+
+From: Florian Westphal <fw@strlen.de>
+
+commit fd94d9dadee58e09b49075240fe83423eb1dcd36 upstream.
+
+If priv->len is a multiple of 4, then dst[len / 4] can write past
+the destination array which leads to stack corruption.
+
+This construct is necessary to clean the remainder of the register
+in case ->len is NOT a multiple of the register size, so make it
+conditional just like nft_payload.c does.
+
+The bug was added in 4.1 cycle and then copied/inherited when
+tcp/sctp and ip option support was added.
+
+Bug reported by Zero Day Initiative project (ZDI-CAN-21950,
+ZDI-CAN-21951, ZDI-CAN-21961).
+
+Fixes: 49499c3e6e18 ("netfilter: nf_tables: switch registers to 32 bit addressing")
+Fixes: 935b7f643018 ("netfilter: nft_exthdr: add TCP option matching")
+Fixes: 133dc203d77d ("netfilter: nft_exthdr: Support SCTP chunks")
+Fixes: dbb5281a1f84 ("netfilter: nf_tables: add support for matching IPv4 options")
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nft_exthdr.c |   14 +++++++++++---
+ 1 file changed, 11 insertions(+), 3 deletions(-)
+
+--- a/net/netfilter/nft_exthdr.c
++++ b/net/netfilter/nft_exthdr.c
+@@ -36,6 +36,14 @@ static unsigned int optlen(const u8 *opt
+               return opt[offset + 1];
+ }
++static int nft_skb_copy_to_reg(const struct sk_buff *skb, int offset, u32 *dest, unsigned int len)
++{
++      if (len % NFT_REG32_SIZE)
++              dest[len / NFT_REG32_SIZE] = 0;
++
++      return skb_copy_bits(skb, offset, dest, len);
++}
++
+ static void nft_exthdr_ipv6_eval(const struct nft_expr *expr,
+                                struct nft_regs *regs,
+                                const struct nft_pktinfo *pkt)
+@@ -57,8 +65,7 @@ static void nft_exthdr_ipv6_eval(const s
+       }
+       offset += priv->offset;
+-      dest[priv->len / NFT_REG32_SIZE] = 0;
+-      if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0)
++      if (nft_skb_copy_to_reg(pkt->skb, offset, dest, priv->len) < 0)
+               goto err;
+       return;
+ err:
+@@ -114,7 +121,8 @@ static void nft_exthdr_tcp_eval(const st
+               if (priv->flags & NFT_EXTHDR_F_PRESENT) {
+                       *dest = 1;
+               } else {
+-                      dest[priv->len / NFT_REG32_SIZE] = 0;
++                      if (priv->len % NFT_REG32_SIZE)
++                              dest[priv->len / NFT_REG32_SIZE] = 0;
+                       memcpy(dest, opt + offset, priv->len);
+               }
diff --git a/queue-4.19/netfilter-nftables-rename-set-element-data-activation-deactivation-functions.patch b/queue-4.19/netfilter-nftables-rename-set-element-data-activation-deactivation-functions.patch
new file mode 100644 (file)
index 0000000..d94b874
--- /dev/null
@@ -0,0 +1,92 @@
+From stable+bounces-50342-greg=kroah.com@vger.kernel.org Thu Jun 13 03:02:36 2024
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 13 Jun 2024 03:01:31 +0200
+Subject: netfilter: nftables: rename set element data activation/deactivation functions
+To: netfilter-devel@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org
+Message-ID: <20240613010209.104423-3-pablo@netfilter.org>
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit f8bb7889af58d8e74d2d61c76b1418230f1610fa upstream.
+
+Rename:
+
+- nft_set_elem_activate() to nft_setelem_data_activate().
+- nft_set_elem_deactivate() to nft_setelem_data_deactivate().
+
+To prepare for updates in the set element infrastructure to add support
+for the special catch-all element.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nf_tables_api.c |   22 +++++++++++-----------
+ 1 file changed, 11 insertions(+), 11 deletions(-)
+
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -4501,8 +4501,8 @@ void nft_set_elem_destroy(const struct n
+ }
+ EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
+-/* Only called from commit path, nft_set_elem_deactivate() already deals with
+- * the refcounting from the preparation phase.
++/* Only called from commit path, nft_setelem_data_deactivate() already deals
++ * with the refcounting from the preparation phase.
+  */
+ static void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
+                                      const struct nft_set *set, void *elem)
+@@ -4806,9 +4806,9 @@ void nft_data_hold(const struct nft_data
+       }
+ }
+-static void nft_set_elem_activate(const struct net *net,
+-                                const struct nft_set *set,
+-                                struct nft_set_elem *elem)
++static void nft_setelem_data_activate(const struct net *net,
++                                    const struct nft_set *set,
++                                    struct nft_set_elem *elem)
+ {
+       const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+@@ -4818,9 +4818,9 @@ static void nft_set_elem_activate(const
+               nft_use_inc_restore(&(*nft_set_ext_obj(ext))->use);
+ }
+-static void nft_set_elem_deactivate(const struct net *net,
+-                                  const struct nft_set *set,
+-                                  struct nft_set_elem *elem)
++static void nft_setelem_data_deactivate(const struct net *net,
++                                      const struct nft_set *set,
++                                      struct nft_set_elem *elem)
+ {
+       const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+@@ -4887,7 +4887,7 @@ static int nft_del_setelem(struct nft_ct
+       kfree(elem.priv);
+       elem.priv = priv;
+-      nft_set_elem_deactivate(ctx->net, set, &elem);
++      nft_setelem_data_deactivate(ctx->net, set, &elem);
+       nft_trans_elem(trans) = elem;
+       nft_trans_commit_list_add_tail(ctx->net, trans);
+@@ -4921,7 +4921,7 @@ static int nft_flush_set(const struct nf
+       }
+       set->ndeact++;
+-      nft_set_elem_deactivate(ctx->net, set, elem);
++      nft_setelem_data_deactivate(ctx->net, set, elem);
+       nft_trans_elem_set(trans) = set;
+       nft_trans_elem(trans) = *elem;
+       nft_trans_commit_list_add_tail(ctx->net, trans);
+@@ -6954,7 +6954,7 @@ static int __nf_tables_abort(struct net
+               case NFT_MSG_DELSETELEM:
+                       te = (struct nft_trans_elem *)trans->data;
+-                      nft_set_elem_activate(net, te->set, &te->elem);
++                      nft_setelem_data_activate(net, te->set, &te->elem);
+                       te->set->ops->activate(net, te->set, &te->elem);
+                       te->set->ndeact--;
diff --git a/queue-4.19/netfilter-nftables-update-table-flags-from-the-commit-phase.patch b/queue-4.19/netfilter-nftables-update-table-flags-from-the-commit-phase.patch
new file mode 100644 (file)
index 0000000..c133ccd
--- /dev/null
@@ -0,0 +1,114 @@
+From stable+bounces-50363-greg=kroah.com@vger.kernel.org Thu Jun 13 03:03:28 2024
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 13 Jun 2024 03:01:52 +0200
+Subject: netfilter: nftables: update table flags from the commit phase
+To: netfilter-devel@vger.kernel.org
+Cc: gregkh@linuxfoundation.org, sashal@kernel.org, stable@vger.kernel.org
+Message-ID: <20240613010209.104423-24-pablo@netfilter.org>
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit 0ce7cf4127f14078ca598ba9700d813178a59409 upstream.
+
+Do not update table flags from the preparation phase. Store the flags
+update into the transaction, then update the flags from the commit
+phase.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/netfilter/nf_tables.h |    9 ++++++---
+ net/netfilter/nf_tables_api.c     |   31 ++++++++++++++++---------------
+ 2 files changed, 22 insertions(+), 18 deletions(-)
+
+--- a/include/net/netfilter/nf_tables.h
++++ b/include/net/netfilter/nf_tables.h
+@@ -1347,13 +1347,16 @@ struct nft_trans_chain {
+ struct nft_trans_table {
+       bool                            update;
+-      bool                            enable;
++      u8                              state;
++      u32                             flags;
+ };
+ #define nft_trans_table_update(trans) \
+       (((struct nft_trans_table *)trans->data)->update)
+-#define nft_trans_table_enable(trans) \
+-      (((struct nft_trans_table *)trans->data)->enable)
++#define nft_trans_table_state(trans)  \
++      (((struct nft_trans_table *)trans->data)->state)
++#define nft_trans_table_flags(trans)  \
++      (((struct nft_trans_table *)trans->data)->flags)
+ struct nft_trans_elem {
+       struct nft_set                  *set;
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -868,6 +868,12 @@ static void nf_tables_table_disable(stru
+       nft_table_disable(net, table, 0);
+ }
++enum {
++      NFT_TABLE_STATE_UNCHANGED       = 0,
++      NFT_TABLE_STATE_DORMANT,
++      NFT_TABLE_STATE_WAKEUP
++};
++
+ static int nf_tables_updtable(struct nft_ctx *ctx)
+ {
+       struct nft_trans *trans;
+@@ -891,19 +897,17 @@ static int nf_tables_updtable(struct nft
+       if ((flags & NFT_TABLE_F_DORMANT) &&
+           !(ctx->table->flags & NFT_TABLE_F_DORMANT)) {
+-              nft_trans_table_enable(trans) = false;
++              nft_trans_table_state(trans) = NFT_TABLE_STATE_DORMANT;
+       } else if (!(flags & NFT_TABLE_F_DORMANT) &&
+                  ctx->table->flags & NFT_TABLE_F_DORMANT) {
+-              ctx->table->flags &= ~NFT_TABLE_F_DORMANT;
+               ret = nf_tables_table_enable(ctx->net, ctx->table);
+               if (ret >= 0)
+-                      nft_trans_table_enable(trans) = true;
+-              else
+-                      ctx->table->flags |= NFT_TABLE_F_DORMANT;
++                      nft_trans_table_state(trans) = NFT_TABLE_STATE_WAKEUP;
+       }
+       if (ret < 0)
+               goto err;
++      nft_trans_table_flags(trans) = flags;
+       nft_trans_table_update(trans) = true;
+       nft_trans_commit_list_add_tail(ctx->net, trans);
+       return 0;
+@@ -7009,11 +7013,10 @@ static int nf_tables_commit(struct net *
+               switch (trans->msg_type) {
+               case NFT_MSG_NEWTABLE:
+                       if (nft_trans_table_update(trans)) {
+-                              if (!nft_trans_table_enable(trans)) {
+-                                      nf_tables_table_disable(net,
+-                                                              trans->ctx.table);
+-                                      trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
+-                              }
++                              if (nft_trans_table_state(trans) == NFT_TABLE_STATE_DORMANT)
++                                      nf_tables_table_disable(net, trans->ctx.table);
++
++                              trans->ctx.table->flags = nft_trans_table_flags(trans);
+                       } else {
+                               nft_clear(net, trans->ctx.table);
+                       }
+@@ -7174,11 +7177,9 @@ static int __nf_tables_abort(struct net
+               switch (trans->msg_type) {
+               case NFT_MSG_NEWTABLE:
+                       if (nft_trans_table_update(trans)) {
+-                              if (nft_trans_table_enable(trans)) {
+-                                      nf_tables_table_disable(net,
+-                                                              trans->ctx.table);
+-                                      trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
+-                              }
++                              if (nft_trans_table_state(trans) == NFT_TABLE_STATE_WAKEUP)
++                                      nf_tables_table_disable(net, trans->ctx.table);
++
+                               nft_trans_destroy(trans);
+                       } else {
+                               list_del_rcu(&trans->ctx.table->list);
index 50640dd72a7400d9d236d44ae5c16f98d737efb7..9b0ce9246a9782eaed368e6d2b7e1c08b9e32a0e 100644 (file)
@@ -156,3 +156,43 @@ arm64-dts-hi3798cv200-fix-the-size-of-gicr.patch
 media-mxl5xx-move-xpt-structures-off-stack.patch
 media-v4l2-core-hold-videodev_lock-until-dev-reg-finishes.patch
 fbdev-savage-handle-err-return-when-savagefb_check_var-failed.patch
+netfilter-nf_tables-pass-context-to-nft_set_destroy.patch
+netfilter-nftables-rename-set-element-data-activation-deactivation-functions.patch
+netfilter-nf_tables-drop-map-element-references-from-preparation-phase.patch
+netfilter-nft_set_rbtree-allow-loose-matching-of-closing-element-in-interval.patch
+netfilter-nft_set_rbtree-add-missing-expired-checks.patch
+netfilter-nft_set_rbtree-switch-to-node-list-walk-for-overlap-detection.patch
+netfilter-nft_set_rbtree-fix-null-deref-on-element-insertion.patch
+netfilter-nft_set_rbtree-fix-overlap-expiration-walk.patch
+netfilter-nf_tables-don-t-skip-expired-elements-during-walk.patch
+netfilter-nf_tables-gc-transaction-api-to-avoid-race-with-control-plane.patch
+netfilter-nf_tables-adapt-set-backend-to-use-gc-transaction-api.patch
+netfilter-nf_tables-remove-busy-mark-and-gc-batch-api.patch
+netfilter-nf_tables-fix-gc-transaction-races-with-netns-and-netlink-event-exit-path.patch
+netfilter-nf_tables-gc-transaction-race-with-netns-dismantle.patch
+netfilter-nf_tables-gc-transaction-race-with-abort-path.patch
+netfilter-nf_tables-defer-gc-run-if-previous-batch-is-still-pending.patch
+netfilter-nft_set_rbtree-skip-sync-gc-for-new-elements-in-this-transaction.patch
+netfilter-nft_set_rbtree-use-read-spinlock-to-avoid-datapath-contention.patch
+netfilter-nft_set_hash-try-later-when-gc-hits-eagain-on-iteration.patch
+netfilter-nf_tables-fix-memleak-when-more-than-255-elements-expired.patch
+netfilter-nf_tables-unregister-flowtable-hooks-on-netns-exit.patch
+netfilter-nf_tables-double-hook-unregistration-in-netns-path.patch
+netfilter-nftables-update-table-flags-from-the-commit-phase.patch
+netfilter-nf_tables-fix-table-flag-updates.patch
+netfilter-nf_tables-disable-toggling-dormant-table-state-more-than-once.patch
+netfilter-nf_tables-bogus-ebusy-when-deleting-flowtable-after-flush-for-4.19.patch
+netfilter-nft_dynset-fix-timeouts-later-than-23-days.patch
+netfilter-nftables-exthdr-fix-4-byte-stack-oob-write.patch
+netfilter-nft_dynset-report-eopnotsupp-on-missing-set-feature.patch
+netfilter-nft_dynset-relax-superfluous-check-on-set-updates.patch
+netfilter-nf_tables-mark-newset-as-dead-on-transaction-abort.patch
+netfilter-nf_tables-skip-dead-set-elements-in-netlink-dump.patch
+netfilter-nf_tables-validate-nfproto_-family.patch
+netfilter-nft_set_rbtree-skip-end-interval-element-from-gc.patch
+netfilter-nf_tables-set-dormant-flag-on-hook-register-failure.patch
+netfilter-nf_tables-allow-nfproto_inet-in-nft_-match-target-_validate.patch
+netfilter-nf_tables-do-not-compare-internal-table-flags-on-updates.patch
+netfilter-nf_tables-mark-set-as-dead-when-unbinding-anonymous-set-with-timeout.patch
+netfilter-nf_tables-reject-new-basechain-after-table-flag-update.patch
+netfilter-nf_tables-discard-table-flag-update-with-pending-basechain-deletion.patch