4.20-stable patches
author    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Fri, 18 Jan 2019 08:00:10 +0000 (09:00 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Fri, 18 Jan 2019 08:00:10 +0000 (09:00 +0100)
added patches:
netfilter-nf_conncount-don-t-skip-eviction-when-age-is-negative.patch
netfilter-nf_conncount-fix-argument-order-to-find_next_bit.patch
netfilter-nf_conncount-merge-lookup-and-add-functions.patch
netfilter-nf_conncount-move-all-list-iterations-under-spinlock.patch
netfilter-nf_conncount-replace-conncount_lock_slots-with-conncount_slots.patch
netfilter-nf_conncount-restart-search-when-nodes-have-been-erased.patch
netfilter-nf_conncount-speculative-garbage-collection-on-empty-lists.patch
netfilter-nf_conncount-split-gc-in-two-phases.patch

queue-4.20/netfilter-nf_conncount-don-t-skip-eviction-when-age-is-negative.patch [new file with mode: 0644]
queue-4.20/netfilter-nf_conncount-fix-argument-order-to-find_next_bit.patch [new file with mode: 0644]
queue-4.20/netfilter-nf_conncount-merge-lookup-and-add-functions.patch [new file with mode: 0644]
queue-4.20/netfilter-nf_conncount-move-all-list-iterations-under-spinlock.patch [new file with mode: 0644]
queue-4.20/netfilter-nf_conncount-replace-conncount_lock_slots-with-conncount_slots.patch [new file with mode: 0644]
queue-4.20/netfilter-nf_conncount-restart-search-when-nodes-have-been-erased.patch [new file with mode: 0644]
queue-4.20/netfilter-nf_conncount-speculative-garbage-collection-on-empty-lists.patch [new file with mode: 0644]
queue-4.20/netfilter-nf_conncount-split-gc-in-two-phases.patch [new file with mode: 0644]
queue-4.20/series

diff --git a/queue-4.20/netfilter-nf_conncount-don-t-skip-eviction-when-age-is-negative.patch b/queue-4.20/netfilter-nf_conncount-don-t-skip-eviction-when-age-is-negative.patch
new file mode 100644
index 0000000..80403e5
--- /dev/null
@@ -0,0 +1,35 @@
+From 4cd273bb91b3001f623f516ec726c49754571b1a Mon Sep 17 00:00:00 2001
+From: Florian Westphal <fw@strlen.de>
+Date: Fri, 28 Dec 2018 01:24:43 +0100
+Subject: netfilter: nf_conncount: don't skip eviction when age is negative
+
+From: Florian Westphal <fw@strlen.de>
+
+commit 4cd273bb91b3001f623f516ec726c49754571b1a upstream.
+
+age is a signed integer, so the result can be negative when the timestamps
+have a large delta.  In this case we want to discard the entry.
+
+Instead of using age >= 2 || age < 0, just make it unsigned.
+
+Fixes: b36e4523d4d56 ("netfilter: nf_conncount: fix garbage collection confirm race")
+Reviewed-by: Shawn Bohrer <sbohrer@cloudflare.com>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/netfilter/nf_conncount.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/netfilter/nf_conncount.c
++++ b/net/netfilter/nf_conncount.c
+@@ -155,7 +155,7 @@ find_or_evict(struct net *net, struct nf
+       const struct nf_conntrack_tuple_hash *found;
+       unsigned long a, b;
+       int cpu = raw_smp_processor_id();
+-      __s32 age;
++      u32 age;
+       found = nf_conntrack_find_get(net, &conn->zone, &conn->tuple);
+       if (found)
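
Why the signedness matters, as a minimal userspace sketch (the timestamp
values below are invented for illustration; the kernel derives both
operands from u32-truncated jiffies in the same way):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t b = 1000;            /* hypothetical conn->jiffies32 */
	uint32_t a = b + 0x90000000u; /* "now": entry is ~2^31 ticks old */

	int32_t age_signed = (int32_t)(a - b); /* old __s32 type */
	uint32_t age_unsigned = a - b;         /* new u32 type */

	/* negative, so 'age >= 2' is false and eviction is skipped */
	printf("signed age:   %d\n", age_signed);
	/* huge positive, so 'age >= 2' holds and the entry is evicted */
	printf("unsigned age: %u\n", age_unsigned);
	return 0;
}
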
diff --git a/queue-4.20/netfilter-nf_conncount-fix-argument-order-to-find_next_bit.patch b/queue-4.20/netfilter-nf_conncount-fix-argument-order-to-find_next_bit.patch
new file mode 100644
index 0000000..4b66c9e
--- /dev/null
@@ -0,0 +1,33 @@
+From a007232066f6839d6f256bab21e825d968f1a163 Mon Sep 17 00:00:00 2001
+From: Florian Westphal <fw@strlen.de>
+Date: Fri, 28 Dec 2018 01:24:49 +0100
+Subject: netfilter: nf_conncount: fix argument order to find_next_bit
+
+From: Florian Westphal <fw@strlen.de>
+
+commit a007232066f6839d6f256bab21e825d968f1a163 upstream.
+
+Size and 'next bit' were swapped; this bug could cause the worker to
+reschedule itself even if the system was idle.
+
+Fixes: 5c789e131cbb9 ("netfilter: nf_conncount: Add list lock and gc worker, and RCU for init tree search")
+Reviewed-by: Shawn Bohrer <sbohrer@cloudflare.com>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/netfilter/nf_conncount.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/netfilter/nf_conncount.c
++++ b/net/netfilter/nf_conncount.c
+@@ -488,7 +488,7 @@ next:
+       clear_bit(tree, data->pending_trees);
+       next_tree = (tree + 1) % CONNCOUNT_SLOTS;
+-      next_tree = find_next_bit(data->pending_trees, next_tree, CONNCOUNT_SLOTS);
++      next_tree = find_next_bit(data->pending_trees, CONNCOUNT_SLOTS, next_tree);
+       if (next_tree < CONNCOUNT_SLOTS) {
+               data->gc_tree = next_tree;
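
The kernel prototype is find_next_bit(addr, size, offset); with the
arguments swapped, offset >= size makes the scan empty and the helper
returns its 'size' argument, which here looks like a valid tree index.
A simplified userspace model of the helper (not the kernel
implementation) demonstrates the spurious "pending tree" result:

#include <stdio.h>

#define CONNCOUNT_SLOTS 256

/* simplified stand-in for the kernel helper: scan bits[offset..size) */
static unsigned int find_next_bit(const unsigned char *bits,
				  unsigned int size, unsigned int offset)
{
	unsigned int i;

	for (i = offset; i < size; i++)
		if (bits[i])
			return i;
	return size;	/* nothing set: returns 'size' by convention */
}

int main(void)
{
	unsigned char pending[CONNCOUNT_SLOTS] = { 0 }; /* idle system */
	unsigned int next_tree = 7;

	/* buggy order: size=7, offset=256 -> empty scan, returns 7,
	 * which is < CONNCOUNT_SLOTS, so the worker reschedules itself
	 */
	printf("swapped: %u\n",
	       find_next_bit(pending, next_tree, CONNCOUNT_SLOTS));

	/* fixed order: returns 256, i.e. "no tree pending" */
	printf("fixed:   %u\n",
	       find_next_bit(pending, CONNCOUNT_SLOTS, next_tree));
	return 0;
}
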
diff --git a/queue-4.20/netfilter-nf_conncount-merge-lookup-and-add-functions.patch b/queue-4.20/netfilter-nf_conncount-merge-lookup-and-add-functions.patch
new file mode 100644
index 0000000..792e855
--- /dev/null
@@ -0,0 +1,359 @@
+From df4a902509766897f7371fdfa4c3bf8bc321b55d Mon Sep 17 00:00:00 2001
+From: Florian Westphal <fw@strlen.de>
+Date: Fri, 28 Dec 2018 01:24:46 +0100
+Subject: netfilter: nf_conncount: merge lookup and add functions
+
+From: Florian Westphal <fw@strlen.de>
+
+commit df4a902509766897f7371fdfa4c3bf8bc321b55d upstream.
+
+'lookup' is always followed by 'add'.
+Merge both and make the list-walk part of nf_conncount_add().
+
+This also avoids one unneeded unlock/re-lock pair.
+
+Extra care needs to be taken in count_tree, as we only hold the rcu
+read lock, i.e. we can only insert into an existing tree node after
+acquiring its lock and making sure it has a nonzero count.
+
+As a zero count should be rare, just fall back to insert_tree()
+(which acquires tree lock).
+
+This issue and its solution were pointed out by Shawn Bohrer
+during patch review.
+
+Reviewed-by: Shawn Bohrer <sbohrer@cloudflare.com>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/net/netfilter/nf_conntrack_count.h |   18 ---
+ net/netfilter/nf_conncount.c               |  146 +++++++++++++----------------
+ net/netfilter/nft_connlimit.c              |   14 --
+ 3 files changed, 72 insertions(+), 106 deletions(-)
+
+--- a/include/net/netfilter/nf_conntrack_count.h
++++ b/include/net/netfilter/nf_conntrack_count.h
+@@ -5,12 +5,6 @@
+ struct nf_conncount_data;
+-enum nf_conncount_list_add {
+-      NF_CONNCOUNT_ADDED,     /* list add was ok */
+-      NF_CONNCOUNT_ERR,       /* -ENOMEM, must drop skb */
+-      NF_CONNCOUNT_SKIP,      /* list is already reclaimed by gc */
+-};
+-
+ struct nf_conncount_list {
+       spinlock_t list_lock;
+       struct list_head head;  /* connections with the same filtering key */
+@@ -29,18 +23,12 @@ unsigned int nf_conncount_count(struct n
+                               const struct nf_conntrack_tuple *tuple,
+                               const struct nf_conntrack_zone *zone);
+-void nf_conncount_lookup(struct net *net, struct nf_conncount_list *list,
+-                       const struct nf_conntrack_tuple *tuple,
+-                       const struct nf_conntrack_zone *zone,
+-                       bool *addit);
++int nf_conncount_add(struct net *net, struct nf_conncount_list *list,
++                   const struct nf_conntrack_tuple *tuple,
++                   const struct nf_conntrack_zone *zone);
+ void nf_conncount_list_init(struct nf_conncount_list *list);
+-enum nf_conncount_list_add
+-nf_conncount_add(struct nf_conncount_list *list,
+-               const struct nf_conntrack_tuple *tuple,
+-               const struct nf_conntrack_zone *zone);
+-
+ bool nf_conncount_gc_list(struct net *net,
+                         struct nf_conncount_list *list);
+--- a/net/netfilter/nf_conncount.c
++++ b/net/netfilter/nf_conncount.c
+@@ -83,38 +83,6 @@ static int key_diff(const u32 *a, const
+       return memcmp(a, b, klen * sizeof(u32));
+ }
+-enum nf_conncount_list_add
+-nf_conncount_add(struct nf_conncount_list *list,
+-               const struct nf_conntrack_tuple *tuple,
+-               const struct nf_conntrack_zone *zone)
+-{
+-      struct nf_conncount_tuple *conn;
+-
+-      if (WARN_ON_ONCE(list->count > INT_MAX))
+-              return NF_CONNCOUNT_ERR;
+-
+-      conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
+-      if (conn == NULL)
+-              return NF_CONNCOUNT_ERR;
+-
+-      conn->tuple = *tuple;
+-      conn->zone = *zone;
+-      conn->cpu = raw_smp_processor_id();
+-      conn->jiffies32 = (u32)jiffies;
+-      conn->dead = false;
+-      spin_lock_bh(&list->list_lock);
+-      if (list->dead == true) {
+-              kmem_cache_free(conncount_conn_cachep, conn);
+-              spin_unlock_bh(&list->list_lock);
+-              return NF_CONNCOUNT_SKIP;
+-      }
+-      list_add_tail(&conn->node, &list->head);
+-      list->count++;
+-      spin_unlock_bh(&list->list_lock);
+-      return NF_CONNCOUNT_ADDED;
+-}
+-EXPORT_SYMBOL_GPL(nf_conncount_add);
+-
+ static void __conn_free(struct rcu_head *h)
+ {
+       struct nf_conncount_tuple *conn;
+@@ -177,11 +145,10 @@ find_or_evict(struct net *net, struct nf
+       return ERR_PTR(-EAGAIN);
+ }
+-void nf_conncount_lookup(struct net *net,
+-                       struct nf_conncount_list *list,
+-                       const struct nf_conntrack_tuple *tuple,
+-                       const struct nf_conntrack_zone *zone,
+-                       bool *addit)
++static int __nf_conncount_add(struct net *net,
++                            struct nf_conncount_list *list,
++                            const struct nf_conntrack_tuple *tuple,
++                            const struct nf_conntrack_zone *zone)
+ {
+       const struct nf_conntrack_tuple_hash *found;
+       struct nf_conncount_tuple *conn, *conn_n;
+@@ -189,9 +156,6 @@ void nf_conncount_lookup(struct net *net
+       unsigned int collect = 0;
+       bool free_entry = false;
+-      /* best effort only */
+-      *addit = tuple ? true : false;
+-
+       /* check the saved connections */
+       list_for_each_entry_safe(conn, conn_n, &list->head, node) {
+               if (collect > CONNCOUNT_GC_MAX_NODES)
+@@ -201,21 +165,19 @@ void nf_conncount_lookup(struct net *net
+               if (IS_ERR(found)) {
+                       /* Not found, but might be about to be confirmed */
+                       if (PTR_ERR(found) == -EAGAIN) {
+-                              if (!tuple)
+-                                      continue;
+-
+                               if (nf_ct_tuple_equal(&conn->tuple, tuple) &&
+                                   nf_ct_zone_id(&conn->zone, conn->zone.dir) ==
+                                   nf_ct_zone_id(zone, zone->dir))
+-                                      *addit = false;
+-                      } else if (PTR_ERR(found) == -ENOENT)
++                                      return 0; /* already exists */
++                      } else {
+                               collect++;
++                      }
+                       continue;
+               }
+               found_ct = nf_ct_tuplehash_to_ctrack(found);
+-              if (tuple && nf_ct_tuple_equal(&conn->tuple, tuple) &&
++              if (nf_ct_tuple_equal(&conn->tuple, tuple) &&
+                   nf_ct_zone_equal(found_ct, zone, zone->dir)) {
+                       /*
+                        * We should not see tuples twice unless someone hooks
+@@ -223,7 +185,8 @@ void nf_conncount_lookup(struct net *net
+                        *
+                        * Attempt to avoid a re-add in this case.
+                        */
+-                      *addit = false;
++                      nf_ct_put(found_ct);
++                      return 0;
+               } else if (already_closed(found_ct)) {
+                       /*
+                        * we do not care about connections which are
+@@ -237,8 +200,38 @@ void nf_conncount_lookup(struct net *net
+               nf_ct_put(found_ct);
+       }
++
++      if (WARN_ON_ONCE(list->count > INT_MAX))
++              return -EOVERFLOW;
++
++      conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
++      if (conn == NULL)
++              return -ENOMEM;
++
++      conn->tuple = *tuple;
++      conn->zone = *zone;
++      conn->cpu = raw_smp_processor_id();
++      conn->jiffies32 = (u32)jiffies;
++      list_add_tail(&conn->node, &list->head);
++      list->count++;
++      return 0;
++}
++
++int nf_conncount_add(struct net *net,
++                   struct nf_conncount_list *list,
++                   const struct nf_conntrack_tuple *tuple,
++                   const struct nf_conntrack_zone *zone)
++{
++      int ret;
++
++      /* check the saved connections */
++      spin_lock_bh(&list->list_lock);
++      ret = __nf_conncount_add(net, list, tuple, zone);
++      spin_unlock_bh(&list->list_lock);
++
++      return ret;
+ }
+-EXPORT_SYMBOL_GPL(nf_conncount_lookup);
++EXPORT_SYMBOL_GPL(nf_conncount_add);
+ void nf_conncount_list_init(struct nf_conncount_list *list)
+ {
+@@ -339,13 +332,11 @@ insert_tree(struct net *net,
+           const struct nf_conntrack_tuple *tuple,
+           const struct nf_conntrack_zone *zone)
+ {
+-      enum nf_conncount_list_add ret;
+       struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES];
+       struct rb_node **rbnode, *parent;
+       struct nf_conncount_rb *rbconn;
+       struct nf_conncount_tuple *conn;
+       unsigned int count = 0, gc_count = 0;
+-      bool node_found = false;
+       bool do_gc = true;
+       spin_lock_bh(&nf_conncount_locks[hash]);
+@@ -363,20 +354,15 @@ restart:
+               } else if (diff > 0) {
+                       rbnode = &((*rbnode)->rb_right);
+               } else {
+-                      /* unlikely: other cpu added node already */
+-                      node_found = true;
+-                      ret = nf_conncount_add(&rbconn->list, tuple, zone);
+-                      if (ret == NF_CONNCOUNT_ERR) {
++                      int ret;
++
++                      ret = nf_conncount_add(net, &rbconn->list, tuple, zone);
++                      if (ret)
+                               count = 0; /* hotdrop */
+-                      } else if (ret == NF_CONNCOUNT_ADDED) {
++                      else
+                               count = rbconn->list.count;
+-                      } else {
+-                              /* NF_CONNCOUNT_SKIP, rbconn is already
+-                               * reclaimed by gc, insert a new tree node
+-                               */
+-                              node_found = false;
+-                      }
+-                      break;
++                      tree_nodes_free(root, gc_nodes, gc_count);
++                      goto out_unlock;
+               }
+               if (gc_count >= ARRAY_SIZE(gc_nodes))
+@@ -394,9 +380,6 @@ restart:
+               goto restart;
+       }
+-      if (node_found)
+-              goto out_unlock;
+-
+       /* expected case: match, insert new node */
+       rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC);
+       if (rbconn == NULL)
+@@ -431,7 +414,6 @@ count_tree(struct net *net,
+          const struct nf_conntrack_tuple *tuple,
+          const struct nf_conntrack_zone *zone)
+ {
+-      enum nf_conncount_list_add ret;
+       struct rb_root *root;
+       struct rb_node *parent;
+       struct nf_conncount_rb *rbconn;
+@@ -444,7 +426,6 @@ count_tree(struct net *net,
+       parent = rcu_dereference_raw(root->rb_node);
+       while (parent) {
+               int diff;
+-              bool addit;
+               rbconn = rb_entry(parent, struct nf_conncount_rb, node);
+@@ -454,24 +435,29 @@ count_tree(struct net *net,
+               } else if (diff > 0) {
+                       parent = rcu_dereference_raw(parent->rb_right);
+               } else {
+-                      /* same source network -> be counted! */
+-                      nf_conncount_lookup(net, &rbconn->list, tuple, zone,
+-                                          &addit);
++                      int ret;
+-                      if (!addit)
++                      if (!tuple) {
++                              nf_conncount_gc_list(net, &rbconn->list);
+                               return rbconn->list.count;
++                      }
+-                      ret = nf_conncount_add(&rbconn->list, tuple, zone);
+-                      if (ret == NF_CONNCOUNT_ERR) {
+-                              return 0; /* hotdrop */
+-                      } else if (ret == NF_CONNCOUNT_ADDED) {
+-                              return rbconn->list.count;
+-                      } else {
+-                              /* NF_CONNCOUNT_SKIP, rbconn is already
+-                               * reclaimed by gc, insert a new tree node
+-                               */
++                      spin_lock_bh(&rbconn->list.list_lock);
++                      /* Node might be about to be free'd.
++                       * We need to defer to insert_tree() in this case.
++                       */
++                      if (rbconn->list.count == 0) {
++                              spin_unlock_bh(&rbconn->list.list_lock);
+                               break;
+                       }
++
++                      /* same source network -> be counted! */
++                      ret = __nf_conncount_add(net, &rbconn->list, tuple, zone);
++                      spin_unlock_bh(&rbconn->list.list_lock);
++                      if (ret)
++                              return 0; /* hotdrop */
++                      else
++                              return rbconn->list.count;
+               }
+       }
+--- a/net/netfilter/nft_connlimit.c
++++ b/net/netfilter/nft_connlimit.c
+@@ -30,7 +30,6 @@ static inline void nft_connlimit_do_eval
+       enum ip_conntrack_info ctinfo;
+       const struct nf_conn *ct;
+       unsigned int count;
+-      bool addit;
+       tuple_ptr = &tuple;
+@@ -44,19 +43,12 @@ static inline void nft_connlimit_do_eval
+               return;
+       }
+-      nf_conncount_lookup(nft_net(pkt), &priv->list, tuple_ptr, zone,
+-                          &addit);
+-      count = priv->list.count;
+-
+-      if (!addit)
+-              goto out;
+-
+-      if (nf_conncount_add(&priv->list, tuple_ptr, zone) == NF_CONNCOUNT_ERR) {
++      if (nf_conncount_add(nft_net(pkt), &priv->list, tuple_ptr, zone)) {
+               regs->verdict.code = NF_DROP;
+               return;
+       }
+-      count++;
+-out:
++
++      count = priv->list.count;
+       if ((count > priv->limit) ^ priv->invert) {
+               regs->verdict.code = NFT_BREAK;
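
The resulting locking pattern, reduced to a generic sketch (hypothetical
list type and names, not the kernel API): the scan for an existing entry
and the insertion of a new one share a single critical section, so no
other CPU can reclaim or repopulate the list between the two steps.

#include <pthread.h>
#include <stdlib.h>

struct entry {
	struct entry *next;
	int key;
};

struct keyed_list {
	pthread_mutex_t lock;
	struct entry *head;
	unsigned int count;
};

/* find-or-insert under one lock hold, mirroring nf_conncount_add()
 * wrapping __nf_conncount_add() with the list lock already held
 */
static int list_find_or_insert(struct keyed_list *l, int key)
{
	struct entry *e;
	int ret = 0;

	pthread_mutex_lock(&l->lock);
	for (e = l->head; e; e = e->next) {
		if (e->key == key)
			goto out;	/* already present: nothing to do */
	}
	e = malloc(sizeof(*e));
	if (!e) {
		ret = -1;	/* -ENOMEM analogue: caller drops the packet */
		goto out;
	}
	e->key = key;
	e->next = l->head;
	l->head = e;
	l->count++;
out:
	pthread_mutex_unlock(&l->lock);
	return ret;
}
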
diff --git a/queue-4.20/netfilter-nf_conncount-move-all-list-iterations-under-spinlock.patch b/queue-4.20/netfilter-nf_conncount-move-all-list-iterations-under-spinlock.patch
new file mode 100644
index 0000000..72539b4
--- /dev/null
@@ -0,0 +1,155 @@
+From 2f971a8f425545da52ca0e6bee81f5b1ea0ccc5f Mon Sep 17 00:00:00 2001
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Fri, 28 Dec 2018 01:24:47 +0100
+Subject: netfilter: nf_conncount: move all list iterations under spinlock
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit 2f971a8f425545da52ca0e6bee81f5b1ea0ccc5f upstream.
+
+Two CPUs may race to remove a connection from the list; despite the
+existing conn->dead flag, this results in a use-after-free. Use the
+per-list spinlock to protect list iterations.
+
+As all accesses to the list now happen while holding the per-list lock,
+we no longer need to delay free operations with rcu.
+
+Joint work with Florian.
+
+Fixes: 5c789e131cbb9 ("netfilter: nf_conncount: Add list lock and gc worker, and RCU for init tree search")
+Reviewed-by: Shawn Bohrer <sbohrer@cloudflare.com>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/netfilter/nf_conncount.c |   46 ++++++++++++++++++-------------------------
+ 1 file changed, 20 insertions(+), 26 deletions(-)
+
+--- a/net/netfilter/nf_conncount.c
++++ b/net/netfilter/nf_conncount.c
+@@ -43,8 +43,6 @@ struct nf_conncount_tuple {
+       struct nf_conntrack_zone        zone;
+       int                             cpu;
+       u32                             jiffies32;
+-      bool                            dead;
+-      struct rcu_head                 rcu_head;
+ };
+ struct nf_conncount_rb {
+@@ -83,36 +81,21 @@ static int key_diff(const u32 *a, const
+       return memcmp(a, b, klen * sizeof(u32));
+ }
+-static void __conn_free(struct rcu_head *h)
+-{
+-      struct nf_conncount_tuple *conn;
+-
+-      conn = container_of(h, struct nf_conncount_tuple, rcu_head);
+-      kmem_cache_free(conncount_conn_cachep, conn);
+-}
+-
+ static bool conn_free(struct nf_conncount_list *list,
+                     struct nf_conncount_tuple *conn)
+ {
+       bool free_entry = false;
+-      spin_lock_bh(&list->list_lock);
+-
+-      if (conn->dead) {
+-              spin_unlock_bh(&list->list_lock);
+-              return free_entry;
+-      }
++      lockdep_assert_held(&list->list_lock);
+       list->count--;
+-      conn->dead = true;
+-      list_del_rcu(&conn->node);
++      list_del(&conn->node);
+       if (list->count == 0) {
+               list->dead = true;
+               free_entry = true;
+       }
+-      spin_unlock_bh(&list->list_lock);
+-      call_rcu(&conn->rcu_head, __conn_free);
++      kmem_cache_free(conncount_conn_cachep, conn);
+       return free_entry;
+ }
+@@ -242,7 +225,7 @@ void nf_conncount_list_init(struct nf_co
+ }
+ EXPORT_SYMBOL_GPL(nf_conncount_list_init);
+-/* Return true if the list is empty */
++/* Return true if the list is empty. Must be called with BH disabled. */
+ bool nf_conncount_gc_list(struct net *net,
+                         struct nf_conncount_list *list)
+ {
+@@ -253,12 +236,18 @@ bool nf_conncount_gc_list(struct net *ne
+       bool free_entry = false;
+       bool ret = false;
++      /* don't bother if other cpu is already doing GC */
++      if (!spin_trylock(&list->list_lock))
++              return false;
++
+       list_for_each_entry_safe(conn, conn_n, &list->head, node) {
+               found = find_or_evict(net, list, conn, &free_entry);
+               if (IS_ERR(found)) {
+                       if (PTR_ERR(found) == -ENOENT)  {
+-                              if (free_entry)
++                              if (free_entry) {
++                                      spin_unlock(&list->list_lock);
+                                       return true;
++                              }
+                               collected++;
+                       }
+                       continue;
+@@ -271,23 +260,24 @@ bool nf_conncount_gc_list(struct net *ne
+                        * closed already -> ditch it
+                        */
+                       nf_ct_put(found_ct);
+-                      if (conn_free(list, conn))
++                      if (conn_free(list, conn)) {
++                              spin_unlock(&list->list_lock);
+                               return true;
++                      }
+                       collected++;
+                       continue;
+               }
+               nf_ct_put(found_ct);
+               if (collected > CONNCOUNT_GC_MAX_NODES)
+-                      return false;
++                      break;
+       }
+-      spin_lock_bh(&list->list_lock);
+       if (!list->count) {
+               list->dead = true;
+               ret = true;
+       }
+-      spin_unlock_bh(&list->list_lock);
++      spin_unlock(&list->list_lock);
+       return ret;
+ }
+@@ -478,6 +468,7 @@ static void tree_gc_worker(struct work_s
+       tree = data->gc_tree % CONNCOUNT_SLOTS;
+       root = &data->root[tree];
++      local_bh_disable();
+       rcu_read_lock();
+       for (node = rb_first(root); node != NULL; node = rb_next(node)) {
+               rbconn = rb_entry(node, struct nf_conncount_rb, node);
+@@ -485,6 +476,9 @@ static void tree_gc_worker(struct work_s
+                       gc_count++;
+       }
+       rcu_read_unlock();
++      local_bh_enable();
++
++      cond_resched();
+       spin_lock_bh(&nf_conncount_locks[tree]);
+       if (gc_count < ARRAY_SIZE(gc_nodes))
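
The gc-side shape of this change, as a sketch with a userspace mutex
standing in for the per-list spinlock (hypothetical names): the
collector skips lists another CPU is already working on instead of
blocking, and because every list walk now runs locked, entries can be
freed directly rather than deferred via call_rcu().

#include <pthread.h>
#include <stdbool.h>

struct gc_list {
	pthread_mutex_t lock;
	/* ... entries with the same filtering key ... */
};

/* returns true if this thread actually performed the collection */
static bool gc_list(struct gc_list *l)
{
	/* don't bother if another thread is already doing GC here */
	if (pthread_mutex_trylock(&l->lock) != 0)
		return false;

	/* walk the list and free stale entries in place: safe because
	 * lookups, inserts and GC all serialize on this same lock
	 */

	pthread_mutex_unlock(&l->lock);
	return true;
}
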
diff --git a/queue-4.20/netfilter-nf_conncount-replace-conncount_lock_slots-with-conncount_slots.patch b/queue-4.20/netfilter-nf_conncount-replace-conncount_lock_slots-with-conncount_slots.patch
new file mode 100644
index 0000000..7c3accd
--- /dev/null
@@ -0,0 +1,91 @@
+From c78e7818f16f687389174c4569243abbec8dc68f Mon Sep 17 00:00:00 2001
+From: Shawn Bohrer <sbohrer@cloudflare.com>
+Date: Fri, 28 Dec 2018 01:24:42 +0100
+Subject: netfilter: nf_conncount: replace CONNCOUNT_LOCK_SLOTS with CONNCOUNT_SLOTS
+
+From: Shawn Bohrer <sbohrer@cloudflare.com>
+
+commit c78e7818f16f687389174c4569243abbec8dc68f upstream.
+
+Most of the time these were the same value anyway, but when
+CONFIG_LOCKDEP was enabled we would use a smaller number of locks to
+reduce overhead.  Unfortunately having two values is confusing and not
+worth the complexity.
+
+This fixes a bug where tree_gc_worker() would only GC up to
+CONNCOUNT_LOCK_SLOTS trees, which meant that when CONFIG_LOCKDEP was
+enabled not all trees would be GCed by tree_gc_worker().
+
+Fixes: 5c789e131cbb9 ("netfilter: nf_conncount: Add list lock and gc worker, and RCU for init tree search")
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Shawn Bohrer <sbohrer@cloudflare.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/netfilter/nf_conncount.c |   19 +++++--------------
+ 1 file changed, 5 insertions(+), 14 deletions(-)
+
+--- a/net/netfilter/nf_conncount.c
++++ b/net/netfilter/nf_conncount.c
+@@ -33,12 +33,6 @@
+ #define CONNCOUNT_SLOTS               256U
+-#ifdef CONFIG_LOCKDEP
+-#define CONNCOUNT_LOCK_SLOTS  8U
+-#else
+-#define CONNCOUNT_LOCK_SLOTS  256U
+-#endif
+-
+ #define CONNCOUNT_GC_MAX_NODES        8
+ #define MAX_KEYLEN            5
+@@ -60,7 +54,7 @@ struct nf_conncount_rb {
+       struct rcu_head rcu_head;
+ };
+-static spinlock_t nf_conncount_locks[CONNCOUNT_LOCK_SLOTS] __cacheline_aligned_in_smp;
++static spinlock_t nf_conncount_locks[CONNCOUNT_SLOTS] __cacheline_aligned_in_smp;
+ struct nf_conncount_data {
+       unsigned int keylen;
+@@ -353,7 +347,7 @@ insert_tree(struct net *net,
+       unsigned int count = 0, gc_count = 0;
+       bool node_found = false;
+-      spin_lock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
++      spin_lock_bh(&nf_conncount_locks[hash]);
+       parent = NULL;
+       rbnode = &(root->rb_node);
+@@ -430,7 +424,7 @@ insert_tree(struct net *net,
+       rb_link_node_rcu(&rbconn->node, parent, rbnode);
+       rb_insert_color(&rbconn->node, root);
+ out_unlock:
+-      spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
++      spin_unlock_bh(&nf_conncount_locks[hash]);
+       return count;
+ }
+@@ -499,7 +493,7 @@ static void tree_gc_worker(struct work_s
+       struct rb_node *node;
+       unsigned int tree, next_tree, gc_count = 0;
+-      tree = data->gc_tree % CONNCOUNT_LOCK_SLOTS;
++      tree = data->gc_tree % CONNCOUNT_SLOTS;
+       root = &data->root[tree];
+       rcu_read_lock();
+@@ -621,10 +615,7 @@ static int __init nf_conncount_modinit(v
+ {
+       int i;
+-      BUILD_BUG_ON(CONNCOUNT_LOCK_SLOTS > CONNCOUNT_SLOTS);
+-      BUILD_BUG_ON((CONNCOUNT_SLOTS % CONNCOUNT_LOCK_SLOTS) != 0);
+-
+-      for (i = 0; i < CONNCOUNT_LOCK_SLOTS; ++i)
++      for (i = 0; i < CONNCOUNT_SLOTS; ++i)
+               spin_lock_init(&nf_conncount_locks[i]);
+       conncount_conn_cachep = kmem_cache_create("nf_conncount_tuple",
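
After the change there is a single constant, so indexing a lock and
indexing a tree can never disagree. A minimal sketch of the resulting
lock-striping scheme (illustrative names only):

#include <pthread.h>

#define CONNCOUNT_SLOTS 256U	/* one lock per tree, unconditionally */

static pthread_mutex_t slot_locks[CONNCOUNT_SLOTS];

/* every slot derived from a hash uses the same modulus, so a loop
 * over CONNCOUNT_SLOTS visits exactly the slots the locks protect;
 * the old 'hash % CONNCOUNT_LOCK_SLOTS' remapping is gone
 */
static void with_slot_locked(unsigned int hash,
			     void (*fn)(unsigned int slot))
{
	unsigned int slot = hash % CONNCOUNT_SLOTS;

	pthread_mutex_lock(&slot_locks[slot]);
	fn(slot);
	pthread_mutex_unlock(&slot_locks[slot]);
}
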
diff --git a/queue-4.20/netfilter-nf_conncount-restart-search-when-nodes-have-been-erased.patch b/queue-4.20/netfilter-nf_conncount-restart-search-when-nodes-have-been-erased.patch
new file mode 100644
index 0000000..4fb52a7
--- /dev/null
@@ -0,0 +1,86 @@
+From e8cfb372b38a1b8979aa7f7631fb5e7b11c3793c Mon Sep 17 00:00:00 2001
+From: Florian Westphal <fw@strlen.de>
+Date: Fri, 28 Dec 2018 01:24:45 +0100
+Subject: netfilter: nf_conncount: restart search when nodes have been erased
+
+From: Florian Westphal <fw@strlen.de>
+
+commit e8cfb372b38a1b8979aa7f7631fb5e7b11c3793c upstream.
+
+Shawn Bohrer reported the following crash:
+ |RIP: 0010:rb_erase+0xae/0x360
+ [..]
+ Call Trace:
+  nf_conncount_destroy+0x59/0xc0 [nf_conncount]
+  cleanup_match+0x45/0x70 [ip_tables]
+  ...
+
+Shawn tracked this down to a bogus 'parent' pointer:
+the problem is that when we insert a new node, there is a chance that
+the 'parent' we found was also passed to tree_nodes_free() (because
+that node was empty) for erase+free.
+
+Instead of trying to be clever and detect when this happens, restart
+the search if we have evicted one or more nodes.  To prevent frequent
+restarts, do not perform gc on the second round.
+
+Also, unconditionally schedule the gc worker.
+The condition
+
+  gc_count > ARRAY_SIZE(gc_nodes)
+
+cannot be true unless the tree grows very large, as the height of the tree
+will be low even with hundreds of nodes present.
+
+Fixes: 5c789e131cbb9 ("netfilter: nf_conncount: Add list lock and gc worker, and RCU for init tree search")
+Reported-by: Shawn Bohrer <sbohrer@cloudflare.com>
+Reviewed-by: Shawn Bohrer <sbohrer@cloudflare.com>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/netfilter/nf_conncount.c |   18 +++++++-----------
+ 1 file changed, 7 insertions(+), 11 deletions(-)
+
+--- a/net/netfilter/nf_conncount.c
++++ b/net/netfilter/nf_conncount.c
+@@ -346,9 +346,10 @@ insert_tree(struct net *net,
+       struct nf_conncount_tuple *conn;
+       unsigned int count = 0, gc_count = 0;
+       bool node_found = false;
++      bool do_gc = true;
+       spin_lock_bh(&nf_conncount_locks[hash]);
+-
++restart:
+       parent = NULL;
+       rbnode = &(root->rb_node);
+       while (*rbnode) {
+@@ -381,21 +382,16 @@ insert_tree(struct net *net,
+               if (gc_count >= ARRAY_SIZE(gc_nodes))
+                       continue;
+-              if (nf_conncount_gc_list(net, &rbconn->list))
++              if (do_gc && nf_conncount_gc_list(net, &rbconn->list))
+                       gc_nodes[gc_count++] = rbconn;
+       }
+       if (gc_count) {
+               tree_nodes_free(root, gc_nodes, gc_count);
+-              /* tree_node_free before new allocation permits
+-               * allocator to re-use newly free'd object.
+-               *
+-               * This is a rare event; in most cases we will find
+-               * existing node to re-use. (or gc_count is 0).
+-               */
+-
+-              if (gc_count >= ARRAY_SIZE(gc_nodes))
+-                      schedule_gc_worker(data, hash);
++              schedule_gc_worker(data, hash);
++              gc_count = 0;
++              do_gc = false;
++              goto restart;
+       }
+       if (node_found)
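
The restart pattern, reduced to a self-contained sketch with a sorted
singly-linked list standing in for the rbtree (hypothetical structure,
not the kernel code): eviction can free the very node the saved
insertion position points at, so the walk is redone from scratch, and
gc is disabled on the second pass so the loop terminates.

#include <stdbool.h>
#include <stdlib.h>

struct node {
	struct node *next;
	int key;
	int count;	/* 0 = empty node, eligible for eviction */
};

struct tree {
	struct node *head;	/* sorted list as a stand-in rbtree */
};

/* free empty nodes; returns true if anything was erased */
static bool evict_empty(struct tree *t)
{
	struct node **pp = &t->head;
	bool evicted = false;

	while (*pp) {
		struct node *n = *pp;

		if (n->count == 0) {
			*pp = n->next;
			free(n);
			evicted = true;
		} else {
			pp = &n->next;
		}
	}
	return evicted;
}

static int insert_key(struct tree *t, int key)
{
	bool do_gc = true;
	struct node **pos, *n;

restart:
	for (pos = &t->head; *pos && (*pos)->key < key;
	     pos = &(*pos)->next)
		;
	if (do_gc && evict_empty(t)) {
		do_gc = false;	/* second round: no gc, no restart */
		goto restart;	/* 'pos' may dangle after the frees */
	}
	n = malloc(sizeof(*n));
	if (!n)
		return -1;
	n->key = key;
	n->count = 1;
	n->next = *pos;
	*pos = n;
	return 0;
}
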
diff --git a/queue-4.20/netfilter-nf_conncount-speculative-garbage-collection-on-empty-lists.patch b/queue-4.20/netfilter-nf_conncount-speculative-garbage-collection-on-empty-lists.patch
new file mode 100644
index 0000000..43f0b43
--- /dev/null
@@ -0,0 +1,198 @@
+From c80f10bc973af2ace6b1414724eeff61eaa71837 Mon Sep 17 00:00:00 2001
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Fri, 28 Dec 2018 01:24:48 +0100
+Subject: netfilter: nf_conncount: speculative garbage collection on empty lists
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit c80f10bc973af2ace6b1414724eeff61eaa71837 upstream.
+
+Instead of removing an empty list node that might be reintroduced soon
+thereafter, tentatively place the empty list node on the list passed to
+tree_nodes_free(), then re-check if the list is empty again before erasing
+it from the tree.
+
+[ Florian: rebase on top of pending nf_conncount fixes ]
+
+Fixes: 5c789e131cbb9 ("netfilter: nf_conncount: Add list lock and gc worker, and RCU for init tree search")
+Reviewed-by: Shawn Bohrer <sbohrer@cloudflare.com>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/net/netfilter/nf_conntrack_count.h |    1 
+ net/netfilter/nf_conncount.c               |   47 +++++++++--------------------
+ 2 files changed, 15 insertions(+), 33 deletions(-)
+
+--- a/include/net/netfilter/nf_conntrack_count.h
++++ b/include/net/netfilter/nf_conntrack_count.h
+@@ -9,7 +9,6 @@ struct nf_conncount_list {
+       spinlock_t list_lock;
+       struct list_head head;  /* connections with the same filtering key */
+       unsigned int count;     /* length of list */
+-      bool dead;
+ };
+ struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family,
+--- a/net/netfilter/nf_conncount.c
++++ b/net/netfilter/nf_conncount.c
+@@ -81,27 +81,20 @@ static int key_diff(const u32 *a, const
+       return memcmp(a, b, klen * sizeof(u32));
+ }
+-static bool conn_free(struct nf_conncount_list *list,
++static void conn_free(struct nf_conncount_list *list,
+                     struct nf_conncount_tuple *conn)
+ {
+-      bool free_entry = false;
+-
+       lockdep_assert_held(&list->list_lock);
+       list->count--;
+       list_del(&conn->node);
+-      if (list->count == 0) {
+-              list->dead = true;
+-              free_entry = true;
+-      }
+       kmem_cache_free(conncount_conn_cachep, conn);
+-      return free_entry;
+ }
+ static const struct nf_conntrack_tuple_hash *
+ find_or_evict(struct net *net, struct nf_conncount_list *list,
+-            struct nf_conncount_tuple *conn, bool *free_entry)
++            struct nf_conncount_tuple *conn)
+ {
+       const struct nf_conntrack_tuple_hash *found;
+       unsigned long a, b;
+@@ -121,7 +114,7 @@ find_or_evict(struct net *net, struct nf
+        */
+       age = a - b;
+       if (conn->cpu == cpu || age >= 2) {
+-              *free_entry = conn_free(list, conn);
++              conn_free(list, conn);
+               return ERR_PTR(-ENOENT);
+       }
+@@ -137,14 +130,13 @@ static int __nf_conncount_add(struct net
+       struct nf_conncount_tuple *conn, *conn_n;
+       struct nf_conn *found_ct;
+       unsigned int collect = 0;
+-      bool free_entry = false;
+       /* check the saved connections */
+       list_for_each_entry_safe(conn, conn_n, &list->head, node) {
+               if (collect > CONNCOUNT_GC_MAX_NODES)
+                       break;
+-              found = find_or_evict(net, list, conn, &free_entry);
++              found = find_or_evict(net, list, conn);
+               if (IS_ERR(found)) {
+                       /* Not found, but might be about to be confirmed */
+                       if (PTR_ERR(found) == -EAGAIN) {
+@@ -221,7 +213,6 @@ void nf_conncount_list_init(struct nf_co
+       spin_lock_init(&list->list_lock);
+       INIT_LIST_HEAD(&list->head);
+       list->count = 0;
+-      list->dead = false;
+ }
+ EXPORT_SYMBOL_GPL(nf_conncount_list_init);
+@@ -233,7 +224,6 @@ bool nf_conncount_gc_list(struct net *ne
+       struct nf_conncount_tuple *conn, *conn_n;
+       struct nf_conn *found_ct;
+       unsigned int collected = 0;
+-      bool free_entry = false;
+       bool ret = false;
+       /* don't bother if other cpu is already doing GC */
+@@ -241,15 +231,10 @@ bool nf_conncount_gc_list(struct net *ne
+               return false;
+       list_for_each_entry_safe(conn, conn_n, &list->head, node) {
+-              found = find_or_evict(net, list, conn, &free_entry);
++              found = find_or_evict(net, list, conn);
+               if (IS_ERR(found)) {
+-                      if (PTR_ERR(found) == -ENOENT)  {
+-                              if (free_entry) {
+-                                      spin_unlock(&list->list_lock);
+-                                      return true;
+-                              }
++                      if (PTR_ERR(found) == -ENOENT)
+                               collected++;
+-                      }
+                       continue;
+               }
+@@ -260,10 +245,7 @@ bool nf_conncount_gc_list(struct net *ne
+                        * closed already -> ditch it
+                        */
+                       nf_ct_put(found_ct);
+-                      if (conn_free(list, conn)) {
+-                              spin_unlock(&list->list_lock);
+-                              return true;
+-                      }
++                      conn_free(list, conn);
+                       collected++;
+                       continue;
+               }
+@@ -273,10 +255,8 @@ bool nf_conncount_gc_list(struct net *ne
+                       break;
+       }
+-      if (!list->count) {
+-              list->dead = true;
++      if (!list->count)
+               ret = true;
+-      }
+       spin_unlock(&list->list_lock);
+       return ret;
+@@ -291,6 +271,7 @@ static void __tree_nodes_free(struct rcu
+       kmem_cache_free(conncount_rb_cachep, rbconn);
+ }
++/* caller must hold tree nf_conncount_locks[] lock */
+ static void tree_nodes_free(struct rb_root *root,
+                           struct nf_conncount_rb *gc_nodes[],
+                           unsigned int gc_count)
+@@ -300,8 +281,10 @@ static void tree_nodes_free(struct rb_ro
+       while (gc_count) {
+               rbconn = gc_nodes[--gc_count];
+               spin_lock(&rbconn->list.list_lock);
+-              rb_erase(&rbconn->node, root);
+-              call_rcu(&rbconn->rcu_head, __tree_nodes_free);
++              if (!rbconn->list.count) {
++                      rb_erase(&rbconn->node, root);
++                      call_rcu(&rbconn->rcu_head, __tree_nodes_free);
++              }
+               spin_unlock(&rbconn->list.list_lock);
+       }
+ }
+@@ -318,7 +301,6 @@ insert_tree(struct net *net,
+           struct rb_root *root,
+           unsigned int hash,
+           const u32 *key,
+-          u8 keylen,
+           const struct nf_conntrack_tuple *tuple,
+           const struct nf_conntrack_zone *zone)
+ {
+@@ -327,6 +309,7 @@ insert_tree(struct net *net,
+       struct nf_conncount_rb *rbconn;
+       struct nf_conncount_tuple *conn;
+       unsigned int count = 0, gc_count = 0;
++      u8 keylen = data->keylen;
+       bool do_gc = true;
+       spin_lock_bh(&nf_conncount_locks[hash]);
+@@ -454,7 +437,7 @@ count_tree(struct net *net,
+       if (!tuple)
+               return 0;
+-      return insert_tree(net, data, root, hash, key, keylen, tuple, zone);
++      return insert_tree(net, data, root, hash, key, tuple, zone);
+ }
+ static void tree_gc_worker(struct work_struct *work)
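
The crux is the re-check in tree_nodes_free(): a node that looked empty
when it was queued for deletion may have been repopulated since. A short
sketch of that pattern (userspace mutex in place of the per-list
spinlock, illustrative types):

#include <pthread.h>

struct tree_node {
	pthread_mutex_t list_lock;
	unsigned int count;	/* entries hanging off this node */
	/* ... rbtree linkage ... */
};

static void maybe_erase(struct tree_node *n)
{
	pthread_mutex_lock(&n->list_lock);
	if (n->count == 0) {
		/* still empty under the lock: safe to rb_erase() and
		 * free (deferred via call_rcu() in the kernel code)
		 */
	} else {
		/* another CPU re-added an entry between the speculative
		 * scan and now: keep the node in the tree
		 */
	}
	pthread_mutex_unlock(&n->list_lock);
}
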
diff --git a/queue-4.20/netfilter-nf_conncount-split-gc-in-two-phases.patch b/queue-4.20/netfilter-nf_conncount-split-gc-in-two-phases.patch
new file mode 100644
index 0000000..8a92ff6
--- /dev/null
@@ -0,0 +1,77 @@
+From f7fcc98dfc2d136722007fec0debbed761679b94 Mon Sep 17 00:00:00 2001
+From: Florian Westphal <fw@strlen.de>
+Date: Fri, 28 Dec 2018 01:24:44 +0100
+Subject: netfilter: nf_conncount: split gc in two phases
+
+From: Florian Westphal <fw@strlen.de>
+
+commit f7fcc98dfc2d136722007fec0debbed761679b94 upstream.
+
+The lockless workqueue garbage collector can race with the packet path
+garbage collector to delete list nodes, as it calls tree_nodes_free()
+with the addresses of nodes that might have been free'd already from
+another cpu.
+
+To fix this, split gc into two phases.
+
+One phase to perform gc on the connections: From a locking perspective,
+this is the same as count_tree(): we hold the rcu lock, but we do not
+change the tree; we only change the nodes' contents.
+
+The second phase acquires the tree lock and reaps empty nodes.
+This avoids a race condition between garbage collection and the packet path:
+if a node has been free'd already, the second phase won't find it anymore.
+
+This second phase is, from a locking perspective, the same as insert_tree().
+
+The former only modifies nodes (list content, count); the latter modifies
+the tree itself (rb_erase or rb_insert).
+
+Fixes: 5c789e131cbb9 ("netfilter: nf_conncount: Add list lock and gc worker, and RCU for init tree search")
+Reviewed-by: Shawn Bohrer <sbohrer@cloudflare.com>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/netfilter/nf_conncount.c |   22 +++++++++++++++++++---
+ 1 file changed, 19 insertions(+), 3 deletions(-)
+
+--- a/net/netfilter/nf_conncount.c
++++ b/net/netfilter/nf_conncount.c
+@@ -500,16 +500,32 @@ static void tree_gc_worker(struct work_s
+       for (node = rb_first(root); node != NULL; node = rb_next(node)) {
+               rbconn = rb_entry(node, struct nf_conncount_rb, node);
+               if (nf_conncount_gc_list(data->net, &rbconn->list))
+-                      gc_nodes[gc_count++] = rbconn;
++                      gc_count++;
+       }
+       rcu_read_unlock();
+       spin_lock_bh(&nf_conncount_locks[tree]);
++      if (gc_count < ARRAY_SIZE(gc_nodes))
++              goto next; /* do not bother */
+-      if (gc_count) {
+-              tree_nodes_free(root, gc_nodes, gc_count);
++      gc_count = 0;
++      node = rb_first(root);
++      while (node != NULL) {
++              rbconn = rb_entry(node, struct nf_conncount_rb, node);
++              node = rb_next(node);
++
++              if (rbconn->list.count > 0)
++                      continue;
++
++              gc_nodes[gc_count++] = rbconn;
++              if (gc_count >= ARRAY_SIZE(gc_nodes)) {
++                      tree_nodes_free(root, gc_nodes, gc_count);
++                      gc_count = 0;
++              }
+       }
++      tree_nodes_free(root, gc_nodes, gc_count);
++next:
+       clear_bit(tree, data->pending_trees);
+       next_tree = (tree + 1) % CONNCOUNT_SLOTS;
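
The two phases in miniature (a flat array stands in for the rbtree;
the names and GC_BATCH threshold are illustrative): phase one only
reads, phase two takes the tree lock and re-checks before erasing, so
a node the packet path freed in between is simply no longer found.

#include <pthread.h>
#include <stdbool.h>

#define NODES    64
#define GC_BATCH  8

struct slot_tree {
	pthread_mutex_t tree_lock;
	bool present[NODES];		/* node linked into the tree */
	unsigned int count[NODES];	/* entries per node; 0 = empty */
};

/* phase 1: look, don't touch (kernel: under rcu_read_lock only) */
static unsigned int count_empty(const struct slot_tree *t)
{
	unsigned int i, n = 0;

	for (i = 0; i < NODES; i++)
		if (t->present[i] && t->count[i] == 0)
			n++;
	return n;
}

/* phase 2: erase under the tree lock, re-checking emptiness */
static void tree_gc(struct slot_tree *t)
{
	unsigned int i;

	if (count_empty(t) < GC_BATCH)
		return;		/* too few candidates: do not bother */

	pthread_mutex_lock(&t->tree_lock);
	for (i = 0; i < NODES; i++)
		if (t->present[i] && t->count[i] == 0)
			t->present[i] = false;	/* rb_erase + rcu free */
	pthread_mutex_unlock(&t->tree_lock);
}
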
diff --git a/queue-4.20/series b/queue-4.20/series
index 900517d6332949a82f89b9606ac2b0206df51ecf..8241e1f4d55fd098a075667586a8456debf99f13 100644
@@ -1,5 +1,13 @@
 tty-ldsem-wake-up-readers-after-timed-out-down_write.patch
-can-gw-ensure-dlc-boundaries-after-can-frame-modification.patch
 tty-hold-tty_ldisc_lock-during-tty_reopen.patch
 tty-simplify-tty-count-math-in-tty_reopen.patch
 tty-don-t-hold-ldisc-lock-in-tty_reopen-if-ldisc-present.patch
+can-gw-ensure-dlc-boundaries-after-can-frame-modification.patch
+netfilter-nf_conncount-replace-conncount_lock_slots-with-conncount_slots.patch
+netfilter-nf_conncount-don-t-skip-eviction-when-age-is-negative.patch
+netfilter-nf_conncount-split-gc-in-two-phases.patch
+netfilter-nf_conncount-restart-search-when-nodes-have-been-erased.patch
+netfilter-nf_conncount-merge-lookup-and-add-functions.patch
+netfilter-nf_conncount-move-all-list-iterations-under-spinlock.patch
+netfilter-nf_conncount-speculative-garbage-collection-on-empty-lists.patch
+netfilter-nf_conncount-fix-argument-order-to-find_next_bit.patch