--- /dev/null
+From 4cd273bb91b3001f623f516ec726c49754571b1a Mon Sep 17 00:00:00 2001
+From: Florian Westphal <fw@strlen.de>
+Date: Fri, 28 Dec 2018 01:24:43 +0100
+Subject: netfilter: nf_conncount: don't skip eviction when age is negative
+
+From: Florian Westphal <fw@strlen.de>
+
+commit 4cd273bb91b3001f623f516ec726c49754571b1a upstream.
+
+age is a signed integer, so the result can be negative when the
+timestamps have a large delta. In this case we want to discard the entry.
+
+Instead of using age >= 2 || age < 0, just make it unsigned.
+
+Fixes: b36e4523d4d56 ("netfilter: nf_conncount: fix garbage collection confirm race")
+Reviewed-by: Shawn Bohrer <sbohrer@cloudflare.com>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/netfilter/nf_conncount.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/netfilter/nf_conncount.c
++++ b/net/netfilter/nf_conncount.c
+@@ -155,7 +155,7 @@ find_or_evict(struct net *net, struct nf
+ const struct nf_conntrack_tuple_hash *found;
+ unsigned long a, b;
+ int cpu = raw_smp_processor_id();
+- __s32 age;
++ u32 age;
+
+ found = nf_conntrack_find_get(net, &conn->zone, &conn->tuple);
+ if (found)
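[ Editor's note: the sketch below is a stand-alone userspace illustration of
  the wrap-around described above, not kernel code; the values of "stored"
  and "now" are made up for the example.  With a signed 32-bit age, a delta
  larger than INT_MAX comes out negative and the "age >= 2" eviction check
  is skipped; as an unsigned value the same delta is large and positive, so
  the stale entry is evicted. ]

	#include <inttypes.h>
	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t stored = 10;		/* like conn->jiffies32, recorded long ago */
		uint32_t now = 0x90000000u;	/* like (u32)jiffies, much later */

		int32_t signed_age = (int32_t)(now - stored);	/* negative on two's complement */
		uint32_t unsigned_age = now - stored;		/* huge positive value */

		printf("signed=%" PRId32 " unsigned=%" PRIu32 "\n",
		       signed_age, unsigned_age);
		return 0;
	}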
--- /dev/null
+From a007232066f6839d6f256bab21e825d968f1a163 Mon Sep 17 00:00:00 2001
+From: Florian Westphal <fw@strlen.de>
+Date: Fri, 28 Dec 2018 01:24:49 +0100
+Subject: netfilter: nf_conncount: fix argument order to find_next_bit
+
+From: Florian Westphal <fw@strlen.de>
+
+commit a007232066f6839d6f256bab21e825d968f1a163 upstream.
+
+Size and 'next bit' were swapped; this bug could cause the worker to
+reschedule itself even if the system was idle.
+
+Fixes: 5c789e131cbb9 ("netfilter: nf_conncount: Add list lock and gc worker, and RCU for init tree search")
+Reviewed-by: Shawn Bohrer <sbohrer@cloudflare.com>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/netfilter/nf_conncount.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/netfilter/nf_conncount.c
++++ b/net/netfilter/nf_conncount.c
+@@ -488,7 +488,7 @@ next:
+ clear_bit(tree, data->pending_trees);
+
+ next_tree = (tree + 1) % CONNCOUNT_SLOTS;
+- next_tree = find_next_bit(data->pending_trees, next_tree, CONNCOUNT_SLOTS);
++ next_tree = find_next_bit(data->pending_trees, CONNCOUNT_SLOTS, next_tree);
+
+ if (next_tree < CONNCOUNT_SLOTS) {
+ data->gc_tree = next_tree;
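[ Editor's note: the helper below is a simplified userspace stand-in for the
  semantics of the kernel's find_next_bit(addr, size, offset), added only to
  illustrate the bug; it is not the kernel implementation.  find_next_bit()
  returns the index of the next set bit at or after "offset", or "size" if
  there is none (including when offset >= size).  With the arguments swapped,
  size becomes next_tree and offset becomes CONNCOUNT_SLOTS, so the call
  always returns next_tree, which is always below CONNCOUNT_SLOTS -- and the
  worker requeues itself even though no tree is pending. ]

	#include <limits.h>

	/* simplified model of find_next_bit(): first set bit >= offset, or size */
	static unsigned long next_bit(const unsigned long *addr,
				      unsigned long size, unsigned long offset)
	{
		const unsigned long bits = sizeof(*addr) * CHAR_BIT;

		for (; offset < size; offset++)
			if (addr[offset / bits] & (1UL << (offset % bits)))
				return offset;
		return size;	/* also hit immediately when offset >= size */
	}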
--- /dev/null
+From df4a902509766897f7371fdfa4c3bf8bc321b55d Mon Sep 17 00:00:00 2001
+From: Florian Westphal <fw@strlen.de>
+Date: Fri, 28 Dec 2018 01:24:46 +0100
+Subject: netfilter: nf_conncount: merge lookup and add functions
+
+From: Florian Westphal <fw@strlen.de>
+
+commit df4a902509766897f7371fdfa4c3bf8bc321b55d upstream.
+
+'lookup' is always followed by 'add'.
+Merge both and make the list-walk part of nf_conncount_add().
+
+This also avoids one unneeded unlock/re-lock pair.
+
+Extra care needs to be taken in count_tree, as we only hold the rcu
+read lock, i.e. we can only insert into an existing tree node after
+acquiring its lock and making sure it has a nonzero count.
+
+As a zero count should be rare, just fall back to insert_tree()
+(which acquires the tree lock).
+
+This issue and its solution were pointed out by Shawn Bohrer
+during patch review.
+
+Reviewed-by: Shawn Bohrer <sbohrer@cloudflare.com>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/net/netfilter/nf_conntrack_count.h | 18 ---
+ net/netfilter/nf_conncount.c | 146 +++++++++++++----------------
+ net/netfilter/nft_connlimit.c | 14 --
+ 3 files changed, 72 insertions(+), 106 deletions(-)
+
+--- a/include/net/netfilter/nf_conntrack_count.h
++++ b/include/net/netfilter/nf_conntrack_count.h
+@@ -5,12 +5,6 @@
+
+ struct nf_conncount_data;
+
+-enum nf_conncount_list_add {
+- NF_CONNCOUNT_ADDED, /* list add was ok */
+- NF_CONNCOUNT_ERR, /* -ENOMEM, must drop skb */
+- NF_CONNCOUNT_SKIP, /* list is already reclaimed by gc */
+-};
+-
+ struct nf_conncount_list {
+ spinlock_t list_lock;
+ struct list_head head; /* connections with the same filtering key */
+@@ -29,18 +23,12 @@ unsigned int nf_conncount_count(struct n
+ const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_zone *zone);
+
+-void nf_conncount_lookup(struct net *net, struct nf_conncount_list *list,
+- const struct nf_conntrack_tuple *tuple,
+- const struct nf_conntrack_zone *zone,
+- bool *addit);
++int nf_conncount_add(struct net *net, struct nf_conncount_list *list,
++ const struct nf_conntrack_tuple *tuple,
++ const struct nf_conntrack_zone *zone);
+
+ void nf_conncount_list_init(struct nf_conncount_list *list);
+
+-enum nf_conncount_list_add
+-nf_conncount_add(struct nf_conncount_list *list,
+- const struct nf_conntrack_tuple *tuple,
+- const struct nf_conntrack_zone *zone);
+-
+ bool nf_conncount_gc_list(struct net *net,
+ struct nf_conncount_list *list);
+
+--- a/net/netfilter/nf_conncount.c
++++ b/net/netfilter/nf_conncount.c
+@@ -83,38 +83,6 @@ static int key_diff(const u32 *a, const
+ return memcmp(a, b, klen * sizeof(u32));
+ }
+
+-enum nf_conncount_list_add
+-nf_conncount_add(struct nf_conncount_list *list,
+- const struct nf_conntrack_tuple *tuple,
+- const struct nf_conntrack_zone *zone)
+-{
+- struct nf_conncount_tuple *conn;
+-
+- if (WARN_ON_ONCE(list->count > INT_MAX))
+- return NF_CONNCOUNT_ERR;
+-
+- conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
+- if (conn == NULL)
+- return NF_CONNCOUNT_ERR;
+-
+- conn->tuple = *tuple;
+- conn->zone = *zone;
+- conn->cpu = raw_smp_processor_id();
+- conn->jiffies32 = (u32)jiffies;
+- conn->dead = false;
+- spin_lock_bh(&list->list_lock);
+- if (list->dead == true) {
+- kmem_cache_free(conncount_conn_cachep, conn);
+- spin_unlock_bh(&list->list_lock);
+- return NF_CONNCOUNT_SKIP;
+- }
+- list_add_tail(&conn->node, &list->head);
+- list->count++;
+- spin_unlock_bh(&list->list_lock);
+- return NF_CONNCOUNT_ADDED;
+-}
+-EXPORT_SYMBOL_GPL(nf_conncount_add);
+-
+ static void __conn_free(struct rcu_head *h)
+ {
+ struct nf_conncount_tuple *conn;
+@@ -177,11 +145,10 @@ find_or_evict(struct net *net, struct nf
+ return ERR_PTR(-EAGAIN);
+ }
+
+-void nf_conncount_lookup(struct net *net,
+- struct nf_conncount_list *list,
+- const struct nf_conntrack_tuple *tuple,
+- const struct nf_conntrack_zone *zone,
+- bool *addit)
++static int __nf_conncount_add(struct net *net,
++ struct nf_conncount_list *list,
++ const struct nf_conntrack_tuple *tuple,
++ const struct nf_conntrack_zone *zone)
+ {
+ const struct nf_conntrack_tuple_hash *found;
+ struct nf_conncount_tuple *conn, *conn_n;
+@@ -189,9 +156,6 @@ void nf_conncount_lookup(struct net *net
+ unsigned int collect = 0;
+ bool free_entry = false;
+
+- /* best effort only */
+- *addit = tuple ? true : false;
+-
+ /* check the saved connections */
+ list_for_each_entry_safe(conn, conn_n, &list->head, node) {
+ if (collect > CONNCOUNT_GC_MAX_NODES)
+@@ -201,21 +165,19 @@ void nf_conncount_lookup(struct net *net
+ if (IS_ERR(found)) {
+ /* Not found, but might be about to be confirmed */
+ if (PTR_ERR(found) == -EAGAIN) {
+- if (!tuple)
+- continue;
+-
+ if (nf_ct_tuple_equal(&conn->tuple, tuple) &&
+ nf_ct_zone_id(&conn->zone, conn->zone.dir) ==
+ nf_ct_zone_id(zone, zone->dir))
+- *addit = false;
+- } else if (PTR_ERR(found) == -ENOENT)
++ return 0; /* already exists */
++ } else {
+ collect++;
++ }
+ continue;
+ }
+
+ found_ct = nf_ct_tuplehash_to_ctrack(found);
+
+- if (tuple && nf_ct_tuple_equal(&conn->tuple, tuple) &&
++ if (nf_ct_tuple_equal(&conn->tuple, tuple) &&
+ nf_ct_zone_equal(found_ct, zone, zone->dir)) {
+ /*
+ * We should not see tuples twice unless someone hooks
+@@ -223,7 +185,8 @@ void nf_conncount_lookup(struct net *net
+ *
+ * Attempt to avoid a re-add in this case.
+ */
+- *addit = false;
++ nf_ct_put(found_ct);
++ return 0;
+ } else if (already_closed(found_ct)) {
+ /*
+ * we do not care about connections which are
+@@ -237,8 +200,38 @@ void nf_conncount_lookup(struct net *net
+
+ nf_ct_put(found_ct);
+ }
++
++ if (WARN_ON_ONCE(list->count > INT_MAX))
++ return -EOVERFLOW;
++
++ conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
++ if (conn == NULL)
++ return -ENOMEM;
++
++ conn->tuple = *tuple;
++ conn->zone = *zone;
++ conn->cpu = raw_smp_processor_id();
++ conn->jiffies32 = (u32)jiffies;
++ list_add_tail(&conn->node, &list->head);
++ list->count++;
++ return 0;
++}
++
++int nf_conncount_add(struct net *net,
++ struct nf_conncount_list *list,
++ const struct nf_conntrack_tuple *tuple,
++ const struct nf_conntrack_zone *zone)
++{
++ int ret;
++
++ /* check the saved connections */
++ spin_lock_bh(&list->list_lock);
++ ret = __nf_conncount_add(net, list, tuple, zone);
++ spin_unlock_bh(&list->list_lock);
++
++ return ret;
+ }
+-EXPORT_SYMBOL_GPL(nf_conncount_lookup);
++EXPORT_SYMBOL_GPL(nf_conncount_add);
+
+ void nf_conncount_list_init(struct nf_conncount_list *list)
+ {
+@@ -339,13 +332,11 @@ insert_tree(struct net *net,
+ const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_zone *zone)
+ {
+- enum nf_conncount_list_add ret;
+ struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES];
+ struct rb_node **rbnode, *parent;
+ struct nf_conncount_rb *rbconn;
+ struct nf_conncount_tuple *conn;
+ unsigned int count = 0, gc_count = 0;
+- bool node_found = false;
+ bool do_gc = true;
+
+ spin_lock_bh(&nf_conncount_locks[hash]);
+@@ -363,20 +354,15 @@ restart:
+ } else if (diff > 0) {
+ rbnode = &((*rbnode)->rb_right);
+ } else {
+- /* unlikely: other cpu added node already */
+- node_found = true;
+- ret = nf_conncount_add(&rbconn->list, tuple, zone);
+- if (ret == NF_CONNCOUNT_ERR) {
++ int ret;
++
++ ret = nf_conncount_add(net, &rbconn->list, tuple, zone);
++ if (ret)
+ count = 0; /* hotdrop */
+- } else if (ret == NF_CONNCOUNT_ADDED) {
++ else
+ count = rbconn->list.count;
+- } else {
+- /* NF_CONNCOUNT_SKIP, rbconn is already
+- * reclaimed by gc, insert a new tree node
+- */
+- node_found = false;
+- }
+- break;
++ tree_nodes_free(root, gc_nodes, gc_count);
++ goto out_unlock;
+ }
+
+ if (gc_count >= ARRAY_SIZE(gc_nodes))
+@@ -394,9 +380,6 @@ restart:
+ goto restart;
+ }
+
+- if (node_found)
+- goto out_unlock;
+-
+ /* expected case: match, insert new node */
+ rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC);
+ if (rbconn == NULL)
+@@ -431,7 +414,6 @@ count_tree(struct net *net,
+ const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_zone *zone)
+ {
+- enum nf_conncount_list_add ret;
+ struct rb_root *root;
+ struct rb_node *parent;
+ struct nf_conncount_rb *rbconn;
+@@ -444,7 +426,6 @@ count_tree(struct net *net,
+ parent = rcu_dereference_raw(root->rb_node);
+ while (parent) {
+ int diff;
+- bool addit;
+
+ rbconn = rb_entry(parent, struct nf_conncount_rb, node);
+
+@@ -454,24 +435,29 @@ count_tree(struct net *net,
+ } else if (diff > 0) {
+ parent = rcu_dereference_raw(parent->rb_right);
+ } else {
+- /* same source network -> be counted! */
+- nf_conncount_lookup(net, &rbconn->list, tuple, zone,
+- &addit);
++ int ret;
+
+- if (!addit)
++ if (!tuple) {
++ nf_conncount_gc_list(net, &rbconn->list);
+ return rbconn->list.count;
++ }
+
+- ret = nf_conncount_add(&rbconn->list, tuple, zone);
+- if (ret == NF_CONNCOUNT_ERR) {
+- return 0; /* hotdrop */
+- } else if (ret == NF_CONNCOUNT_ADDED) {
+- return rbconn->list.count;
+- } else {
+- /* NF_CONNCOUNT_SKIP, rbconn is already
+- * reclaimed by gc, insert a new tree node
+- */
++ spin_lock_bh(&rbconn->list.list_lock);
++ /* Node might be about to be free'd.
++ * We need to defer to insert_tree() in this case.
++ */
++ if (rbconn->list.count == 0) {
++ spin_unlock_bh(&rbconn->list.list_lock);
+ break;
+ }
++
++ /* same source network -> be counted! */
++ ret = __nf_conncount_add(net, &rbconn->list, tuple, zone);
++ spin_unlock_bh(&rbconn->list.list_lock);
++ if (ret)
++ return 0; /* hotdrop */
++ else
++ return rbconn->list.count;
+ }
+ }
+
+--- a/net/netfilter/nft_connlimit.c
++++ b/net/netfilter/nft_connlimit.c
+@@ -30,7 +30,6 @@ static inline void nft_connlimit_do_eval
+ enum ip_conntrack_info ctinfo;
+ const struct nf_conn *ct;
+ unsigned int count;
+- bool addit;
+
+ tuple_ptr = &tuple;
+
+@@ -44,19 +43,12 @@ static inline void nft_connlimit_do_eval
+ return;
+ }
+
+- nf_conncount_lookup(nft_net(pkt), &priv->list, tuple_ptr, zone,
+- &addit);
+- count = priv->list.count;
+-
+- if (!addit)
+- goto out;
+-
+- if (nf_conncount_add(&priv->list, tuple_ptr, zone) == NF_CONNCOUNT_ERR) {
++ if (nf_conncount_add(nft_net(pkt), &priv->list, tuple_ptr, zone)) {
+ regs->verdict.code = NF_DROP;
+ return;
+ }
+- count++;
+-out:
++
++ count = priv->list.count;
+
+ if ((count > priv->limit) ^ priv->invert) {
+ regs->verdict.code = NFT_BREAK;
--- /dev/null
+From 2f971a8f425545da52ca0e6bee81f5b1ea0ccc5f Mon Sep 17 00:00:00 2001
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Fri, 28 Dec 2018 01:24:47 +0100
+Subject: netfilter: nf_conncount: move all list iterations under spinlock
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit 2f971a8f425545da52ca0e6bee81f5b1ea0ccc5f upstream.
+
+Two CPUs may race to remove a connection from the list; the existing
+conn->dead flag does not prevent the resulting use-after-free. Use the
+per-list spinlock to protect list iterations.
+
+As all accesses to the list now happen while holding the per-list lock,
+we no longer need to delay free operations with rcu.
+
+Joint work with Florian.
+
+Fixes: 5c789e131cbb9 ("netfilter: nf_conncount: Add list lock and gc worker, and RCU for init tree search")
+Reviewed-by: Shawn Bohrer <sbohrer@cloudflare.com>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/netfilter/nf_conncount.c | 46 ++++++++++++++++++-------------------------
+ 1 file changed, 20 insertions(+), 26 deletions(-)
+
+--- a/net/netfilter/nf_conncount.c
++++ b/net/netfilter/nf_conncount.c
+@@ -43,8 +43,6 @@ struct nf_conncount_tuple {
+ struct nf_conntrack_zone zone;
+ int cpu;
+ u32 jiffies32;
+- bool dead;
+- struct rcu_head rcu_head;
+ };
+
+ struct nf_conncount_rb {
+@@ -83,36 +81,21 @@ static int key_diff(const u32 *a, const
+ return memcmp(a, b, klen * sizeof(u32));
+ }
+
+-static void __conn_free(struct rcu_head *h)
+-{
+- struct nf_conncount_tuple *conn;
+-
+- conn = container_of(h, struct nf_conncount_tuple, rcu_head);
+- kmem_cache_free(conncount_conn_cachep, conn);
+-}
+-
+ static bool conn_free(struct nf_conncount_list *list,
+ struct nf_conncount_tuple *conn)
+ {
+ bool free_entry = false;
+
+- spin_lock_bh(&list->list_lock);
+-
+- if (conn->dead) {
+- spin_unlock_bh(&list->list_lock);
+- return free_entry;
+- }
++ lockdep_assert_held(&list->list_lock);
+
+ list->count--;
+- conn->dead = true;
+- list_del_rcu(&conn->node);
++ list_del(&conn->node);
+ if (list->count == 0) {
+ list->dead = true;
+ free_entry = true;
+ }
+
+- spin_unlock_bh(&list->list_lock);
+- call_rcu(&conn->rcu_head, __conn_free);
++ kmem_cache_free(conncount_conn_cachep, conn);
+ return free_entry;
+ }
+
+@@ -242,7 +225,7 @@ void nf_conncount_list_init(struct nf_co
+ }
+ EXPORT_SYMBOL_GPL(nf_conncount_list_init);
+
+-/* Return true if the list is empty */
++/* Return true if the list is empty. Must be called with BH disabled. */
+ bool nf_conncount_gc_list(struct net *net,
+ struct nf_conncount_list *list)
+ {
+@@ -253,12 +236,18 @@ bool nf_conncount_gc_list(struct net *ne
+ bool free_entry = false;
+ bool ret = false;
+
++ /* don't bother if other cpu is already doing GC */
++ if (!spin_trylock(&list->list_lock))
++ return false;
++
+ list_for_each_entry_safe(conn, conn_n, &list->head, node) {
+ found = find_or_evict(net, list, conn, &free_entry);
+ if (IS_ERR(found)) {
+ if (PTR_ERR(found) == -ENOENT) {
+- if (free_entry)
++ if (free_entry) {
++ spin_unlock(&list->list_lock);
+ return true;
++ }
+ collected++;
+ }
+ continue;
+@@ -271,23 +260,24 @@ bool nf_conncount_gc_list(struct net *ne
+ * closed already -> ditch it
+ */
+ nf_ct_put(found_ct);
+- if (conn_free(list, conn))
++ if (conn_free(list, conn)) {
++ spin_unlock(&list->list_lock);
+ return true;
++ }
+ collected++;
+ continue;
+ }
+
+ nf_ct_put(found_ct);
+ if (collected > CONNCOUNT_GC_MAX_NODES)
+- return false;
++ break;
+ }
+
+- spin_lock_bh(&list->list_lock);
+ if (!list->count) {
+ list->dead = true;
+ ret = true;
+ }
+- spin_unlock_bh(&list->list_lock);
++ spin_unlock(&list->list_lock);
+
+ return ret;
+ }
+@@ -478,6 +468,7 @@ static void tree_gc_worker(struct work_s
+ tree = data->gc_tree % CONNCOUNT_SLOTS;
+ root = &data->root[tree];
+
++ local_bh_disable();
+ rcu_read_lock();
+ for (node = rb_first(root); node != NULL; node = rb_next(node)) {
+ rbconn = rb_entry(node, struct nf_conncount_rb, node);
+@@ -485,6 +476,9 @@ static void tree_gc_worker(struct work_s
+ gc_count++;
+ }
+ rcu_read_unlock();
++ local_bh_enable();
++
++ cond_resched();
+
+ spin_lock_bh(&nf_conncount_locks[tree]);
+ if (gc_count < ARRAY_SIZE(gc_nodes))
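[ Editor's note: a minimal userspace sketch of the trylock idiom this patch
  adds to nf_conncount_gc_list(), using pthreads as a stand-in for the kernel
  spinlock; the struct and function names here are invented for the
  illustration.  If another CPU already holds the per-list lock and is
  collecting, the caller simply reports "not empty" and lets that CPU finish
  the work. ]

	#include <pthread.h>
	#include <stdbool.h>

	struct counted_list {
		pthread_mutex_t lock;
		unsigned int count;	/* number of entries on the list */
	};

	static bool gc_list(struct counted_list *list)
	{
		bool empty;

		if (pthread_mutex_trylock(&list->lock) != 0)
			return false;	/* another thread is already doing GC */

		/* ... walk the list and drop stale entries here ... */

		empty = (list->count == 0);
		pthread_mutex_unlock(&list->lock);
		return empty;
	}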
--- /dev/null
+From c78e7818f16f687389174c4569243abbec8dc68f Mon Sep 17 00:00:00 2001
+From: Shawn Bohrer <sbohrer@cloudflare.com>
+Date: Fri, 28 Dec 2018 01:24:42 +0100
+Subject: netfilter: nf_conncount: replace CONNCOUNT_LOCK_SLOTS with CONNCOUNT_SLOTS
+
+From: Shawn Bohrer <sbohrer@cloudflare.com>
+
+commit c78e7818f16f687389174c4569243abbec8dc68f upstream.
+
+Most of the time these were the same value anyway, but when
+CONFIG_LOCKDEP was enabled we would use a smaller number of locks to
+reduce overhead. Unfortunately having two values is confusing and not
+worth the complexity.
+
+This fixes a bug where tree_gc_worker() would only GC up to
+CONNCOUNT_LOCK_SLOTS trees, which meant that with CONFIG_LOCKDEP enabled
+not all trees would be GCed by tree_gc_worker().
+
+Fixes: 5c789e131cbb9 ("netfilter: nf_conncount: Add list lock and gc worker, and RCU for init tree search")
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Shawn Bohrer <sbohrer@cloudflare.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/netfilter/nf_conncount.c | 19 +++++--------------
+ 1 file changed, 5 insertions(+), 14 deletions(-)
+
+--- a/net/netfilter/nf_conncount.c
++++ b/net/netfilter/nf_conncount.c
+@@ -33,12 +33,6 @@
+
+ #define CONNCOUNT_SLOTS 256U
+
+-#ifdef CONFIG_LOCKDEP
+-#define CONNCOUNT_LOCK_SLOTS 8U
+-#else
+-#define CONNCOUNT_LOCK_SLOTS 256U
+-#endif
+-
+ #define CONNCOUNT_GC_MAX_NODES 8
+ #define MAX_KEYLEN 5
+
+@@ -60,7 +54,7 @@ struct nf_conncount_rb {
+ struct rcu_head rcu_head;
+ };
+
+-static spinlock_t nf_conncount_locks[CONNCOUNT_LOCK_SLOTS] __cacheline_aligned_in_smp;
++static spinlock_t nf_conncount_locks[CONNCOUNT_SLOTS] __cacheline_aligned_in_smp;
+
+ struct nf_conncount_data {
+ unsigned int keylen;
+@@ -353,7 +347,7 @@ insert_tree(struct net *net,
+ unsigned int count = 0, gc_count = 0;
+ bool node_found = false;
+
+- spin_lock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
++ spin_lock_bh(&nf_conncount_locks[hash]);
+
+ parent = NULL;
+ rbnode = &(root->rb_node);
+@@ -430,7 +424,7 @@ insert_tree(struct net *net,
+ rb_link_node_rcu(&rbconn->node, parent, rbnode);
+ rb_insert_color(&rbconn->node, root);
+ out_unlock:
+- spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
++ spin_unlock_bh(&nf_conncount_locks[hash]);
+ return count;
+ }
+
+@@ -499,7 +493,7 @@ static void tree_gc_worker(struct work_s
+ struct rb_node *node;
+ unsigned int tree, next_tree, gc_count = 0;
+
+- tree = data->gc_tree % CONNCOUNT_LOCK_SLOTS;
++ tree = data->gc_tree % CONNCOUNT_SLOTS;
+ root = &data->root[tree];
+
+ rcu_read_lock();
+@@ -621,10 +615,7 @@ static int __init nf_conncount_modinit(v
+ {
+ int i;
+
+- BUILD_BUG_ON(CONNCOUNT_LOCK_SLOTS > CONNCOUNT_SLOTS);
+- BUILD_BUG_ON((CONNCOUNT_SLOTS % CONNCOUNT_LOCK_SLOTS) != 0);
+-
+- for (i = 0; i < CONNCOUNT_LOCK_SLOTS; ++i)
++ for (i = 0; i < CONNCOUNT_SLOTS; ++i)
+ spin_lock_init(&nf_conncount_locks[i]);
+
+ conncount_conn_cachep = kmem_cache_create("nf_conncount_tuple",
--- /dev/null
+From e8cfb372b38a1b8979aa7f7631fb5e7b11c3793c Mon Sep 17 00:00:00 2001
+From: Florian Westphal <fw@strlen.de>
+Date: Fri, 28 Dec 2018 01:24:45 +0100
+Subject: netfilter: nf_conncount: restart search when nodes have been erased
+
+From: Florian Westphal <fw@strlen.de>
+
+commit e8cfb372b38a1b8979aa7f7631fb5e7b11c3793c upstream.
+
+Shawn Bohrer reported the following crash:
+ |RIP: 0010:rb_erase+0xae/0x360
+ [..]
+ Call Trace:
+ nf_conncount_destroy+0x59/0xc0 [nf_conncount]
+ cleanup_match+0x45/0x70 [ip_tables]
+ ...
+
+Shawn tracked this down to a bogus 'parent' pointer:
+The problem is that when we insert a new node, there is a chance that
+the 'parent' we found was also passed to tree_nodes_free() (because
+that node was empty) for erase+free.
+
+Instead of trying to be clever and detect when this happens, restart
+the search if we have evicted one or more nodes. To prevent frequent
+restarts, do not perform gc on the second round.
+
+Also, unconditionally schedule the gc worker.
+The condition
+
+ gc_count > ARRAY_SIZE(gc_nodes)
+
+cannot be true unless the tree grows very large, as the height of the
+tree will be low even with hundreds of nodes present.
+
+Fixes: 5c789e131cbb9 ("netfilter: nf_conncount: Add list lock and gc worker, and RCU for init tree search")
+Reported-by: Shawn Bohrer <sbohrer@cloudflare.com>
+Reviewed-by: Shawn Bohrer <sbohrer@cloudflare.com>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/netfilter/nf_conncount.c | 18 +++++++-----------
+ 1 file changed, 7 insertions(+), 11 deletions(-)
+
+--- a/net/netfilter/nf_conncount.c
++++ b/net/netfilter/nf_conncount.c
+@@ -346,9 +346,10 @@ insert_tree(struct net *net,
+ struct nf_conncount_tuple *conn;
+ unsigned int count = 0, gc_count = 0;
+ bool node_found = false;
++ bool do_gc = true;
+
+ spin_lock_bh(&nf_conncount_locks[hash]);
+-
++restart:
+ parent = NULL;
+ rbnode = &(root->rb_node);
+ while (*rbnode) {
+@@ -381,21 +382,16 @@ insert_tree(struct net *net,
+ if (gc_count >= ARRAY_SIZE(gc_nodes))
+ continue;
+
+- if (nf_conncount_gc_list(net, &rbconn->list))
++ if (do_gc && nf_conncount_gc_list(net, &rbconn->list))
+ gc_nodes[gc_count++] = rbconn;
+ }
+
+ if (gc_count) {
+ tree_nodes_free(root, gc_nodes, gc_count);
+- /* tree_node_free before new allocation permits
+- * allocator to re-use newly free'd object.
+- *
+- * This is a rare event; in most cases we will find
+- * existing node to re-use. (or gc_count is 0).
+- */
+-
+- if (gc_count >= ARRAY_SIZE(gc_nodes))
+- schedule_gc_worker(data, hash);
++ schedule_gc_worker(data, hash);
++ gc_count = 0;
++ do_gc = false;
++ goto restart;
+ }
+
+ if (node_found)
--- /dev/null
+From c80f10bc973af2ace6b1414724eeff61eaa71837 Mon Sep 17 00:00:00 2001
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Fri, 28 Dec 2018 01:24:48 +0100
+Subject: netfilter: nf_conncount: speculative garbage collection on empty lists
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit c80f10bc973af2ace6b1414724eeff61eaa71837 upstream.
+
+Instead of removing an empty list node that might be reintroduced soon
+thereafter, tentatively place the empty list node on the list passed to
+tree_nodes_free(), then re-check that the list is still empty before
+erasing it from the tree.
+
+[ Florian: rebase on top of pending nf_conncount fixes ]
+
+Fixes: 5c789e131cbb9 ("netfilter: nf_conncount: Add list lock and gc worker, and RCU for init tree search")
+Reviewed-by: Shawn Bohrer <sbohrer@cloudflare.com>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/net/netfilter/nf_conntrack_count.h | 1
+ net/netfilter/nf_conncount.c | 47 +++++++++--------------------
+ 2 files changed, 15 insertions(+), 33 deletions(-)
+
+--- a/include/net/netfilter/nf_conntrack_count.h
++++ b/include/net/netfilter/nf_conntrack_count.h
+@@ -9,7 +9,6 @@ struct nf_conncount_list {
+ spinlock_t list_lock;
+ struct list_head head; /* connections with the same filtering key */
+ unsigned int count; /* length of list */
+- bool dead;
+ };
+
+ struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family,
+--- a/net/netfilter/nf_conncount.c
++++ b/net/netfilter/nf_conncount.c
+@@ -81,27 +81,20 @@ static int key_diff(const u32 *a, const
+ return memcmp(a, b, klen * sizeof(u32));
+ }
+
+-static bool conn_free(struct nf_conncount_list *list,
++static void conn_free(struct nf_conncount_list *list,
+ struct nf_conncount_tuple *conn)
+ {
+- bool free_entry = false;
+-
+ lockdep_assert_held(&list->list_lock);
+
+ list->count--;
+ list_del(&conn->node);
+- if (list->count == 0) {
+- list->dead = true;
+- free_entry = true;
+- }
+
+ kmem_cache_free(conncount_conn_cachep, conn);
+- return free_entry;
+ }
+
+ static const struct nf_conntrack_tuple_hash *
+ find_or_evict(struct net *net, struct nf_conncount_list *list,
+- struct nf_conncount_tuple *conn, bool *free_entry)
++ struct nf_conncount_tuple *conn)
+ {
+ const struct nf_conntrack_tuple_hash *found;
+ unsigned long a, b;
+@@ -121,7 +114,7 @@ find_or_evict(struct net *net, struct nf
+ */
+ age = a - b;
+ if (conn->cpu == cpu || age >= 2) {
+- *free_entry = conn_free(list, conn);
++ conn_free(list, conn);
+ return ERR_PTR(-ENOENT);
+ }
+
+@@ -137,14 +130,13 @@ static int __nf_conncount_add(struct net
+ struct nf_conncount_tuple *conn, *conn_n;
+ struct nf_conn *found_ct;
+ unsigned int collect = 0;
+- bool free_entry = false;
+
+ /* check the saved connections */
+ list_for_each_entry_safe(conn, conn_n, &list->head, node) {
+ if (collect > CONNCOUNT_GC_MAX_NODES)
+ break;
+
+- found = find_or_evict(net, list, conn, &free_entry);
++ found = find_or_evict(net, list, conn);
+ if (IS_ERR(found)) {
+ /* Not found, but might be about to be confirmed */
+ if (PTR_ERR(found) == -EAGAIN) {
+@@ -221,7 +213,6 @@ void nf_conncount_list_init(struct nf_co
+ spin_lock_init(&list->list_lock);
+ INIT_LIST_HEAD(&list->head);
+ list->count = 0;
+- list->dead = false;
+ }
+ EXPORT_SYMBOL_GPL(nf_conncount_list_init);
+
+@@ -233,7 +224,6 @@ bool nf_conncount_gc_list(struct net *ne
+ struct nf_conncount_tuple *conn, *conn_n;
+ struct nf_conn *found_ct;
+ unsigned int collected = 0;
+- bool free_entry = false;
+ bool ret = false;
+
+ /* don't bother if other cpu is already doing GC */
+@@ -241,15 +231,10 @@ bool nf_conncount_gc_list(struct net *ne
+ return false;
+
+ list_for_each_entry_safe(conn, conn_n, &list->head, node) {
+- found = find_or_evict(net, list, conn, &free_entry);
++ found = find_or_evict(net, list, conn);
+ if (IS_ERR(found)) {
+- if (PTR_ERR(found) == -ENOENT) {
+- if (free_entry) {
+- spin_unlock(&list->list_lock);
+- return true;
+- }
++ if (PTR_ERR(found) == -ENOENT)
+ collected++;
+- }
+ continue;
+ }
+
+@@ -260,10 +245,7 @@ bool nf_conncount_gc_list(struct net *ne
+ * closed already -> ditch it
+ */
+ nf_ct_put(found_ct);
+- if (conn_free(list, conn)) {
+- spin_unlock(&list->list_lock);
+- return true;
+- }
++ conn_free(list, conn);
+ collected++;
+ continue;
+ }
+@@ -273,10 +255,8 @@ bool nf_conncount_gc_list(struct net *ne
+ break;
+ }
+
+- if (!list->count) {
+- list->dead = true;
++ if (!list->count)
+ ret = true;
+- }
+ spin_unlock(&list->list_lock);
+
+ return ret;
+@@ -291,6 +271,7 @@ static void __tree_nodes_free(struct rcu
+ kmem_cache_free(conncount_rb_cachep, rbconn);
+ }
+
++/* caller must hold tree nf_conncount_locks[] lock */
+ static void tree_nodes_free(struct rb_root *root,
+ struct nf_conncount_rb *gc_nodes[],
+ unsigned int gc_count)
+@@ -300,8 +281,10 @@ static void tree_nodes_free(struct rb_ro
+ while (gc_count) {
+ rbconn = gc_nodes[--gc_count];
+ spin_lock(&rbconn->list.list_lock);
+- rb_erase(&rbconn->node, root);
+- call_rcu(&rbconn->rcu_head, __tree_nodes_free);
++ if (!rbconn->list.count) {
++ rb_erase(&rbconn->node, root);
++ call_rcu(&rbconn->rcu_head, __tree_nodes_free);
++ }
+ spin_unlock(&rbconn->list.list_lock);
+ }
+ }
+@@ -318,7 +301,6 @@ insert_tree(struct net *net,
+ struct rb_root *root,
+ unsigned int hash,
+ const u32 *key,
+- u8 keylen,
+ const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_zone *zone)
+ {
+@@ -327,6 +309,7 @@ insert_tree(struct net *net,
+ struct nf_conncount_rb *rbconn;
+ struct nf_conncount_tuple *conn;
+ unsigned int count = 0, gc_count = 0;
++ u8 keylen = data->keylen;
+ bool do_gc = true;
+
+ spin_lock_bh(&nf_conncount_locks[hash]);
+@@ -454,7 +437,7 @@ count_tree(struct net *net,
+ if (!tuple)
+ return 0;
+
+- return insert_tree(net, data, root, hash, key, keylen, tuple, zone);
++ return insert_tree(net, data, root, hash, key, tuple, zone);
+ }
+
+ static void tree_gc_worker(struct work_struct *work)
--- /dev/null
+From f7fcc98dfc2d136722007fec0debbed761679b94 Mon Sep 17 00:00:00 2001
+From: Florian Westphal <fw@strlen.de>
+Date: Fri, 28 Dec 2018 01:24:44 +0100
+Subject: netfilter: nf_conncount: split gc in two phases
+
+From: Florian Westphal <fw@strlen.de>
+
+commit f7fcc98dfc2d136722007fec0debbed761679b94 upstream.
+
+The lockless workqueue garbage collector can race with the packet path
+garbage collector to delete list nodes, as it calls tree_nodes_free()
+with the addresses of nodes that might have been free'd already from
+another cpu.
+
+To fix this, split gc into two phases.
+
+One phase performs gc on the connections: from a locking perspective,
+this is the same as count_tree(): we hold the rcu lock, but we do not
+change the tree; we only change the nodes' contents.
+
+The second phase acquires the tree lock and reaps empty nodes.
+This avoids a race between the garbage collection and the packet path:
+if a node has been free'd already, the second phase won't find it anymore.
+
+This second phase is, from a locking perspective, the same as insert_tree().
+
+The former only modifies nodes (list content, count); the latter modifies
+the tree itself (rb_erase or rb_insert).
+
+Fixes: 5c789e131cbb9 ("netfilter: nf_conncount: Add list lock and gc worker, and RCU for init tree search")
+Reviewed-by: Shawn Bohrer <sbohrer@cloudflare.com>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/netfilter/nf_conncount.c | 22 +++++++++++++++++++---
+ 1 file changed, 19 insertions(+), 3 deletions(-)
+
+--- a/net/netfilter/nf_conncount.c
++++ b/net/netfilter/nf_conncount.c
+@@ -500,16 +500,32 @@ static void tree_gc_worker(struct work_s
+ for (node = rb_first(root); node != NULL; node = rb_next(node)) {
+ rbconn = rb_entry(node, struct nf_conncount_rb, node);
+ if (nf_conncount_gc_list(data->net, &rbconn->list))
+- gc_nodes[gc_count++] = rbconn;
++ gc_count++;
+ }
+ rcu_read_unlock();
+
+ spin_lock_bh(&nf_conncount_locks[tree]);
++ if (gc_count < ARRAY_SIZE(gc_nodes))
++ goto next; /* do not bother */
+
+- if (gc_count) {
+- tree_nodes_free(root, gc_nodes, gc_count);
++ gc_count = 0;
++ node = rb_first(root);
++ while (node != NULL) {
++ rbconn = rb_entry(node, struct nf_conncount_rb, node);
++ node = rb_next(node);
++
++ if (rbconn->list.count > 0)
++ continue;
++
++ gc_nodes[gc_count++] = rbconn;
++ if (gc_count >= ARRAY_SIZE(gc_nodes)) {
++ tree_nodes_free(root, gc_nodes, gc_count);
++ gc_count = 0;
++ }
+ }
+
++ tree_nodes_free(root, gc_nodes, gc_count);
++next:
+ clear_bit(tree, data->pending_trees);
+
+ next_tree = (tree + 1) % CONNCOUNT_SLOTS;
tty-ldsem-wake-up-readers-after-timed-out-down_write.patch
-can-gw-ensure-dlc-boundaries-after-can-frame-modification.patch
tty-hold-tty_ldisc_lock-during-tty_reopen.patch
tty-simplify-tty-count-math-in-tty_reopen.patch
tty-don-t-hold-ldisc-lock-in-tty_reopen-if-ldisc-present.patch
+can-gw-ensure-dlc-boundaries-after-can-frame-modification.patch
+netfilter-nf_conncount-replace-conncount_lock_slots-with-conncount_slots.patch
+netfilter-nf_conncount-don-t-skip-eviction-when-age-is-negative.patch
+netfilter-nf_conncount-split-gc-in-two-phases.patch
+netfilter-nf_conncount-restart-search-when-nodes-have-been-erased.patch
+netfilter-nf_conncount-merge-lookup-and-add-functions.patch
+netfilter-nf_conncount-move-all-list-iterations-under-spinlock.patch
+netfilter-nf_conncount-speculative-garbage-collection-on-empty-lists.patch
+netfilter-nf_conncount-fix-argument-order-to-find_next_bit.patch