--- /dev/null
+From ae0ac0ed6fcf5af3be0f63eb935f483f44a402d2 Mon Sep 17 00:00:00 2001
+From: Florian Westphal <fw@strlen.de>
+Date: Tue, 22 Nov 2016 14:44:19 +0100
+Subject: netfilter: x_tables: pack percpu counter allocations
+
+From: Florian Westphal <fw@strlen.de>
+
+commit ae0ac0ed6fcf5af3be0f63eb935f483f44a402d2 upstream.
+
+Instead of allocating each xt_counter individually, allocate 4k chunks
+and then use these to satisfy counter allocation requests.
+
+This should speed up rule evaluation by increasing data locality; it
+also speeds up ruleset loading because we reduce the number of calls
+to the percpu allocator.
+
+As Eric points out, we can't use PAGE_SIZE as the chunk size: the
+percpu allocator would fail on arches with a 64k page size.
+
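+To illustrate the scheme outside the kernel, here is a minimal
+userspace sketch of the same batching idea; aligned_alloc() stands in
+for __alloc_percpu(), the block size and the state struct mirror the
+patch, and the function and parameter names are illustrative:
+
+	#include <stdbool.h>
+	#include <stdlib.h>
+
+	#define XT_PCPU_BLOCK_SIZE 4096
+
+	struct xt_counters { unsigned long pcnt, bcnt; };
+
+	struct xt_percpu_counter_alloc_state {
+		unsigned int off;  /* next free offset in the block */
+		char *mem;         /* current 4k block, NULL if none */
+	};
+
+	static bool counter_alloc(struct xt_percpu_counter_alloc_state *state,
+				  struct xt_counters **slot)
+	{
+		if (!state->mem) {
+			/* align == size, so every block starts on a
+			 * 4k boundary; the free path relies on this */
+			state->mem = aligned_alloc(XT_PCPU_BLOCK_SIZE,
+						   XT_PCPU_BLOCK_SIZE);
+			if (!state->mem)
+				return false;
+		}
+		*slot = (struct xt_counters *)(state->mem + state->off);
+		state->off += sizeof(struct xt_counters);
+		if (state->off > XT_PCPU_BLOCK_SIZE - sizeof(struct xt_counters)) {
+			/* block exhausted, grab a fresh one next time */
+			state->mem = NULL;
+			state->off = 0;
+		}
+		return true;
+	}
+
+Each 4k block holds 256 of the 16-byte counter pairs, so a
+1000-rule ruleset needs four allocator calls instead of a thousand.
+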
+Suggested-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/netfilter/x_tables.h | 7 ++++++-
+ net/ipv4/netfilter/arp_tables.c | 9 ++++++---
+ net/ipv4/netfilter/ip_tables.c | 9 ++++++---
+ net/ipv6/netfilter/ip6_tables.c | 9 ++++++---
+ net/netfilter/x_tables.c | 34 +++++++++++++++++++++++++---------
+ 5 files changed, 49 insertions(+), 19 deletions(-)
+
+--- a/include/linux/netfilter/x_tables.h
++++ b/include/linux/netfilter/x_tables.h
+@@ -375,8 +375,13 @@ static inline unsigned long ifname_compa
+ return ret;
+ }
+
++struct xt_percpu_counter_alloc_state {
++ unsigned int off;
++ const char __percpu *mem;
++};
+
+-bool xt_percpu_counter_alloc(struct xt_counters *counters);
++bool xt_percpu_counter_alloc(struct xt_percpu_counter_alloc_state *state,
++ struct xt_counters *counter);
+ void xt_percpu_counter_free(struct xt_counters *cnt);
+
+ static inline struct xt_counters *
+--- a/net/ipv4/netfilter/arp_tables.c
++++ b/net/ipv4/netfilter/arp_tables.c
+@@ -419,13 +419,14 @@ static inline int check_target(struct ar
+ }
+
+ static inline int
+-find_check_entry(struct arpt_entry *e, const char *name, unsigned int size)
++find_check_entry(struct arpt_entry *e, const char *name, unsigned int size,
++ struct xt_percpu_counter_alloc_state *alloc_state)
+ {
+ struct xt_entry_target *t;
+ struct xt_target *target;
+ int ret;
+
+- if (!xt_percpu_counter_alloc(&e->counters))
++ if (!xt_percpu_counter_alloc(alloc_state, &e->counters))
+ return -ENOMEM;
+
+ t = arpt_get_target(e);
+@@ -533,6 +534,7 @@ static inline void cleanup_entry(struct
+ static int translate_table(struct xt_table_info *newinfo, void *entry0,
+ const struct arpt_replace *repl)
+ {
++ struct xt_percpu_counter_alloc_state alloc_state = { 0 };
+ struct arpt_entry *iter;
+ unsigned int *offsets;
+ unsigned int i;
+@@ -595,7 +597,8 @@ static int translate_table(struct xt_tab
+ /* Finally, each sanity check must pass */
+ i = 0;
+ xt_entry_foreach(iter, entry0, newinfo->size) {
+- ret = find_check_entry(iter, repl->name, repl->size);
++ ret = find_check_entry(iter, repl->name, repl->size,
++ &alloc_state);
+ if (ret != 0)
+ break;
+ ++i;
+--- a/net/ipv4/netfilter/ip_tables.c
++++ b/net/ipv4/netfilter/ip_tables.c
+@@ -540,7 +540,8 @@ static int check_target(struct ipt_entry
+
+ static int
+ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
+- unsigned int size)
++ unsigned int size,
++ struct xt_percpu_counter_alloc_state *alloc_state)
+ {
+ struct xt_entry_target *t;
+ struct xt_target *target;
+@@ -549,7 +550,7 @@ find_check_entry(struct ipt_entry *e, st
+ struct xt_mtchk_param mtpar;
+ struct xt_entry_match *ematch;
+
+- if (!xt_percpu_counter_alloc(&e->counters))
++ if (!xt_percpu_counter_alloc(alloc_state, &e->counters))
+ return -ENOMEM;
+
+ j = 0;
+@@ -685,6 +686,7 @@ static int
+ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
+ const struct ipt_replace *repl)
+ {
++ struct xt_percpu_counter_alloc_state alloc_state = { 0 };
+ struct ipt_entry *iter;
+ unsigned int *offsets;
+ unsigned int i;
+@@ -744,7 +746,8 @@ translate_table(struct net *net, struct
+ /* Finally, each sanity check must pass */
+ i = 0;
+ xt_entry_foreach(iter, entry0, newinfo->size) {
+- ret = find_check_entry(iter, net, repl->name, repl->size);
++ ret = find_check_entry(iter, net, repl->name, repl->size,
++ &alloc_state);
+ if (ret != 0)
+ break;
+ ++i;
+--- a/net/ipv6/netfilter/ip6_tables.c
++++ b/net/ipv6/netfilter/ip6_tables.c
+@@ -570,7 +570,8 @@ static int check_target(struct ip6t_entr
+
+ static int
+ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
+- unsigned int size)
++ unsigned int size,
++ struct xt_percpu_counter_alloc_state *alloc_state)
+ {
+ struct xt_entry_target *t;
+ struct xt_target *target;
+@@ -579,7 +580,7 @@ find_check_entry(struct ip6t_entry *e, s
+ struct xt_mtchk_param mtpar;
+ struct xt_entry_match *ematch;
+
+- if (!xt_percpu_counter_alloc(&e->counters))
++ if (!xt_percpu_counter_alloc(alloc_state, &e->counters))
+ return -ENOMEM;
+
+ j = 0;
+@@ -713,6 +714,7 @@ static int
+ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
+ const struct ip6t_replace *repl)
+ {
++ struct xt_percpu_counter_alloc_state alloc_state = { 0 };
+ struct ip6t_entry *iter;
+ unsigned int *offsets;
+ unsigned int i;
+@@ -772,7 +774,8 @@ translate_table(struct net *net, struct
+ /* Finally, each sanity check must pass */
+ i = 0;
+ xt_entry_foreach(iter, entry0, newinfo->size) {
+- ret = find_check_entry(iter, net, repl->name, repl->size);
++ ret = find_check_entry(iter, net, repl->name, repl->size,
++ &alloc_state);
+ if (ret != 0)
+ break;
+ ++i;
+--- a/net/netfilter/x_tables.c
++++ b/net/netfilter/x_tables.c
+@@ -39,6 +39,8 @@ MODULE_LICENSE("GPL");
+ MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+ MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module");
+
++#define XT_PCPU_BLOCK_SIZE 4096
++
+ struct compat_delta {
+ unsigned int offset; /* offset in kernel */
+ int delta; /* delta in 32bit user land */
+@@ -1622,6 +1624,7 @@ EXPORT_SYMBOL_GPL(xt_proto_fini);
+ /**
+ * xt_percpu_counter_alloc - allocate x_tables rule counter
+ *
++ * @state: pointer to xt_percpu allocation state
+ * @counter: pointer to counter struct inside the ip(6)/arpt_entry struct
+ *
+ * On SMP, the packet counter [ ip(6)t_entry->counters.pcnt ] will then
+@@ -1630,21 +1633,34 @@ EXPORT_SYMBOL_GPL(xt_proto_fini);
+ * Rule evaluation needs to use xt_get_this_cpu_counter() helper
+ * to fetch the real percpu counter.
+ *
++ * To speed up allocation and improve data locality, a 4kb block is
++ * allocated.
++ *
++ * xt_percpu_counter_alloc_state contains the base address of the
++ * allocated page and the current sub-offset.
++ *
+ * returns false on error.
+ */
+-bool xt_percpu_counter_alloc(struct xt_counters *counter)
++bool xt_percpu_counter_alloc(struct xt_percpu_counter_alloc_state *state,
++ struct xt_counters *counter)
+ {
+- void __percpu *res;
++ BUILD_BUG_ON(XT_PCPU_BLOCK_SIZE < (sizeof(*counter) * 2));
+
+ if (nr_cpu_ids <= 1)
+ return true;
+
+- res = __alloc_percpu(sizeof(struct xt_counters),
+- sizeof(struct xt_counters));
+- if (!res)
+- return false;
+-
+- counter->pcnt = (__force unsigned long)res;
++ if (!state->mem) {
++ state->mem = __alloc_percpu(XT_PCPU_BLOCK_SIZE,
++ XT_PCPU_BLOCK_SIZE);
++ if (!state->mem)
++ return false;
++ }
++ counter->pcnt = (__force unsigned long)(state->mem + state->off);
++ state->off += sizeof(*counter);
++ if (state->off > (XT_PCPU_BLOCK_SIZE - sizeof(*counter))) {
++ state->mem = NULL;
++ state->off = 0;
++ }
+ return true;
+ }
+ EXPORT_SYMBOL_GPL(xt_percpu_counter_alloc);
+@@ -1653,7 +1669,7 @@ void xt_percpu_counter_free(struct xt_co
+ {
+ unsigned long pcnt = counters->pcnt;
+
+- if (nr_cpu_ids > 1)
++ if (nr_cpu_ids > 1 && (pcnt & (XT_PCPU_BLOCK_SIZE - 1)) == 0)
+ free_percpu((void __percpu *)pcnt);
+ }
+ EXPORT_SYMBOL_GPL(xt_percpu_counter_free);
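+
+Note the interplay with the allocation side above: because
+__alloc_percpu() is called with an alignment equal to the block size,
+the first counter of every block, and only that one, sits on a 4k
+boundary. The reworked free path therefore releases the whole block
+exactly once, when it sees that boundary address, and ignores the
+other counters packed behind it. A tiny standalone check of the mask
+test (the addresses are made up):
+
+	#include <assert.h>
+
+	#define XT_PCPU_BLOCK_SIZE 4096
+
+	static int is_block_start(unsigned long pcnt)
+	{
+		return (pcnt & (XT_PCPU_BLOCK_SIZE - 1)) == 0;
+	}
+
+	int main(void)
+	{
+		assert(is_block_start(0x10000));         /* 1st counter */
+		assert(!is_block_start(0x10000 + 16));   /* 2nd counter */
+		assert(!is_block_start(0x10000 + 4080)); /* last counter */
+		return 0;
+	}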
--- /dev/null
+From 4d31eef5176df06f218201bc9c0ce40babb41660 Mon Sep 17 00:00:00 2001
+From: Florian Westphal <fw@strlen.de>
+Date: Tue, 22 Nov 2016 14:44:17 +0100
+Subject: netfilter: x_tables: pass xt_counters struct instead of packet counter
+
+From: Florian Westphal <fw@strlen.de>
+
+commit 4d31eef5176df06f218201bc9c0ce40babb41660 upstream.
+
+On SMP we overload the packet counter (an unsigned long) to contain
+the percpu offset of the real counter. Hide this encoding from
+callers and pass the xt_counters address instead.
+
+This is a preparation patch for allocating the percpu counters in
+page-sized batches.
+
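+The overload being hidden is roughly this: on SMP, counters.pcnt
+stores the address of the real percpu counter cast to unsigned long,
+while on UP it is the packet count itself. A hedged userspace sketch
+of the accessor pattern the callers are moved onto (the smp flag and
+helper names are illustrative, and free() stands in for free_percpu()):
+
+	#include <stdlib.h>
+
+	struct xt_counters { unsigned long pcnt, bcnt; };
+
+	static const int smp = 1;	/* stands in for nr_cpu_ids > 1 */
+
+	/* callers only ever hand over &e->counters; whether .pcnt is
+	 * a raw count or an encoded pointer stays private in here */
+	static struct xt_counters *get_this_cpu_counter(struct xt_counters *cnt)
+	{
+		if (smp)
+			return (struct xt_counters *)cnt->pcnt;
+		return cnt;
+	}
+
+	static void counter_free(struct xt_counters *cnt)
+	{
+		if (smp)
+			free((void *)cnt->pcnt);
+	}
+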
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/netfilter/x_tables.h | 6 +-----
+ net/ipv4/netfilter/arp_tables.c | 4 ++--
+ net/ipv4/netfilter/ip_tables.c | 4 ++--
+ net/ipv6/netfilter/ip6_tables.c | 5 ++---
+ net/netfilter/x_tables.c | 9 +++++++++
+ 5 files changed, 16 insertions(+), 12 deletions(-)
+
+--- a/include/linux/netfilter/x_tables.h
++++ b/include/linux/netfilter/x_tables.h
+@@ -402,11 +402,7 @@ static inline unsigned long xt_percpu_co
+
+ return 0;
+ }
+-static inline void xt_percpu_counter_free(u64 pcnt)
+-{
+- if (nr_cpu_ids > 1)
+- free_percpu((void __percpu *) (unsigned long) pcnt);
+-}
++void xt_percpu_counter_free(struct xt_counters *cnt);
+
+ static inline struct xt_counters *
+ xt_get_this_cpu_counter(struct xt_counters *cnt)
+--- a/net/ipv4/netfilter/arp_tables.c
++++ b/net/ipv4/netfilter/arp_tables.c
+@@ -447,7 +447,7 @@ find_check_entry(struct arpt_entry *e, c
+ err:
+ module_put(t->u.kernel.target->me);
+ out:
+- xt_percpu_counter_free(e->counters.pcnt);
++ xt_percpu_counter_free(&e->counters);
+
+ return ret;
+ }
+@@ -527,7 +527,7 @@ static inline void cleanup_entry(struct
+ if (par.target->destroy != NULL)
+ par.target->destroy(&par);
+ module_put(par.target->me);
+- xt_percpu_counter_free(e->counters.pcnt);
++ xt_percpu_counter_free(&e->counters);
+ }
+
+ /* Checks and translates the user-supplied table segment (held in
+--- a/net/ipv4/netfilter/ip_tables.c
++++ b/net/ipv4/netfilter/ip_tables.c
+@@ -591,7 +591,7 @@ find_check_entry(struct ipt_entry *e, st
+ cleanup_match(ematch, net);
+ }
+
+- xt_percpu_counter_free(e->counters.pcnt);
++ xt_percpu_counter_free(&e->counters);
+
+ return ret;
+ }
+@@ -679,7 +679,7 @@ cleanup_entry(struct ipt_entry *e, struc
+ if (par.target->destroy != NULL)
+ par.target->destroy(&par);
+ module_put(par.target->me);
+- xt_percpu_counter_free(e->counters.pcnt);
++ xt_percpu_counter_free(&e->counters);
+ }
+
+ /* Checks and translates the user-supplied table segment (held in
+--- a/net/ipv6/netfilter/ip6_tables.c
++++ b/net/ipv6/netfilter/ip6_tables.c
+@@ -620,7 +620,7 @@ find_check_entry(struct ip6t_entry *e, s
+ cleanup_match(ematch, net);
+ }
+
+- xt_percpu_counter_free(e->counters.pcnt);
++ xt_percpu_counter_free(&e->counters);
+
+ return ret;
+ }
+@@ -707,8 +707,7 @@ static void cleanup_entry(struct ip6t_en
+ if (par.target->destroy != NULL)
+ par.target->destroy(&par);
+ module_put(par.target->me);
+-
+- xt_percpu_counter_free(e->counters.pcnt);
++ xt_percpu_counter_free(&e->counters);
+ }
+
+ /* Checks and translates the user-supplied table segment (held in
+--- a/net/netfilter/x_tables.c
++++ b/net/netfilter/x_tables.c
+@@ -1619,6 +1619,15 @@ void xt_proto_fini(struct net *net, u_in
+ }
+ EXPORT_SYMBOL_GPL(xt_proto_fini);
+
++void xt_percpu_counter_free(struct xt_counters *counters)
++{
++ unsigned long pcnt = counters->pcnt;
++
++ if (nr_cpu_ids > 1)
++ free_percpu((void __percpu *)pcnt);
++}
++EXPORT_SYMBOL_GPL(xt_percpu_counter_free);
++
+ static int __net_init xt_net_init(struct net *net)
+ {
+ int i;
--- /dev/null
+From f28e15bacedd444608e25421c72eb2cf4527c9ca Mon Sep 17 00:00:00 2001
+From: Florian Westphal <fw@strlen.de>
+Date: Tue, 22 Nov 2016 14:44:18 +0100
+Subject: netfilter: x_tables: pass xt_counters struct to counter allocator
+
+From: Florian Westphal <fw@strlen.de>
+
+commit f28e15bacedd444608e25421c72eb2cf4527c9ca upstream.
+
+This keeps some noise out of the followup patch.
+
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/netfilter/x_tables.h | 27 +--------------------------
+ net/ipv4/netfilter/arp_tables.c | 5 +----
+ net/ipv4/netfilter/ip_tables.c | 5 +----
+ net/ipv6/netfilter/ip6_tables.c | 5 +----
+ net/netfilter/x_tables.c | 30 ++++++++++++++++++++++++++++++
+ 5 files changed, 34 insertions(+), 38 deletions(-)
+
+--- a/include/linux/netfilter/x_tables.h
++++ b/include/linux/netfilter/x_tables.h
+@@ -376,32 +376,7 @@ static inline unsigned long ifname_compa
+ }
+
+
+-/* On SMP, ip(6)t_entry->counters.pcnt holds address of the
+- * real (percpu) counter. On !SMP, its just the packet count,
+- * so nothing needs to be done there.
+- *
+- * xt_percpu_counter_alloc returns the address of the percpu
+- * counter, or 0 on !SMP. We force an alignment of 16 bytes
+- * so that bytes/packets share a common cache line.
+- *
+- * Hence caller must use IS_ERR_VALUE to check for error, this
+- * allows us to return 0 for single core systems without forcing
+- * callers to deal with SMP vs. NONSMP issues.
+- */
+-static inline unsigned long xt_percpu_counter_alloc(void)
+-{
+- if (nr_cpu_ids > 1) {
+- void __percpu *res = __alloc_percpu(sizeof(struct xt_counters),
+- sizeof(struct xt_counters));
+-
+- if (res == NULL)
+- return -ENOMEM;
+-
+- return (__force unsigned long) res;
+- }
+-
+- return 0;
+-}
++bool xt_percpu_counter_alloc(struct xt_counters *counters);
+ void xt_percpu_counter_free(struct xt_counters *cnt);
+
+ static inline struct xt_counters *
+--- a/net/ipv4/netfilter/arp_tables.c
++++ b/net/ipv4/netfilter/arp_tables.c
+@@ -423,13 +423,10 @@ find_check_entry(struct arpt_entry *e, c
+ {
+ struct xt_entry_target *t;
+ struct xt_target *target;
+- unsigned long pcnt;
+ int ret;
+
+- pcnt = xt_percpu_counter_alloc();
+- if (IS_ERR_VALUE(pcnt))
++ if (!xt_percpu_counter_alloc(&e->counters))
+ return -ENOMEM;
+- e->counters.pcnt = pcnt;
+
+ t = arpt_get_target(e);
+ target = xt_request_find_target(NFPROTO_ARP, t->u.user.name,
+--- a/net/ipv4/netfilter/ip_tables.c
++++ b/net/ipv4/netfilter/ip_tables.c
+@@ -548,12 +548,9 @@ find_check_entry(struct ipt_entry *e, st
+ unsigned int j;
+ struct xt_mtchk_param mtpar;
+ struct xt_entry_match *ematch;
+- unsigned long pcnt;
+
+- pcnt = xt_percpu_counter_alloc();
+- if (IS_ERR_VALUE(pcnt))
++ if (!xt_percpu_counter_alloc(&e->counters))
+ return -ENOMEM;
+- e->counters.pcnt = pcnt;
+
+ j = 0;
+ mtpar.net = net;
+--- a/net/ipv6/netfilter/ip6_tables.c
++++ b/net/ipv6/netfilter/ip6_tables.c
+@@ -578,12 +578,9 @@ find_check_entry(struct ip6t_entry *e, s
+ unsigned int j;
+ struct xt_mtchk_param mtpar;
+ struct xt_entry_match *ematch;
+- unsigned long pcnt;
+
+- pcnt = xt_percpu_counter_alloc();
+- if (IS_ERR_VALUE(pcnt))
++ if (!xt_percpu_counter_alloc(&e->counters))
+ return -ENOMEM;
+- e->counters.pcnt = pcnt;
+
+ j = 0;
+ mtpar.net = net;
+--- a/net/netfilter/x_tables.c
++++ b/net/netfilter/x_tables.c
+@@ -1619,6 +1619,36 @@ void xt_proto_fini(struct net *net, u_in
+ }
+ EXPORT_SYMBOL_GPL(xt_proto_fini);
+
++/**
++ * xt_percpu_counter_alloc - allocate x_tables rule counter
++ *
++ * @counter: pointer to counter struct inside the ip(6)/arpt_entry struct
++ *
++ * On SMP, the packet counter [ ip(6)t_entry->counters.pcnt ] will then
++ * contain the address of the real (percpu) counter.
++ *
++ * Rule evaluation needs to use xt_get_this_cpu_counter() helper
++ * to fetch the real percpu counter.
++ *
++ * returns false on error.
++ */
++bool xt_percpu_counter_alloc(struct xt_counters *counter)
++{
++ void __percpu *res;
++
++ if (nr_cpu_ids <= 1)
++ return true;
++
++ res = __alloc_percpu(sizeof(struct xt_counters),
++ sizeof(struct xt_counters));
++ if (!res)
++ return false;
++
++ counter->pcnt = (__force unsigned long)res;
++ return true;
++}
++EXPORT_SYMBOL_GPL(xt_percpu_counter_alloc);
++
+ void xt_percpu_counter_free(struct xt_counters *counters)
+ {
+ unsigned long pcnt = counters->pcnt;
netfilter-ebtables-config_compat-don-t-trust-userland-offsets.patch
netfilter-bridge-ebt_among-add-missing-match-size-checks.patch
netfilter-ipv6-fix-use-after-free-write-in-nf_nat_ipv6_manip_pkt.patch
+netfilter-x_tables-pass-xt_counters-struct-instead-of-packet-counter.patch
+netfilter-x_tables-pass-xt_counters-struct-to-counter-allocator.patch
+netfilter-x_tables-pack-percpu-counter-allocations.patch