git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
netfilter: x_tables: pack percpu counter allocations
author: Florian Westphal <fw@strlen.de>
Tue, 22 Nov 2016 13:44:19 +0000 (14:44 +0100)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 18 Mar 2018 10:17:52 +0000 (11:17 +0100)
commit ae0ac0ed6fcf5af3be0f63eb935f483f44a402d2 upstream.

Instead of allocating each xt_counter individually, allocate 4k chunks
and then use these for counter allocation requests.

This should speed up rule evaluation by increasing data locality,
also speeds up ruleset loading because we reduce calls to the percpu
allocator.

As Eric points out, we can't use PAGE_SIZE: page_allocator would fail on
arches with 64k page size.

Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
include/linux/netfilter/x_tables.h
net/ipv4/netfilter/arp_tables.c
net/ipv4/netfilter/ip_tables.c
net/ipv6/netfilter/ip6_tables.c
net/netfilter/x_tables.c

index 1c9b228ad1a8d4521300eee17860cb4f301793e1..53e361a9af794fe31c91e4b7d1fb53cc0077f71c 100644 (file)
@@ -368,8 +368,13 @@ static inline unsigned long ifname_compare_aligned(const char *_a,
        return ret;
 }
 
+struct xt_percpu_counter_alloc_state {
+       unsigned int off;
+       const char __percpu *mem;
+};
 
-bool xt_percpu_counter_alloc(struct xt_counters *counters);
+bool xt_percpu_counter_alloc(struct xt_percpu_counter_alloc_state *state,
+                            struct xt_counters *counter);
 void xt_percpu_counter_free(struct xt_counters *cnt);
 
 static inline struct xt_counters *
index e4ac858385d3577de7876b84e9cede0b76a9e54d..f51b32ed353c911494895594f829f8ecc337a726 100644 (file)
@@ -511,13 +511,14 @@ static inline int check_target(struct arpt_entry *e, const char *name)
 }
 
 static inline int
-find_check_entry(struct arpt_entry *e, const char *name, unsigned int size)
+find_check_entry(struct arpt_entry *e, const char *name, unsigned int size,
+                struct xt_percpu_counter_alloc_state *alloc_state)
 {
        struct xt_entry_target *t;
        struct xt_target *target;
        int ret;
 
-       if (!xt_percpu_counter_alloc(&e->counters))
+       if (!xt_percpu_counter_alloc(alloc_state, &e->counters))
                return -ENOMEM;
 
        t = arpt_get_target(e);
@@ -634,6 +635,7 @@ static inline void cleanup_entry(struct arpt_entry *e)
 static int translate_table(struct xt_table_info *newinfo, void *entry0,
                           const struct arpt_replace *repl)
 {
+       struct xt_percpu_counter_alloc_state alloc_state = { 0 };
        struct arpt_entry *iter;
        unsigned int *offsets;
        unsigned int i;
@@ -707,7 +709,8 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
        /* Finally, each sanity check must pass */
        i = 0;
        xt_entry_foreach(iter, entry0, newinfo->size) {
-               ret = find_check_entry(iter, repl->name, repl->size);
+               ret = find_check_entry(iter, repl->name, repl->size,
+                                      &alloc_state);
                if (ret != 0)
                        break;
                ++i;
index be7405aff96391760ad44284257af1d7c3d6494a..dac62b5e7fe32af0c42d93d918a054164b47cea0 100644 (file)
@@ -649,7 +649,8 @@ static int check_target(struct ipt_entry *e, struct net *net, const char *name)
 
 static int
 find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
-                unsigned int size)
+                unsigned int size,
+                struct xt_percpu_counter_alloc_state *alloc_state)
 {
        struct xt_entry_target *t;
        struct xt_target *target;
@@ -658,7 +659,7 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
        struct xt_mtchk_param mtpar;
        struct xt_entry_match *ematch;
 
-       if (!xt_percpu_counter_alloc(&e->counters))
+       if (!xt_percpu_counter_alloc(alloc_state, &e->counters))
                return -ENOMEM;
 
        j = 0;
@@ -803,6 +804,7 @@ static int
 translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
                const struct ipt_replace *repl)
 {
+       struct xt_percpu_counter_alloc_state alloc_state = { 0 };
        struct ipt_entry *iter;
        unsigned int *offsets;
        unsigned int i;
@@ -872,7 +874,8 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
        /* Finally, each sanity check must pass */
        i = 0;
        xt_entry_foreach(iter, entry0, newinfo->size) {
-               ret = find_check_entry(iter, net, repl->name, repl->size);
+               ret = find_check_entry(iter, net, repl->name, repl->size,
+                                      &alloc_state);
                if (ret != 0)
                        break;
                ++i;
index a4f39f7ca00815ca07f1971635a8d7abf3330663..795c343347ecee64c3d947b85700fae1d8f7d824 100644 (file)
@@ -662,7 +662,8 @@ static int check_target(struct ip6t_entry *e, struct net *net, const char *name)
 
 static int
 find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
-                unsigned int size)
+                unsigned int size,
+                struct xt_percpu_counter_alloc_state *alloc_state)
 {
        struct xt_entry_target *t;
        struct xt_target *target;
@@ -671,7 +672,7 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
        struct xt_mtchk_param mtpar;
        struct xt_entry_match *ematch;
 
-       if (!xt_percpu_counter_alloc(&e->counters))
+       if (!xt_percpu_counter_alloc(alloc_state, &e->counters))
                return -ENOMEM;
 
        j = 0;
@@ -814,6 +815,7 @@ static int
 translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
                const struct ip6t_replace *repl)
 {
+       struct xt_percpu_counter_alloc_state alloc_state = { 0 };
        struct ip6t_entry *iter;
        unsigned int *offsets;
        unsigned int i;
@@ -883,7 +885,8 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
        /* Finally, each sanity check must pass */
        i = 0;
        xt_entry_foreach(iter, entry0, newinfo->size) {
-               ret = find_check_entry(iter, net, repl->name, repl->size);
+               ret = find_check_entry(iter, net, repl->name, repl->size,
+                                      &alloc_state);
                if (ret != 0)
                        break;
                ++i;
index cfab40c59b9bd22571f54edee6903c6c608be91b..34ae20490c94d06e833b48816ca6024eeb141c8d 100644 (file)
@@ -38,6 +38,8 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
 MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module");
 
+#define XT_PCPU_BLOCK_SIZE 4096
+
 struct compat_delta {
        unsigned int offset; /* offset in kernel */
        int delta; /* delta in 32bit user land */
@@ -1595,6 +1597,7 @@ EXPORT_SYMBOL_GPL(xt_proto_fini);
 /**
  * xt_percpu_counter_alloc - allocate x_tables rule counter
  *
+ * @state: pointer to xt_percpu allocation state
  * @counter: pointer to counter struct inside the ip(6)/arpt_entry struct
  *
  * On SMP, the packet counter [ ip(6)t_entry->counters.pcnt ] will then
@@ -1603,21 +1606,34 @@ EXPORT_SYMBOL_GPL(xt_proto_fini);
  * Rule evaluation needs to use xt_get_this_cpu_counter() helper
  * to fetch the real percpu counter.
  *
+ * To speed up allocation and improve data locality, a 4kb block is
+ * allocated.
+ *
+ * xt_percpu_counter_alloc_state contains the base address of the
+ * allocated block and the current sub-offset.
+ *
  * returns false on error.
  */
-bool xt_percpu_counter_alloc(struct xt_counters *counter)
+bool xt_percpu_counter_alloc(struct xt_percpu_counter_alloc_state *state,
+                            struct xt_counters *counter)
 {
-       void __percpu *res;
+       BUILD_BUG_ON(XT_PCPU_BLOCK_SIZE < (sizeof(*counter) * 2));
 
        if (nr_cpu_ids <= 1)
                return true;
 
-       res = __alloc_percpu(sizeof(struct xt_counters),
-                            sizeof(struct xt_counters));
-       if (!res)
-               return false;
-
-       counter->pcnt = (__force unsigned long)res;
+       if (!state->mem) {
+               state->mem = __alloc_percpu(XT_PCPU_BLOCK_SIZE,
+                                           XT_PCPU_BLOCK_SIZE);
+               if (!state->mem)
+                       return false;
+       }
+       counter->pcnt = (__force unsigned long)(state->mem + state->off);
+       state->off += sizeof(*counter);
+       if (state->off > (XT_PCPU_BLOCK_SIZE - sizeof(*counter))) {
+               state->mem = NULL;
+               state->off = 0;
+       }
        return true;
 }
 EXPORT_SYMBOL_GPL(xt_percpu_counter_alloc);
@@ -1626,7 +1642,7 @@ void xt_percpu_counter_free(struct xt_counters *counters)
 {
        unsigned long pcnt = counters->pcnt;
 
-       if (nr_cpu_ids > 1)
+       if (nr_cpu_ids > 1 && (pcnt & (XT_PCPU_BLOCK_SIZE - 1)) == 0)
                free_percpu((void __percpu *)pcnt);
 }
 EXPORT_SYMBOL_GPL(xt_percpu_counter_free);