]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
xfrm: Add an inbound percpu state cache.
authorSteffen Klassert <steffen.klassert@secunet.com>
Wed, 23 Oct 2024 10:53:44 +0000 (12:53 +0200)
committerSteffen Klassert <steffen.klassert@secunet.com>
Tue, 29 Oct 2024 10:56:18 +0000 (11:56 +0100)
Now that we can have percpu xfrm states, the number of active
states might increase. To get a better lookup performance,
we add a percpu cache to cache the used inbound xfrm states.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Tested-by: Antony Antony <antony.antony@secunet.com>
Tested-by: Tobias Brunner <tobias@strongswan.org>
include/net/netns/xfrm.h
include/net/xfrm.h
net/ipv4/esp4_offload.c
net/ipv6/esp6_offload.c
net/xfrm/xfrm_input.c
net/xfrm/xfrm_state.c

index ae60d66640954c8f354c298a8aa0a59dbb5387e8..23dd647fe0248c161a26cf2da344a7f4bc67451b 100644 (file)
@@ -43,6 +43,7 @@ struct netns_xfrm {
        struct hlist_head       __rcu *state_bysrc;
        struct hlist_head       __rcu *state_byspi;
        struct hlist_head       __rcu *state_byseq;
+       struct hlist_head        __percpu *state_cache_input;
        unsigned int            state_hmask;
        unsigned int            state_num;
        struct work_struct      state_hash_work;
index 0b394c5fb5f3ab9f1c475e508ce75161bcc031de..2b87999bd5aae37c4f2d5be69794b316d45b2173 100644 (file)
@@ -185,6 +185,7 @@ struct xfrm_state {
        struct hlist_node       byspi;
        struct hlist_node       byseq;
        struct hlist_node       state_cache;
+       struct hlist_node       state_cache_input;
 
        refcount_t              refcnt;
        spinlock_t              lock;
@@ -1650,6 +1651,10 @@ int xfrm_state_update(struct xfrm_state *x);
 struct xfrm_state *xfrm_state_lookup(struct net *net, u32 mark,
                                     const xfrm_address_t *daddr, __be32 spi,
                                     u8 proto, unsigned short family);
+struct xfrm_state *xfrm_input_state_lookup(struct net *net, u32 mark,
+                                          const xfrm_address_t *daddr,
+                                          __be32 spi, u8 proto,
+                                          unsigned short family);
 struct xfrm_state *xfrm_state_lookup_byaddr(struct net *net, u32 mark,
                                            const xfrm_address_t *daddr,
                                            const xfrm_address_t *saddr,
index 80c4ea0e12f48a57ca2c746df0fa07c0e9772cbb..e0d94270da28a320c3075ff3d31792fd97770dd7 100644 (file)
@@ -53,9 +53,9 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
                if (sp->len == XFRM_MAX_DEPTH)
                        goto out_reset;
 
-               x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
-                                     (xfrm_address_t *)&ip_hdr(skb)->daddr,
-                                     spi, IPPROTO_ESP, AF_INET);
+               x = xfrm_input_state_lookup(dev_net(skb->dev), skb->mark,
+                                           (xfrm_address_t *)&ip_hdr(skb)->daddr,
+                                           spi, IPPROTO_ESP, AF_INET);
 
                if (unlikely(x && x->dir && x->dir != XFRM_SA_DIR_IN)) {
                        /* non-offload path will record the error and audit log */
index 919ebfabbe4ee272e268d93586b9d06ec35588ea..7b41fb4f00b5876471107f606df977f0a47675c7 100644 (file)
@@ -80,9 +80,9 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
                if (sp->len == XFRM_MAX_DEPTH)
                        goto out_reset;
 
-               x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
-                                     (xfrm_address_t *)&ipv6_hdr(skb)->daddr,
-                                     spi, IPPROTO_ESP, AF_INET6);
+               x = xfrm_input_state_lookup(dev_net(skb->dev), skb->mark,
+                                           (xfrm_address_t *)&ipv6_hdr(skb)->daddr,
+                                           spi, IPPROTO_ESP, AF_INET6);
 
                if (unlikely(x && x->dir && x->dir != XFRM_SA_DIR_IN)) {
                        /* non-offload path will record the error and audit log */
index 749e7eea99e465b069a698e1cc92218d6f4fca63..841a60a6fbfea39c7b63333c387c6841a0afcb78 100644 (file)
@@ -572,7 +572,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
                        goto drop;
                }
 
-               x = xfrm_state_lookup(net, mark, daddr, spi, nexthdr, family);
+               x = xfrm_input_state_lookup(net, mark, daddr, spi, nexthdr, family);
                if (x == NULL) {
                        secpath_reset(skb);
                        XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES);
index a2047825f6c8861f8a59b5adfcd081d36fd4325c..e3266a5d4f90d8cebf0a6df7788944200b04ea6b 100644 (file)
@@ -754,6 +754,9 @@ int __xfrm_state_delete(struct xfrm_state *x)
                        hlist_del_rcu(&x->byseq);
                if (!hlist_unhashed(&x->state_cache))
                        hlist_del_rcu(&x->state_cache);
+               if (!hlist_unhashed(&x->state_cache_input))
+                       hlist_del_rcu(&x->state_cache_input);
+
                if (x->id.spi)
                        hlist_del_rcu(&x->byspi);
                net->xfrm.state_num--;
@@ -1106,6 +1109,52 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
        return NULL;
 }
 
+struct xfrm_state *xfrm_input_state_lookup(struct net *net, u32 mark,
+                                          const xfrm_address_t *daddr,
+                                          __be32 spi, u8 proto,
+                                          unsigned short family)
+{
+       struct hlist_head *state_cache_input;
+       struct xfrm_state *x = NULL;
+       int cpu = get_cpu();
+
+       state_cache_input =  per_cpu_ptr(net->xfrm.state_cache_input, cpu);
+
+       rcu_read_lock();
+       hlist_for_each_entry_rcu(x, state_cache_input, state_cache_input) {
+               if (x->props.family != family ||
+                   x->id.spi       != spi ||
+                   x->id.proto     != proto ||
+                   !xfrm_addr_equal(&x->id.daddr, daddr, family))
+                       continue;
+
+               if ((mark & x->mark.m) != x->mark.v)
+                       continue;
+               if (!xfrm_state_hold_rcu(x))
+                       continue;
+               goto out;
+       }
+
+       x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family);
+
+       if (x && x->km.state == XFRM_STATE_VALID) {
+               spin_lock_bh(&net->xfrm.xfrm_state_lock);
+               if (hlist_unhashed(&x->state_cache_input)) {
+                       hlist_add_head_rcu(&x->state_cache_input, state_cache_input);
+               } else {
+                       hlist_del_rcu(&x->state_cache_input);
+                       hlist_add_head_rcu(&x->state_cache_input, state_cache_input);
+               }
+               spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+       }
+
+out:
+       rcu_read_unlock();
+       put_cpu();
+       return x;
+}
+EXPORT_SYMBOL(xfrm_input_state_lookup);
+
 static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
                                                     const xfrm_address_t *daddr,
                                                     const xfrm_address_t *saddr,
@@ -3079,6 +3128,11 @@ int __net_init xfrm_state_init(struct net *net)
        net->xfrm.state_byseq = xfrm_hash_alloc(sz);
        if (!net->xfrm.state_byseq)
                goto out_byseq;
+
+       net->xfrm.state_cache_input = alloc_percpu(struct hlist_head);
+       if (!net->xfrm.state_cache_input)
+               goto out_state_cache_input;
+
        net->xfrm.state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
 
        net->xfrm.state_num = 0;
@@ -3088,6 +3142,8 @@ int __net_init xfrm_state_init(struct net *net)
                               &net->xfrm.xfrm_state_lock);
        return 0;
 
+out_state_cache_input:
+       xfrm_hash_free(net->xfrm.state_byseq, sz);
 out_byseq:
        xfrm_hash_free(net->xfrm.state_byspi, sz);
 out_byspi:
@@ -3117,6 +3173,7 @@ void xfrm_state_fini(struct net *net)
        xfrm_hash_free(net->xfrm.state_bysrc, sz);
        WARN_ON(!hlist_empty(net->xfrm.state_bydst));
        xfrm_hash_free(net->xfrm.state_bydst, sz);
+       free_percpu(net->xfrm.state_cache_input);
 }
 
 #ifdef CONFIG_AUDITSYSCALL