]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
netfilter: flowtable: consolidate xmit path
author Pablo Neira Ayuso <pablo@netfilter.org>
Fri, 10 Oct 2025 10:32:35 +0000 (12:32 +0200)
committer Pablo Neira Ayuso <pablo@netfilter.org>
Thu, 27 Nov 2025 23:59:56 +0000 (23:59 +0000)
Use dev_queue_xmit() for the XMIT_NEIGH case. Store the interface index
of the real device behind the vlan/pppoe device. This introduces an
extra lookup for the real device in the xmit path because rt->dst.dev
provides the vlan/pppoe device.

XMIT_NEIGH now looks more similar to XMIT_DIRECT, but the check for a
stale dst and the neighbour lookup still remain in place, which is
convenient for dealing with network topology changes.

Note that nft_flow_route() needs to relax the check for _XMIT_NEIGH so
the existing basic xfrm offload (which only works in one direction) does
not break.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
include/net/netfilter/nf_flow_table.h
net/netfilter/nf_flow_table_core.c
net/netfilter/nf_flow_table_ip.c
net/netfilter/nf_flow_table_path.c

index e9f72d2558e9018ecc44a2786de89d707f7b9a41..7c330caae52bff2c3950ab075041c5e727a03584 100644 (file)
@@ -141,6 +141,7 @@ struct flow_offload_tuple {
        union {
                struct {
                        struct dst_entry *dst_cache;
+                       u32             ifidx;
                        u32             dst_cookie;
                };
                struct {
index 9441ac3d8c1a2eac32142ac43151e3acebcd8cab..98d7b3708602f90633f0e1de2b60459206480971 100644 (file)
@@ -132,6 +132,7 @@ static int flow_offload_fill_route(struct flow_offload *flow,
                break;
        case FLOW_OFFLOAD_XMIT_XFRM:
        case FLOW_OFFLOAD_XMIT_NEIGH:
+               flow_tuple->ifidx = route->tuple[dir].out.ifindex;
                flow_tuple->dst_cache = dst;
                flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple);
                break;
index 8cd4cf7ae21120f1057c4fce5aaca4e3152ae76d..eb4f6a11e779fe8875d62105d8ad19c1b7f62121 100644 (file)
@@ -333,19 +333,18 @@ static void nf_flow_encap_pop(struct sk_buff *skb,
        }
 }
 
+struct nf_flow_xmit {
+       const void              *dest;
+       const void              *source;
+       struct net_device       *outdev;
+};
+
 static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
-                                      const struct flow_offload_tuple_rhash *tuplehash,
-                                      unsigned short type)
+                                      struct nf_flow_xmit *xmit)
 {
-       struct net_device *outdev;
-
-       outdev = dev_get_by_index_rcu(net, tuplehash->tuple.out.ifidx);
-       if (!outdev)
-               return NF_DROP;
-
-       skb->dev = outdev;
-       dev_hard_header(skb, skb->dev, type, tuplehash->tuple.out.h_dest,
-                       tuplehash->tuple.out.h_source, skb->len);
+       skb->dev = xmit->outdev;
+       dev_hard_header(skb, skb->dev, ntohs(skb->protocol),
+                       xmit->dest, xmit->source, skb->len);
        dev_queue_xmit(skb);
 
        return NF_STOLEN;
@@ -424,10 +423,10 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
        struct nf_flowtable_ctx ctx = {
                .in     = state->in,
        };
+       struct nf_flow_xmit xmit = {};
        struct flow_offload *flow;
-       struct net_device *outdev;
+       struct neighbour *neigh;
        struct rtable *rt;
-       __be32 nexthop;
        int ret;
 
        tuplehash = nf_flow_offload_lookup(&ctx, flow_table, skb);
@@ -454,25 +453,34 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
        switch (tuplehash->tuple.xmit_type) {
        case FLOW_OFFLOAD_XMIT_NEIGH:
                rt = dst_rtable(tuplehash->tuple.dst_cache);
-               outdev = rt->dst.dev;
-               skb->dev = outdev;
-               nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
+               xmit.outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.ifidx);
+               if (!xmit.outdev) {
+                       flow_offload_teardown(flow);
+                       return NF_DROP;
+               }
+               neigh = ip_neigh_gw4(rt->dst.dev, rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr));
+               if (IS_ERR(neigh)) {
+                       flow_offload_teardown(flow);
+                       return NF_DROP;
+               }
+               xmit.dest = neigh->ha;
                skb_dst_set_noref(skb, &rt->dst);
-               neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
-               ret = NF_STOLEN;
                break;
        case FLOW_OFFLOAD_XMIT_DIRECT:
-               ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IP);
-               if (ret == NF_DROP)
+               xmit.outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.out.ifidx);
+               if (!xmit.outdev) {
                        flow_offload_teardown(flow);
+                       return NF_DROP;
+               }
+               xmit.dest = tuplehash->tuple.out.h_dest;
+               xmit.source = tuplehash->tuple.out.h_source;
                break;
        default:
                WARN_ON_ONCE(1);
-               ret = NF_DROP;
-               break;
+               return NF_DROP;
        }
 
-       return ret;
+       return nf_flow_queue_xmit(state->net, skb, &xmit);
 }
 EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
 
@@ -719,9 +727,9 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
        struct nf_flowtable_ctx ctx = {
                .in     = state->in,
        };
-       const struct in6_addr *nexthop;
+       struct nf_flow_xmit xmit = {};
        struct flow_offload *flow;
-       struct net_device *outdev;
+       struct neighbour *neigh;
        struct rt6_info *rt;
        int ret;
 
@@ -749,24 +757,33 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
        switch (tuplehash->tuple.xmit_type) {
        case FLOW_OFFLOAD_XMIT_NEIGH:
                rt = dst_rt6_info(tuplehash->tuple.dst_cache);
-               outdev = rt->dst.dev;
-               skb->dev = outdev;
-               nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
+               xmit.outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.ifidx);
+               if (!xmit.outdev) {
+                       flow_offload_teardown(flow);
+                       return NF_DROP;
+               }
+               neigh = ip_neigh_gw6(rt->dst.dev, rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6));
+               if (IS_ERR(neigh)) {
+                       flow_offload_teardown(flow);
+                       return NF_DROP;
+               }
+               xmit.dest = neigh->ha;
                skb_dst_set_noref(skb, &rt->dst);
-               neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
-               ret = NF_STOLEN;
                break;
        case FLOW_OFFLOAD_XMIT_DIRECT:
-               ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IPV6);
-               if (ret == NF_DROP)
+               xmit.outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.out.ifidx);
+               if (!xmit.outdev) {
                        flow_offload_teardown(flow);
+                       return NF_DROP;
+               }
+               xmit.dest = tuplehash->tuple.out.h_dest;
+               xmit.source = tuplehash->tuple.out.h_source;
                break;
        default:
                WARN_ON_ONCE(1);
-               ret = NF_DROP;
-               break;
+               return NF_DROP;
        }
 
-       return ret;
+       return nf_flow_queue_xmit(state->net, skb, &xmit);
 }
 EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
index e525e3745651d8e415dd449b468b4034f72ffa61..e0c69fea2e0caf78c4c37e04f3da654d1cd638aa 100644 (file)
@@ -211,11 +211,11 @@ static void nft_dev_forward_path(struct nf_flow_route *route,
        }
        route->tuple[!dir].in.num_encaps = info.num_encaps;
        route->tuple[!dir].in.ingress_vlans = info.ingress_vlans;
+       route->tuple[dir].out.ifindex = info.outdev->ifindex;
 
        if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) {
                memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);
                memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN);
-               route->tuple[dir].out.ifindex = info.outdev->ifindex;
                route->tuple[dir].out.hw_ifindex = info.hw_outdev->ifindex;
                route->tuple[dir].xmit_type = info.xmit_type;
        }
@@ -263,11 +263,10 @@ int nft_flow_route(const struct nft_pktinfo *pkt, const struct nf_conn *ct,
        nft_default_forward_path(route, this_dst, dir);
        nft_default_forward_path(route, other_dst, !dir);
 
-       if (route->tuple[dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH &&
-           route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH) {
+       if (route->tuple[dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
                nft_dev_forward_path(route, ct, dir, ft);
+       if (route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
                nft_dev_forward_path(route, ct, !dir, ft);
-       }
 
        return 0;
 }