git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
bpf: Derive source IP addr via bpf_*_fib_lookup()
author Martynas Pumputis <m@lambda.lt>
Sat, 7 Oct 2023 08:14:14 +0000 (10:14 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 6 Mar 2024 14:45:20 +0000 (14:45 +0000)
commit dab4e1f06cabb6834de14264394ccab197007302 upstream.

Extend the bpf_fib_lookup() helper so that it returns the source
IPv4/IPv6 address if the BPF_FIB_LOOKUP_SRC flag is set.

For example, the following snippet can be used to derive the desired
source IP address:

    struct bpf_fib_lookup p = { .ipv4_dst = ip4->daddr };

    ret = bpf_skb_fib_lookup(skb, &p, sizeof(p),
            BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH);
    if (ret != BPF_FIB_LKUP_RET_SUCCESS)
        return TC_ACT_SHOT;

    /* p.ipv4_src now contains the source address */

The inability to derive the proper source address may cause malfunctions
in BPF-based dataplanes for hosts containing netdevs with more than one
routable IP address or for multi-homed hosts.

For example, Cilium implements packet masquerading in BPF. If an
egressing netdev to which Cilium's BPF prog is attached has
multiple IP addresses, then only one [hardcoded] IP address can be used for
masquerading. This breaks connectivity if any other IP address should have
been selected instead, for example, when both a public and a private address
are attached to the same egress interface.

The change was tested with Cilium [1].

Nikolay Aleksandrov helped to figure out the IPv6 addr selection.

[1]: https://github.com/cilium/cilium/pull/28283

Signed-off-by: Martynas Pumputis <m@lambda.lt>
Link: https://lore.kernel.org/r/20231007081415.33502-2-m@lambda.lt
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
include/net/ipv6_stubs.h
include/uapi/linux/bpf.h
net/core/filter.c
net/ipv6/af_inet6.c
tools/include/uapi/linux/bpf.h

index c48186bf47372761493e9502da90dadcc0c26b01..21da31e1dff5d3754aa479150a1dba569d07c8f6 100644 (file)
@@ -85,6 +85,11 @@ struct ipv6_bpf_stub {
                               sockptr_t optval, unsigned int optlen);
        int (*ipv6_getsockopt)(struct sock *sk, int level, int optname,
                               sockptr_t optval, sockptr_t optlen);
+       int (*ipv6_dev_get_saddr)(struct net *net,
+                                 const struct net_device *dst_dev,
+                                 const struct in6_addr *daddr,
+                                 unsigned int prefs,
+                                 struct in6_addr *saddr);
 };
 extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;
 
index 02cf4d9d8eab5b4fd0b9dc280439f080ed15f772..d5d2183730b9f3863326e07f0b8350aac29dda6e 100644 (file)
@@ -3121,6 +3121,11 @@ union bpf_attr {
  *                     and *params*->smac will not be set as output. A common
  *                     use case is to call **bpf_redirect_neigh**\ () after
  *                     doing **bpf_fib_lookup**\ ().
+ *             **BPF_FIB_LOOKUP_SRC**
+ *                     Derive and set source IP addr in *params*->ipv{4,6}_src
+ *                     for the nexthop. If the src addr cannot be derived,
+ *                     **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this
+ *                     case, *params*->dmac and *params*->smac are not set either.
  *
  *             *ctx* is either **struct xdp_md** for XDP programs or
  *             **struct sk_buff** tc cls_act programs.
@@ -6692,6 +6697,7 @@ enum {
        BPF_FIB_LOOKUP_OUTPUT  = (1U << 1),
        BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
        BPF_FIB_LOOKUP_TBID    = (1U << 3),
+       BPF_FIB_LOOKUP_SRC     = (1U << 4),
 };
 
 enum {
@@ -6704,6 +6710,7 @@ enum {
        BPF_FIB_LKUP_RET_UNSUPP_LWT,   /* fwd requires encapsulation */
        BPF_FIB_LKUP_RET_NO_NEIGH,     /* no neighbor entry for nh */
        BPF_FIB_LKUP_RET_FRAG_NEEDED,  /* fragmentation required to fwd */
+       BPF_FIB_LKUP_RET_NO_SRC_ADDR,  /* failed to derive IP src addr */
 };
 
 struct bpf_fib_lookup {
@@ -6738,6 +6745,9 @@ struct bpf_fib_lookup {
                __u32   rt_metric;
        };
 
+       /* input: source address to consider for lookup
+        * output: source address result from lookup
+        */
        union {
                __be32          ipv4_src;
                __u32           ipv6_src[4];  /* in6_addr; network order */
index 085d211085658079184bde400056fc46d2d72ec0..cb7c4651eaec8b141d7ff9bc4614e4f7e343af1d 100644 (file)
@@ -5809,6 +5809,9 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
        params->rt_metric = res.fi->fib_priority;
        params->ifindex = dev->ifindex;
 
+       if (flags & BPF_FIB_LOOKUP_SRC)
+               params->ipv4_src = fib_result_prefsrc(net, &res);
+
        /* xdp and cls_bpf programs are run in RCU-bh so
         * rcu_read_lock_bh is not needed here
         */
@@ -5951,6 +5954,18 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
        params->rt_metric = res.f6i->fib6_metric;
        params->ifindex = dev->ifindex;
 
+       if (flags & BPF_FIB_LOOKUP_SRC) {
+               if (res.f6i->fib6_prefsrc.plen) {
+                       *src = res.f6i->fib6_prefsrc.addr;
+               } else {
+                       err = ipv6_bpf_stub->ipv6_dev_get_saddr(net, dev,
+                                                               &fl6.daddr, 0,
+                                                               src);
+                       if (err)
+                               return BPF_FIB_LKUP_RET_NO_SRC_ADDR;
+               }
+       }
+
        if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH)
                goto set_fwd_params;
 
@@ -5969,7 +5984,8 @@ set_fwd_params:
 #endif
 
 #define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \
-                            BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID)
+                            BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID | \
+                            BPF_FIB_LOOKUP_SRC)
 
 BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
           struct bpf_fib_lookup *, params, int, plen, u32, flags)
index 0b42eb8c55aaf88ebab55df0563d77949a98e887..62247621cea52ce4d55aafdbf4e4ec01c09d8e67 100644 (file)
@@ -1077,6 +1077,7 @@ static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = {
        .udp6_lib_lookup = __udp6_lib_lookup,
        .ipv6_setsockopt = do_ipv6_setsockopt,
        .ipv6_getsockopt = do_ipv6_getsockopt,
+       .ipv6_dev_get_saddr = ipv6_dev_get_saddr,
 };
 
 static int __init inet6_init(void)
index 02cf4d9d8eab5b4fd0b9dc280439f080ed15f772..d5d2183730b9f3863326e07f0b8350aac29dda6e 100644 (file)
@@ -3121,6 +3121,11 @@ union bpf_attr {
  *                     and *params*->smac will not be set as output. A common
  *                     use case is to call **bpf_redirect_neigh**\ () after
  *                     doing **bpf_fib_lookup**\ ().
+ *             **BPF_FIB_LOOKUP_SRC**
+ *                     Derive and set source IP addr in *params*->ipv{4,6}_src
+ *                     for the nexthop. If the src addr cannot be derived,
+ *                     **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this
+ *                     case, *params*->dmac and *params*->smac are not set either.
  *
  *             *ctx* is either **struct xdp_md** for XDP programs or
  *             **struct sk_buff** tc cls_act programs.
@@ -6692,6 +6697,7 @@ enum {
        BPF_FIB_LOOKUP_OUTPUT  = (1U << 1),
        BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
        BPF_FIB_LOOKUP_TBID    = (1U << 3),
+       BPF_FIB_LOOKUP_SRC     = (1U << 4),
 };
 
 enum {
@@ -6704,6 +6710,7 @@ enum {
        BPF_FIB_LKUP_RET_UNSUPP_LWT,   /* fwd requires encapsulation */
        BPF_FIB_LKUP_RET_NO_NEIGH,     /* no neighbor entry for nh */
        BPF_FIB_LKUP_RET_FRAG_NEEDED,  /* fragmentation required to fwd */
+       BPF_FIB_LKUP_RET_NO_SRC_ADDR,  /* failed to derive IP src addr */
 };
 
 struct bpf_fib_lookup {
@@ -6738,6 +6745,9 @@ struct bpf_fib_lookup {
                __u32   rt_metric;
        };
 
+       /* input: source address to consider for lookup
+        * output: source address result from lookup
+        */
        union {
                __be32          ipv4_src;
                __u32           ipv6_src[4];  /* in6_addr; network order */