git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.15-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 4 Mar 2024 13:33:07 +0000 (14:33 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 4 Mar 2024 13:33:07 +0000 (14:33 +0100)
added patches:
bpf-add-bpf_fib_lookup_skip_neigh-for-bpf_fib_lookup.patch
bpf-add-table-id-to-bpf_fib_lookup-bpf-helper.patch
bpf-derive-source-ip-addr-via-bpf_-_fib_lookup.patch

queue-5.15/bpf-add-bpf_fib_lookup_skip_neigh-for-bpf_fib_lookup.patch [new file with mode: 0644]
queue-5.15/bpf-add-table-id-to-bpf_fib_lookup-bpf-helper.patch [new file with mode: 0644]
queue-5.15/bpf-derive-source-ip-addr-via-bpf_-_fib_lookup.patch [new file with mode: 0644]
queue-5.15/series

diff --git a/queue-5.15/bpf-add-bpf_fib_lookup_skip_neigh-for-bpf_fib_lookup.patch b/queue-5.15/bpf-add-bpf_fib_lookup_skip_neigh-for-bpf_fib_lookup.patch
new file mode 100644 (file)
index 0000000..7a3345e
--- /dev/null
@@ -0,0 +1,174 @@
+From 9db7c412d475d56b0205cf678b4451191e24b7e6 Mon Sep 17 00:00:00 2001
+From: Martin KaFai Lau <martin.lau@kernel.org>
+Date: Fri, 17 Feb 2023 12:55:14 -0800
+Subject: bpf: Add BPF_FIB_LOOKUP_SKIP_NEIGH for bpf_fib_lookup
+
+From: Martin KaFai Lau <martin.lau@kernel.org>
+
+commit 31de4105f00d64570139bc5494a201b0bd57349f upstream.
+
+The bpf_fib_lookup() also looks up the neigh table.
+This was done before bpf_redirect_neigh() was added.
+
+In the use case that does not manage the neigh table
+and requires bpf_fib_lookup() to look up a fib to
+decide if it needs to redirect or not, the bpf prog can
+depend only on using bpf_redirect_neigh() to look up the
+neigh. It also keeps the neigh entries fresh and connected.
+
+This patch adds a bpf_fib_lookup flag, SKIP_NEIGH, to avoid
+the double neigh lookup when the bpf prog always calls
+bpf_redirect_neigh() to do the neigh lookup. The params->smac
+output is skipped as well when SKIP_NEIGH is set because
+bpf_redirect_neigh() will figure out the smac also.
+
+Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Link: https://lore.kernel.org/bpf/20230217205515.3583372-1-martin.lau@linux.dev
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/uapi/linux/bpf.h       |    6 ++++++
+ net/core/filter.c              |   39 ++++++++++++++++++++++++++-------------
+ tools/include/uapi/linux/bpf.h |    6 ++++++
+ 3 files changed, 38 insertions(+), 13 deletions(-)
+
+--- a/include/uapi/linux/bpf.h
++++ b/include/uapi/linux/bpf.h
+@@ -3014,6 +3014,11 @@ union bpf_attr {
+  *            **BPF_FIB_LOOKUP_OUTPUT**
+  *                    Perform lookup from an egress perspective (default is
+  *                    ingress).
++ *            **BPF_FIB_LOOKUP_SKIP_NEIGH**
++ *                    Skip the neighbour table lookup. *params*->dmac
++ *                    and *params*->smac will not be set as output. A common
++ *                    use case is to call **bpf_redirect_neigh**\ () after
++ *                    doing **bpf_fib_lookup**\ ().
+  *
+  *            *ctx* is either **struct xdp_md** for XDP programs or
+  *            **struct sk_buff** tc cls_act programs.
+@@ -6040,6 +6045,7 @@ struct bpf_raw_tracepoint_args {
+ enum {
+       BPF_FIB_LOOKUP_DIRECT  = (1U << 0),
+       BPF_FIB_LOOKUP_OUTPUT  = (1U << 1),
++      BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
+ };
+ enum {
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -5392,12 +5392,8 @@ static const struct bpf_func_proto bpf_s
+ #endif
+ #if IS_ENABLED(CONFIG_INET) || IS_ENABLED(CONFIG_IPV6)
+-static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params,
+-                                const struct neighbour *neigh,
+-                                const struct net_device *dev, u32 mtu)
++static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params, u32 mtu)
+ {
+-      memcpy(params->dmac, neigh->ha, ETH_ALEN);
+-      memcpy(params->smac, dev->dev_addr, ETH_ALEN);
+       params->h_vlan_TCI = 0;
+       params->h_vlan_proto = 0;
+       if (mtu)
+@@ -5508,21 +5504,29 @@ static int bpf_ipv4_fib_lookup(struct ne
+       if (likely(nhc->nhc_gw_family != AF_INET6)) {
+               if (nhc->nhc_gw_family)
+                       params->ipv4_dst = nhc->nhc_gw.ipv4;
+-
+-              neigh = __ipv4_neigh_lookup_noref(dev,
+-                                               (__force u32)params->ipv4_dst);
+       } else {
+               struct in6_addr *dst = (struct in6_addr *)params->ipv6_dst;
+               params->family = AF_INET6;
+               *dst = nhc->nhc_gw.ipv6;
+-              neigh = __ipv6_neigh_lookup_noref_stub(dev, dst);
+       }
++      if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH)
++              goto set_fwd_params;
++
++      if (likely(nhc->nhc_gw_family != AF_INET6))
++              neigh = __ipv4_neigh_lookup_noref(dev,
++                                                (__force u32)params->ipv4_dst);
++      else
++              neigh = __ipv6_neigh_lookup_noref_stub(dev, params->ipv6_dst);
++
+       if (!neigh || !(neigh->nud_state & NUD_VALID))
+               return BPF_FIB_LKUP_RET_NO_NEIGH;
++      memcpy(params->dmac, neigh->ha, ETH_ALEN);
++      memcpy(params->smac, dev->dev_addr, ETH_ALEN);
+-      return bpf_fib_set_fwd_params(params, neigh, dev, mtu);
++set_fwd_params:
++      return bpf_fib_set_fwd_params(params, mtu);
+ }
+ #endif
+@@ -5630,24 +5634,33 @@ static int bpf_ipv6_fib_lookup(struct ne
+       params->rt_metric = res.f6i->fib6_metric;
+       params->ifindex = dev->ifindex;
++      if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH)
++              goto set_fwd_params;
++
+       /* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is
+        * not needed here.
+        */
+       neigh = __ipv6_neigh_lookup_noref_stub(dev, dst);
+       if (!neigh || !(neigh->nud_state & NUD_VALID))
+               return BPF_FIB_LKUP_RET_NO_NEIGH;
++      memcpy(params->dmac, neigh->ha, ETH_ALEN);
++      memcpy(params->smac, dev->dev_addr, ETH_ALEN);
+-      return bpf_fib_set_fwd_params(params, neigh, dev, mtu);
++set_fwd_params:
++      return bpf_fib_set_fwd_params(params, mtu);
+ }
+ #endif
++#define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \
++                           BPF_FIB_LOOKUP_SKIP_NEIGH)
++
+ BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
+          struct bpf_fib_lookup *, params, int, plen, u32, flags)
+ {
+       if (plen < sizeof(*params))
+               return -EINVAL;
+-      if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT))
++      if (flags & ~BPF_FIB_LOOKUP_MASK)
+               return -EINVAL;
+       switch (params->family) {
+@@ -5685,7 +5698,7 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk
+       if (plen < sizeof(*params))
+               return -EINVAL;
+-      if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT))
++      if (flags & ~BPF_FIB_LOOKUP_MASK)
+               return -EINVAL;
+       if (params->tot_len)
+--- a/tools/include/uapi/linux/bpf.h
++++ b/tools/include/uapi/linux/bpf.h
+@@ -3014,6 +3014,11 @@ union bpf_attr {
+  *            **BPF_FIB_LOOKUP_OUTPUT**
+  *                    Perform lookup from an egress perspective (default is
+  *                    ingress).
++ *            **BPF_FIB_LOOKUP_SKIP_NEIGH**
++ *                    Skip the neighbour table lookup. *params*->dmac
++ *                    and *params*->smac will not be set as output. A common
++ *                    use case is to call **bpf_redirect_neigh**\ () after
++ *                    doing **bpf_fib_lookup**\ ().
+  *
+  *            *ctx* is either **struct xdp_md** for XDP programs or
+  *            **struct sk_buff** tc cls_act programs.
+@@ -6040,6 +6045,7 @@ struct bpf_raw_tracepoint_args {
+ enum {
+       BPF_FIB_LOOKUP_DIRECT  = (1U << 0),
+       BPF_FIB_LOOKUP_OUTPUT  = (1U << 1),
++      BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
+ };
+ enum {
diff --git a/queue-5.15/bpf-add-table-id-to-bpf_fib_lookup-bpf-helper.patch b/queue-5.15/bpf-add-table-id-to-bpf_fib_lookup-bpf-helper.patch
new file mode 100644 (file)
index 0000000..e4767fe
--- /dev/null
@@ -0,0 +1,177 @@
+From 9d1d68efc9d9163c73085ba77e1278e4424223b7 Mon Sep 17 00:00:00 2001
+From: Louis DeLosSantos <louis.delos.devel@gmail.com>
+Date: Wed, 31 May 2023 15:38:48 -0400
+Subject: bpf: Add table ID to bpf_fib_lookup BPF helper
+
+From: Louis DeLosSantos <louis.delos.devel@gmail.com>
+
+commit 8ad77e72caae22a1ddcfd0c03f2884929e93b7a4 upstream.
+
+Add ability to specify routing table ID to the `bpf_fib_lookup` BPF
+helper.
+
+A new field `tbid` is added to `struct bpf_fib_lookup` used as
+parameters to the `bpf_fib_lookup` BPF helper.
+
+When the helper is called with the `BPF_FIB_LOOKUP_DIRECT` and
+`BPF_FIB_LOOKUP_TBID` flags the `tbid` field in `struct bpf_fib_lookup`
+will be used as the table ID for the fib lookup.
+
+If the `tbid` does not exist the fib lookup will fail with
+`BPF_FIB_LKUP_RET_NOT_FWDED`.
+
+The `tbid` field becomes a union over the vlan related output fields
+in `struct bpf_fib_lookup` and will be zeroed immediately after usage.
+
+This functionality is useful in containerized environments.
+
+For instance, if a CNI wants to dictate the next-hop for traffic leaving
+a container it can create a container-specific routing table and perform
+a fib lookup against this table in a "host-net-namespace-side" TC program.
+
+This functionality also allows `ip rule` like functionality at the TC
+layer, allowing an eBPF program to pick a routing table based on some
+aspect of the sk_buff.
+
+As a concrete use case, this feature will be used in Cilium's SRv6 L3VPN
+datapath.
+
+When egress traffic leaves a Pod an eBPF program attached by Cilium will
+determine which VRF the egress traffic should target, and then perform a
+FIB lookup in a specific table representing this VRF's FIB.
+
+Signed-off-by: Louis DeLosSantos <louis.delos.devel@gmail.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Link: https://lore.kernel.org/bpf/20230505-bpf-add-tbid-fib-lookup-v2-1-0a31c22c748c@gmail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/uapi/linux/bpf.h       |   21 ++++++++++++++++++---
+ net/core/filter.c              |   14 +++++++++++++-
+ tools/include/uapi/linux/bpf.h |   21 ++++++++++++++++++---
+ 3 files changed, 49 insertions(+), 7 deletions(-)
+
+--- a/include/uapi/linux/bpf.h
++++ b/include/uapi/linux/bpf.h
+@@ -3011,6 +3011,10 @@ union bpf_attr {
+  *            **BPF_FIB_LOOKUP_DIRECT**
+  *                    Do a direct table lookup vs full lookup using FIB
+  *                    rules.
++ *            **BPF_FIB_LOOKUP_TBID**
++ *                    Used with BPF_FIB_LOOKUP_DIRECT.
++ *                    Use the routing table ID present in *params*->tbid
++ *                    for the fib lookup.
+  *            **BPF_FIB_LOOKUP_OUTPUT**
+  *                    Perform lookup from an egress perspective (default is
+  *                    ingress).
+@@ -6046,6 +6050,7 @@ enum {
+       BPF_FIB_LOOKUP_DIRECT  = (1U << 0),
+       BPF_FIB_LOOKUP_OUTPUT  = (1U << 1),
+       BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
++      BPF_FIB_LOOKUP_TBID    = (1U << 3),
+ };
+ enum {
+@@ -6106,9 +6111,19 @@ struct bpf_fib_lookup {
+               __u32           ipv6_dst[4];  /* in6_addr; network order */
+       };
+-      /* output */
+-      __be16  h_vlan_proto;
+-      __be16  h_vlan_TCI;
++      union {
++              struct {
++                      /* output */
++                      __be16  h_vlan_proto;
++                      __be16  h_vlan_TCI;
++              };
++              /* input: when accompanied with the
++               * 'BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID` flags, a
++               * specific routing table to use for the fib lookup.
++               */
++              __u32   tbid;
++      };
++
+       __u8    smac[6];     /* ETH_ALEN */
+       __u8    dmac[6];     /* ETH_ALEN */
+ };
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -5447,6 +5447,12 @@ static int bpf_ipv4_fib_lookup(struct ne
+               u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
+               struct fib_table *tb;
++              if (flags & BPF_FIB_LOOKUP_TBID) {
++                      tbid = params->tbid;
++                      /* zero out for vlan output */
++                      params->tbid = 0;
++              }
++
+               tb = fib_get_table(net, tbid);
+               if (unlikely(!tb))
+                       return BPF_FIB_LKUP_RET_NOT_FWDED;
+@@ -5580,6 +5586,12 @@ static int bpf_ipv6_fib_lookup(struct ne
+               u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
+               struct fib6_table *tb;
++              if (flags & BPF_FIB_LOOKUP_TBID) {
++                      tbid = params->tbid;
++                      /* zero out for vlan output */
++                      params->tbid = 0;
++              }
++
+               tb = ipv6_stub->fib6_get_table(net, tbid);
+               if (unlikely(!tb))
+                       return BPF_FIB_LKUP_RET_NOT_FWDED;
+@@ -5652,7 +5664,7 @@ set_fwd_params:
+ #endif
+ #define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \
+-                           BPF_FIB_LOOKUP_SKIP_NEIGH)
++                           BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID)
+ BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
+          struct bpf_fib_lookup *, params, int, plen, u32, flags)
+--- a/tools/include/uapi/linux/bpf.h
++++ b/tools/include/uapi/linux/bpf.h
+@@ -3011,6 +3011,10 @@ union bpf_attr {
+  *            **BPF_FIB_LOOKUP_DIRECT**
+  *                    Do a direct table lookup vs full lookup using FIB
+  *                    rules.
++ *            **BPF_FIB_LOOKUP_TBID**
++ *                    Used with BPF_FIB_LOOKUP_DIRECT.
++ *                    Use the routing table ID present in *params*->tbid
++ *                    for the fib lookup.
+  *            **BPF_FIB_LOOKUP_OUTPUT**
+  *                    Perform lookup from an egress perspective (default is
+  *                    ingress).
+@@ -6046,6 +6050,7 @@ enum {
+       BPF_FIB_LOOKUP_DIRECT  = (1U << 0),
+       BPF_FIB_LOOKUP_OUTPUT  = (1U << 1),
+       BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
++      BPF_FIB_LOOKUP_TBID    = (1U << 3),
+ };
+ enum {
+@@ -6106,9 +6111,19 @@ struct bpf_fib_lookup {
+               __u32           ipv6_dst[4];  /* in6_addr; network order */
+       };
+-      /* output */
+-      __be16  h_vlan_proto;
+-      __be16  h_vlan_TCI;
++      union {
++              struct {
++                      /* output */
++                      __be16  h_vlan_proto;
++                      __be16  h_vlan_TCI;
++              };
++              /* input: when accompanied with the
++               * 'BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID` flags, a
++               * specific routing table to use for the fib lookup.
++               */
++              __u32   tbid;
++      };
++
+       __u8    smac[6];     /* ETH_ALEN */
+       __u8    dmac[6];     /* ETH_ALEN */
+ };
diff --git a/queue-5.15/bpf-derive-source-ip-addr-via-bpf_-_fib_lookup.patch b/queue-5.15/bpf-derive-source-ip-addr-via-bpf_-_fib_lookup.patch
new file mode 100644 (file)
index 0000000..0685405
--- /dev/null
@@ -0,0 +1,199 @@
+From 40112f473a8e1cccadbb9dbffa134479e721cfaa Mon Sep 17 00:00:00 2001
+From: Martynas Pumputis <m@lambda.lt>
+Date: Sat, 7 Oct 2023 10:14:14 +0200
+Subject: bpf: Derive source IP addr via bpf_*_fib_lookup()
+
+From: Martynas Pumputis <m@lambda.lt>
+
+commit dab4e1f06cabb6834de14264394ccab197007302 upstream.
+
+Extend the bpf_fib_lookup() helper by making it return the source
+IPv4/IPv6 address if the BPF_FIB_LOOKUP_SRC flag is set.
+
+For example, the following snippet can be used to derive the desired
+source IP address:
+
+    struct bpf_fib_lookup p = { .ipv4_dst = ip4->daddr };
+
+    ret = bpf_skb_fib_lookup(skb, &p, sizeof(p),
+            BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH);
+    if (ret != BPF_FIB_LKUP_RET_SUCCESS)
+        return TC_ACT_SHOT;
+
+    /* the p.ipv4_src now contains the source address */
+
+The inability to derive the proper source address may cause malfunctions
+in BPF-based dataplanes for hosts containing netdevs with more than one
+routable IP address or for multi-homed hosts.
+
+For example, Cilium implements packet masquerading in BPF. If an
+egressing netdev to which the Cilium's BPF prog is attached has
+multiple IP addresses, then only one [hardcoded] IP address can be used for
+masquerading. This breaks connectivity if any other IP address should have
+been selected instead, for example, when public and private addresses
+are attached to the same egress interface.
+
+The change was tested with Cilium [1].
+
+Nikolay Aleksandrov helped to figure out the IPv6 addr selection.
+
+[1]: https://github.com/cilium/cilium/pull/28283
+
+Signed-off-by: Martynas Pumputis <m@lambda.lt>
+Link: https://lore.kernel.org/r/20231007081415.33502-2-m@lambda.lt
+Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/ipv6_stubs.h       |    5 +++++
+ include/uapi/linux/bpf.h       |   10 ++++++++++
+ net/core/filter.c              |   18 +++++++++++++++++-
+ net/ipv6/af_inet6.c            |    1 +
+ tools/include/uapi/linux/bpf.h |   10 ++++++++++
+ 5 files changed, 43 insertions(+), 1 deletion(-)
+
+--- a/include/net/ipv6_stubs.h
++++ b/include/net/ipv6_stubs.h
+@@ -81,6 +81,11 @@ struct ipv6_bpf_stub {
+                                    const struct in6_addr *daddr, __be16 dport,
+                                    int dif, int sdif, struct udp_table *tbl,
+                                    struct sk_buff *skb);
++      int (*ipv6_dev_get_saddr)(struct net *net,
++                                const struct net_device *dst_dev,
++                                const struct in6_addr *daddr,
++                                unsigned int prefs,
++                                struct in6_addr *saddr);
+ };
+ extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;
+--- a/include/uapi/linux/bpf.h
++++ b/include/uapi/linux/bpf.h
+@@ -3023,6 +3023,11 @@ union bpf_attr {
+  *                    and *params*->smac will not be set as output. A common
+  *                    use case is to call **bpf_redirect_neigh**\ () after
+  *                    doing **bpf_fib_lookup**\ ().
++ *            **BPF_FIB_LOOKUP_SRC**
++ *                    Derive and set source IP addr in *params*->ipv{4,6}_src
++ *                    for the nexthop. If the src addr cannot be derived,
++ *                    **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this
++ *                    case, *params*->dmac and *params*->smac are not set either.
+  *
+  *            *ctx* is either **struct xdp_md** for XDP programs or
+  *            **struct sk_buff** tc cls_act programs.
+@@ -6051,6 +6056,7 @@ enum {
+       BPF_FIB_LOOKUP_OUTPUT  = (1U << 1),
+       BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
+       BPF_FIB_LOOKUP_TBID    = (1U << 3),
++      BPF_FIB_LOOKUP_SRC     = (1U << 4),
+ };
+ enum {
+@@ -6063,6 +6069,7 @@ enum {
+       BPF_FIB_LKUP_RET_UNSUPP_LWT,   /* fwd requires encapsulation */
+       BPF_FIB_LKUP_RET_NO_NEIGH,     /* no neighbor entry for nh */
+       BPF_FIB_LKUP_RET_FRAG_NEEDED,  /* fragmentation required to fwd */
++      BPF_FIB_LKUP_RET_NO_SRC_ADDR,  /* failed to derive IP src addr */
+ };
+ struct bpf_fib_lookup {
+@@ -6097,6 +6104,9 @@ struct bpf_fib_lookup {
+               __u32   rt_metric;
+       };
++      /* input: source address to consider for lookup
++       * output: source address result from lookup
++       */
+       union {
+               __be32          ipv4_src;
+               __u32           ipv6_src[4];  /* in6_addr; network order */
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -5504,6 +5504,9 @@ static int bpf_ipv4_fib_lookup(struct ne
+       params->rt_metric = res.fi->fib_priority;
+       params->ifindex = dev->ifindex;
++      if (flags & BPF_FIB_LOOKUP_SRC)
++              params->ipv4_src = fib_result_prefsrc(net, &res);
++
+       /* xdp and cls_bpf programs are run in RCU-bh so
+        * rcu_read_lock_bh is not needed here
+        */
+@@ -5646,6 +5649,18 @@ static int bpf_ipv6_fib_lookup(struct ne
+       params->rt_metric = res.f6i->fib6_metric;
+       params->ifindex = dev->ifindex;
++      if (flags & BPF_FIB_LOOKUP_SRC) {
++              if (res.f6i->fib6_prefsrc.plen) {
++                      *src = res.f6i->fib6_prefsrc.addr;
++              } else {
++                      err = ipv6_bpf_stub->ipv6_dev_get_saddr(net, dev,
++                                                              &fl6.daddr, 0,
++                                                              src);
++                      if (err)
++                              return BPF_FIB_LKUP_RET_NO_SRC_ADDR;
++              }
++      }
++
+       if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH)
+               goto set_fwd_params;
+@@ -5664,7 +5679,8 @@ set_fwd_params:
+ #endif
+ #define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \
+-                           BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID)
++                           BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID | \
++                           BPF_FIB_LOOKUP_SRC)
+ BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
+          struct bpf_fib_lookup *, params, int, plen, u32, flags)
+--- a/net/ipv6/af_inet6.c
++++ b/net/ipv6/af_inet6.c
+@@ -1061,6 +1061,7 @@ static const struct ipv6_stub ipv6_stub_
+ static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = {
+       .inet6_bind = __inet6_bind,
+       .udp6_lib_lookup = __udp6_lib_lookup,
++      .ipv6_dev_get_saddr = ipv6_dev_get_saddr,
+ };
+ static int __init inet6_init(void)
+--- a/tools/include/uapi/linux/bpf.h
++++ b/tools/include/uapi/linux/bpf.h
+@@ -3023,6 +3023,11 @@ union bpf_attr {
+  *                    and *params*->smac will not be set as output. A common
+  *                    use case is to call **bpf_redirect_neigh**\ () after
+  *                    doing **bpf_fib_lookup**\ ().
++ *            **BPF_FIB_LOOKUP_SRC**
++ *                    Derive and set source IP addr in *params*->ipv{4,6}_src
++ *                    for the nexthop. If the src addr cannot be derived,
++ *                    **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this
++ *                    case, *params*->dmac and *params*->smac are not set either.
+  *
+  *            *ctx* is either **struct xdp_md** for XDP programs or
+  *            **struct sk_buff** tc cls_act programs.
+@@ -6051,6 +6056,7 @@ enum {
+       BPF_FIB_LOOKUP_OUTPUT  = (1U << 1),
+       BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
+       BPF_FIB_LOOKUP_TBID    = (1U << 3),
++      BPF_FIB_LOOKUP_SRC     = (1U << 4),
+ };
+ enum {
+@@ -6063,6 +6069,7 @@ enum {
+       BPF_FIB_LKUP_RET_UNSUPP_LWT,   /* fwd requires encapsulation */
+       BPF_FIB_LKUP_RET_NO_NEIGH,     /* no neighbor entry for nh */
+       BPF_FIB_LKUP_RET_FRAG_NEEDED,  /* fragmentation required to fwd */
++      BPF_FIB_LKUP_RET_NO_SRC_ADDR,  /* failed to derive IP src addr */
+ };
+ struct bpf_fib_lookup {
+@@ -6097,6 +6104,9 @@ struct bpf_fib_lookup {
+               __u32   rt_metric;
+       };
++      /* input: source address to consider for lookup
++       * output: source address result from lookup
++       */
+       union {
+               __be32          ipv4_src;
+               __u32           ipv6_src[4];  /* in6_addr; network order */
index be9eb6518a8552c60c3ba3b76287c7232d00267e..17af970ba0526f5162dea80738f933175d922726 100644 (file)
@@ -77,3 +77,6 @@ gpiolib-fix-the-error-path-order-in-gpiochip_add_dat.patch
 gpio-fix-resource-unwinding-order-in-error-path.patch
 revert-interconnect-fix-locking-for-runpm-vs-reclaim.patch
 revert-interconnect-teach-lockdep-about-icc_bw_lock-order.patch
+bpf-add-bpf_fib_lookup_skip_neigh-for-bpf_fib_lookup.patch
+bpf-add-table-id-to-bpf_fib_lookup-bpf-helper.patch
+bpf-derive-source-ip-addr-via-bpf_-_fib_lookup.patch