From 192c6ce60e9c161ecc240fab721c4f9879f07239 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 4 Mar 2024 14:33:19 +0100 Subject: [PATCH] 6.1-stable patches added patches: bpf-add-table-id-to-bpf_fib_lookup-bpf-helper.patch bpf-derive-source-ip-addr-via-bpf_-_fib_lookup.patch --- ...able-id-to-bpf_fib_lookup-bpf-helper.patch | 177 ++++++++++++++++ ...-source-ip-addr-via-bpf_-_fib_lookup.patch | 199 ++++++++++++++++++ queue-6.1/series | 2 + 3 files changed, 378 insertions(+) create mode 100644 queue-6.1/bpf-add-table-id-to-bpf_fib_lookup-bpf-helper.patch create mode 100644 queue-6.1/bpf-derive-source-ip-addr-via-bpf_-_fib_lookup.patch diff --git a/queue-6.1/bpf-add-table-id-to-bpf_fib_lookup-bpf-helper.patch b/queue-6.1/bpf-add-table-id-to-bpf_fib_lookup-bpf-helper.patch new file mode 100644 index 00000000000..41f41791f33 --- /dev/null +++ b/queue-6.1/bpf-add-table-id-to-bpf_fib_lookup-bpf-helper.patch @@ -0,0 +1,177 @@ +From ddc8f013eda8d06600cf0d832feac578fd5cffa3 Mon Sep 17 00:00:00 2001 +From: Louis DeLosSantos +Date: Wed, 31 May 2023 15:38:48 -0400 +Subject: bpf: Add table ID to bpf_fib_lookup BPF helper + +From: Louis DeLosSantos + +commit 8ad77e72caae22a1ddcfd0c03f2884929e93b7a4 upstream. + +Add ability to specify routing table ID to the `bpf_fib_lookup` BPF +helper. + +A new field `tbid` is added to `struct bpf_fib_lookup` used as +parameters to the `bpf_fib_lookup` BPF helper. + +When the helper is called with the `BPF_FIB_LOOKUP_DIRECT` and +`BPF_FIB_LOOKUP_TBID` flags the `tbid` field in `struct bpf_fib_lookup` +will be used as the table ID for the fib lookup. + +If the `tbid` does not exist the fib lookup will fail with +`BPF_FIB_LKUP_RET_NOT_FWDED`. + +The `tbid` field becomes a union over the vlan related output fields +in `struct bpf_fib_lookup` and will be zeroed immediately after usage. + +This functionality is useful in containerized environments. 
+ +For instance, if a CNI wants to dictate the next-hop for traffic leaving +a container it can create a container-specific routing table and perform +a fib lookup against this table in a "host-net-namespace-side" TC program. + +This functionality also allows `ip rule` like functionality at the TC +layer, allowing an eBPF program to pick a routing table based on some +aspect of the sk_buff. + +As a concrete use case, this feature will be used in Cilium's SRv6 L3VPN +datapath. + +When egress traffic leaves a Pod an eBPF program attached by Cilium will +determine which VRF the egress traffic should target, and then perform a +FIB lookup in a specific table representing this VRF's FIB. + +Signed-off-by: Louis DeLosSantos +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20230505-bpf-add-tbid-fib-lookup-v2-1-0a31c22c748c@gmail.com +Signed-off-by: Greg Kroah-Hartman +--- + include/uapi/linux/bpf.h | 21 ++++++++++++++++++--- + net/core/filter.c | 14 +++++++++++++- + tools/include/uapi/linux/bpf.h | 21 ++++++++++++++++++--- + 3 files changed, 49 insertions(+), 7 deletions(-) + +--- a/include/uapi/linux/bpf.h ++++ b/include/uapi/linux/bpf.h +@@ -3109,6 +3109,10 @@ union bpf_attr { + * **BPF_FIB_LOOKUP_DIRECT** + * Do a direct table lookup vs full lookup using FIB + * rules. ++ * **BPF_FIB_LOOKUP_TBID** ++ * Used with BPF_FIB_LOOKUP_DIRECT. ++ * Use the routing table ID present in *params*->tbid ++ * for the fib lookup. + * **BPF_FIB_LOOKUP_OUTPUT** + * Perform lookup from an egress perspective (default is + * ingress). 
+@@ -6687,6 +6691,7 @@ enum { + BPF_FIB_LOOKUP_DIRECT = (1U << 0), + BPF_FIB_LOOKUP_OUTPUT = (1U << 1), + BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), ++ BPF_FIB_LOOKUP_TBID = (1U << 3), + }; + + enum { +@@ -6747,9 +6752,19 @@ struct bpf_fib_lookup { + __u32 ipv6_dst[4]; /* in6_addr; network order */ + }; + +- /* output */ +- __be16 h_vlan_proto; +- __be16 h_vlan_TCI; ++ union { ++ struct { ++ /* output */ ++ __be16 h_vlan_proto; ++ __be16 h_vlan_TCI; ++ }; ++ /* input: when accompanied with the ++ * 'BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID` flags, a ++ * specific routing table to use for the fib lookup. ++ */ ++ __u32 tbid; ++ }; ++ + __u8 smac[6]; /* ETH_ALEN */ + __u8 dmac[6]; /* ETH_ALEN */ + }; +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -5752,6 +5752,12 @@ static int bpf_ipv4_fib_lookup(struct ne + u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN; + struct fib_table *tb; + ++ if (flags & BPF_FIB_LOOKUP_TBID) { ++ tbid = params->tbid; ++ /* zero out for vlan output */ ++ params->tbid = 0; ++ } ++ + tb = fib_get_table(net, tbid); + if (unlikely(!tb)) + return BPF_FIB_LKUP_RET_NOT_FWDED; +@@ -5885,6 +5891,12 @@ static int bpf_ipv6_fib_lookup(struct ne + u32 tbid = l3mdev_fib_table_rcu(dev) ? 
: RT_TABLE_MAIN; + struct fib6_table *tb; + ++ if (flags & BPF_FIB_LOOKUP_TBID) { ++ tbid = params->tbid; ++ /* zero out for vlan output */ ++ params->tbid = 0; ++ } ++ + tb = ipv6_stub->fib6_get_table(net, tbid); + if (unlikely(!tb)) + return BPF_FIB_LKUP_RET_NOT_FWDED; +@@ -5957,7 +5969,7 @@ set_fwd_params: + #endif + + #define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \ +- BPF_FIB_LOOKUP_SKIP_NEIGH) ++ BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID) + + BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx, + struct bpf_fib_lookup *, params, int, plen, u32, flags) +--- a/tools/include/uapi/linux/bpf.h ++++ b/tools/include/uapi/linux/bpf.h +@@ -3109,6 +3109,10 @@ union bpf_attr { + * **BPF_FIB_LOOKUP_DIRECT** + * Do a direct table lookup vs full lookup using FIB + * rules. ++ * **BPF_FIB_LOOKUP_TBID** ++ * Used with BPF_FIB_LOOKUP_DIRECT. ++ * Use the routing table ID present in *params*->tbid ++ * for the fib lookup. + * **BPF_FIB_LOOKUP_OUTPUT** + * Perform lookup from an egress perspective (default is + * ingress). +@@ -6687,6 +6691,7 @@ enum { + BPF_FIB_LOOKUP_DIRECT = (1U << 0), + BPF_FIB_LOOKUP_OUTPUT = (1U << 1), + BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), ++ BPF_FIB_LOOKUP_TBID = (1U << 3), + }; + + enum { +@@ -6747,9 +6752,19 @@ struct bpf_fib_lookup { + __u32 ipv6_dst[4]; /* in6_addr; network order */ + }; + +- /* output */ +- __be16 h_vlan_proto; +- __be16 h_vlan_TCI; ++ union { ++ struct { ++ /* output */ ++ __be16 h_vlan_proto; ++ __be16 h_vlan_TCI; ++ }; ++ /* input: when accompanied with the ++ * 'BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID` flags, a ++ * specific routing table to use for the fib lookup. 
++ */ ++ __u32 tbid; ++ }; ++ + __u8 smac[6]; /* ETH_ALEN */ + __u8 dmac[6]; /* ETH_ALEN */ + }; diff --git a/queue-6.1/bpf-derive-source-ip-addr-via-bpf_-_fib_lookup.patch b/queue-6.1/bpf-derive-source-ip-addr-via-bpf_-_fib_lookup.patch new file mode 100644 index 00000000000..a164e95c2ea --- /dev/null +++ b/queue-6.1/bpf-derive-source-ip-addr-via-bpf_-_fib_lookup.patch @@ -0,0 +1,199 @@ +From db9010b83cc326ed468c56ee3ba23fd97464d4aa Mon Sep 17 00:00:00 2001 +From: Martynas Pumputis +Date: Sat, 7 Oct 2023 10:14:14 +0200 +Subject: bpf: Derive source IP addr via bpf_*_fib_lookup() + +From: Martynas Pumputis + +commit dab4e1f06cabb6834de14264394ccab197007302 upstream. + +Extend the bpf_fib_lookup() helper by making it return the source +IPv4/IPv6 address if the BPF_FIB_LOOKUP_SRC flag is set. + +For example, the following snippet can be used to derive the desired +source IP address: + + struct bpf_fib_lookup p = { .ipv4_dst = ip4->daddr }; + + ret = bpf_skb_fib_lookup(skb, &p, sizeof(p), + BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH); + if (ret != BPF_FIB_LKUP_RET_SUCCESS) + return TC_ACT_SHOT; + + /* p.ipv4_src now contains the source address */ + +The inability to derive the proper source address may cause malfunctions +in BPF-based dataplanes for hosts containing netdevs with more than one +routable IP address or for multi-homed hosts. + +For example, Cilium implements packet masquerading in BPF. If an +egressing netdev to which Cilium's BPF prog is attached has +multiple IP addresses, then only one [hardcoded] IP address can be used for +masquerading. This breaks connectivity if any other IP address should have +been selected instead, for example, when both a public and a private address +are attached to the same egress interface. + +The change was tested with Cilium [1]. + +Nikolay Aleksandrov helped to figure out the IPv6 addr selection. 
+ +[1]: https://github.com/cilium/cilium/pull/28283 + +Signed-off-by: Martynas Pumputis +Link: https://lore.kernel.org/r/20231007081415.33502-2-m@lambda.lt +Signed-off-by: Martin KaFai Lau +Signed-off-by: Daniel Borkmann +Signed-off-by: Greg Kroah-Hartman +--- + include/net/ipv6_stubs.h | 5 +++++ + include/uapi/linux/bpf.h | 10 ++++++++++ + net/core/filter.c | 18 +++++++++++++++++- + net/ipv6/af_inet6.c | 1 + + tools/include/uapi/linux/bpf.h | 10 ++++++++++ + 5 files changed, 43 insertions(+), 1 deletion(-) + +--- a/include/net/ipv6_stubs.h ++++ b/include/net/ipv6_stubs.h +@@ -85,6 +85,11 @@ struct ipv6_bpf_stub { + sockptr_t optval, unsigned int optlen); + int (*ipv6_getsockopt)(struct sock *sk, int level, int optname, + sockptr_t optval, sockptr_t optlen); ++ int (*ipv6_dev_get_saddr)(struct net *net, ++ const struct net_device *dst_dev, ++ const struct in6_addr *daddr, ++ unsigned int prefs, ++ struct in6_addr *saddr); + }; + extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly; + +--- a/include/uapi/linux/bpf.h ++++ b/include/uapi/linux/bpf.h +@@ -3121,6 +3121,11 @@ union bpf_attr { + * and *params*->smac will not be set as output. A common + * use case is to call **bpf_redirect_neigh**\ () after + * doing **bpf_fib_lookup**\ (). ++ * **BPF_FIB_LOOKUP_SRC** ++ * Derive and set source IP addr in *params*->ipv{4,6}_src ++ * for the nexthop. If the src addr cannot be derived, ++ * **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this ++ * case, *params*->dmac and *params*->smac are not set either. + * + * *ctx* is either **struct xdp_md** for XDP programs or + * **struct sk_buff** tc cls_act programs. 
+@@ -6692,6 +6697,7 @@ enum { + BPF_FIB_LOOKUP_OUTPUT = (1U << 1), + BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), + BPF_FIB_LOOKUP_TBID = (1U << 3), ++ BPF_FIB_LOOKUP_SRC = (1U << 4), + }; + + enum { +@@ -6704,6 +6710,7 @@ enum { + BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ + BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ + BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ ++ BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */ + }; + + struct bpf_fib_lookup { +@@ -6738,6 +6745,9 @@ struct bpf_fib_lookup { + __u32 rt_metric; + }; + ++ /* input: source address to consider for lookup ++ * output: source address result from lookup ++ */ + union { + __be32 ipv4_src; + __u32 ipv6_src[4]; /* in6_addr; network order */ +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -5809,6 +5809,9 @@ static int bpf_ipv4_fib_lookup(struct ne + params->rt_metric = res.fi->fib_priority; + params->ifindex = dev->ifindex; + ++ if (flags & BPF_FIB_LOOKUP_SRC) ++ params->ipv4_src = fib_result_prefsrc(net, &res); ++ + /* xdp and cls_bpf programs are run in RCU-bh so + * rcu_read_lock_bh is not needed here + */ +@@ -5951,6 +5954,18 @@ static int bpf_ipv6_fib_lookup(struct ne + params->rt_metric = res.f6i->fib6_metric; + params->ifindex = dev->ifindex; + ++ if (flags & BPF_FIB_LOOKUP_SRC) { ++ if (res.f6i->fib6_prefsrc.plen) { ++ *src = res.f6i->fib6_prefsrc.addr; ++ } else { ++ err = ipv6_bpf_stub->ipv6_dev_get_saddr(net, dev, ++ &fl6.daddr, 0, ++ src); ++ if (err) ++ return BPF_FIB_LKUP_RET_NO_SRC_ADDR; ++ } ++ } ++ + if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH) + goto set_fwd_params; + +@@ -5969,7 +5984,8 @@ set_fwd_params: + #endif + + #define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \ +- BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID) ++ BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID | \ ++ BPF_FIB_LOOKUP_SRC) + + BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx, + struct bpf_fib_lookup *, params, int, 
plen, u32, flags) +--- a/net/ipv6/af_inet6.c ++++ b/net/ipv6/af_inet6.c +@@ -1077,6 +1077,7 @@ static const struct ipv6_bpf_stub ipv6_b + .udp6_lib_lookup = __udp6_lib_lookup, + .ipv6_setsockopt = do_ipv6_setsockopt, + .ipv6_getsockopt = do_ipv6_getsockopt, ++ .ipv6_dev_get_saddr = ipv6_dev_get_saddr, + }; + + static int __init inet6_init(void) +--- a/tools/include/uapi/linux/bpf.h ++++ b/tools/include/uapi/linux/bpf.h +@@ -3121,6 +3121,11 @@ union bpf_attr { + * and *params*->smac will not be set as output. A common + * use case is to call **bpf_redirect_neigh**\ () after + * doing **bpf_fib_lookup**\ (). ++ * **BPF_FIB_LOOKUP_SRC** ++ * Derive and set source IP addr in *params*->ipv{4,6}_src ++ * for the nexthop. If the src addr cannot be derived, ++ * **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this ++ * case, *params*->dmac and *params*->smac are not set either. + * + * *ctx* is either **struct xdp_md** for XDP programs or + * **struct sk_buff** tc cls_act programs. +@@ -6692,6 +6697,7 @@ enum { + BPF_FIB_LOOKUP_OUTPUT = (1U << 1), + BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), + BPF_FIB_LOOKUP_TBID = (1U << 3), ++ BPF_FIB_LOOKUP_SRC = (1U << 4), + }; + + enum { +@@ -6704,6 +6710,7 @@ enum { + BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ + BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ + BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ ++ BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */ + }; + + struct bpf_fib_lookup { +@@ -6738,6 +6745,9 @@ struct bpf_fib_lookup { + __u32 rt_metric; + }; + ++ /* input: source address to consider for lookup ++ * output: source address result from lookup ++ */ + union { + __be32 ipv4_src; + __u32 ipv6_src[4]; /* in6_addr; network order */ diff --git a/queue-6.1/series b/queue-6.1/series index 44d51dab977..464c40a71dd 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -210,3 +210,5 @@ x86-entry_32-add-verw-just-before-userspace-transition.patch 
x86-bugs-use-alternative-instead-of-mds_user_clear-static-key.patch kvm-vmx-use-bt-jnc-i.e.-eflags.cf-to-select-vmresume-vs.-vmlaunch.patch kvm-vmx-move-verw-closer-to-vmentry-for-mds-mitigation.patch +bpf-add-table-id-to-bpf_fib_lookup-bpf-helper.patch +bpf-derive-source-ip-addr-via-bpf_-_fib_lookup.patch -- 2.47.3