From: Jakub Sitnicki Date: Fri, 19 Jun 2026 17:09:29 +0000 (+0200) Subject: selftests/bpf: Add LWT encap tests for skb metadata X-Git-Tag: v7.2-rc1~29^2~45^2 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=33a971d549d82b06c07ce6ed10c33089f80fa944;p=thirdparty%2Flinux.git selftests/bpf: Add LWT encap tests for skb metadata Test that an LWT encapsulation does not silently corrupt XDP metadata sitting in the skb headroom. Exercise all three LWT dispatch paths: - BPF LWT xmit prog reserves headroom on the LWT .xmit redirect, - mpls pushes an MPLS label on the LWT .xmit redirect, - seg6 in encap mode runs on the LWT .input redirect, - ioam6 encap inserts an IOAM Hop-by-Hop option on LWT .output redirect. Signed-off-by: Jakub Sitnicki Link: https://patch.msgid.link/20260619-bpf-lwt-drop-skb-metadata-v3-2-71d6a33ab76b@cloudflare.com Signed-off-by: Jakub Kicinski --- diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 24855381290d6..aebc5082fd77e 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -45,13 +45,16 @@ CONFIG_IPV6=y CONFIG_IPV6_FOU=y CONFIG_IPV6_FOU_TUNNEL=y CONFIG_IPV6_GRE=y +CONFIG_IPV6_IOAM6_LWTUNNEL=y CONFIG_IPV6_SEG6_BPF=y +CONFIG_IPV6_SEG6_LWTUNNEL=y CONFIG_IPV6_SIT=y CONFIG_IPV6_TUNNEL=y CONFIG_KEYS=y CONFIG_LIRC=y CONFIG_LIVEPATCH=y CONFIG_LWTUNNEL=y +CONFIG_LWTUNNEL_BPF=y CONFIG_MODULE_SIG=y CONFIG_MODULE_SRCVERSION_ALL=y CONFIG_MODULE_UNLOAD=y diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c index 26159e0499c76..448807676176c 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include +#include #include "test_xdp_context_test_run.skel.h" #include "test_xdp_meta.skel.h" @@ -8,9 +10,12 @@ #define TX_NAME "veth1" #define TX_NETNS "xdp_context_tx" #define RX_NETNS "xdp_context_rx" +#define RX_MAC "02:00:00:00:00:01" +#define TX_MAC "02:00:00:00:00:02" #define TAP_NAME "tap0" #define DUMMY_NAME "dum0" #define TAP_NETNS "xdp_context_tuntap" +#define LWT_NETNS "xdp_context_lwt" #define TEST_PAYLOAD_LEN 32 static const __u8 test_payload[TEST_PAYLOAD_LEN] = { @@ -187,6 +192,42 @@ static int write_test_packet(int tap_fd) return 0; } +/* Inject Ethernet+IPv6+UDP frame into TAP */ +static int write_test_packet_udp(int tap_fd) +{ + __u8 pkt[sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + + sizeof(struct udphdr) + TEST_PAYLOAD_LEN] = {}; + struct ethhdr *eth = (void *)pkt; + struct ipv6hdr *ip6 = (void *)(eth + 1); + struct udphdr *udp = (void *)(ip6 + 1); + __u8 *payload = (void *)(udp + 1); + const __u8 tap_mac[ETH_ALEN] = { 0x02, 0, 0, 0, 0, 0x01 }; + int n; + + memcpy(eth->h_dest, tap_mac, ETH_ALEN); + eth->h_proto = htons(ETH_P_IPV6); + + ip6->version = 6; + ip6->hop_limit = 64; + ip6->nexthdr = IPPROTO_UDP; + ip6->payload_len = htons(sizeof(*udp) + TEST_PAYLOAD_LEN); + inet_pton(AF_INET6, "fd00::2", &ip6->saddr); + inet_pton(AF_INET6, "fd00:1::1", &ip6->daddr); + + udp->source = htons(42); + udp->dest = htons(42); + udp->len = htons(sizeof(*udp) + TEST_PAYLOAD_LEN); + /* UDP checksum is not validated on the forwarding path. */ + + memcpy(payload, test_payload, TEST_PAYLOAD_LEN); + + n = write(tap_fd, pkt, sizeof(pkt)); + if (!ASSERT_EQ(n, sizeof(pkt), "write frame")) + return -1; + + return 0; +} + static void dump_err_stream(const struct bpf_program *prog) { char buf[512]; @@ -518,3 +559,137 @@ void test_xdp_context_tuntap(void) test_xdp_meta__destroy(skel); } + +/* + * Test topology: + * + * tap0 fd00::1 + * RX: injected IPv6 UDP frame, XDP ingress sets metadata + * fwd: encap route prepends outer header(s) + * TX: TC egress validates metadata + * + * A routable IPv6 UDP frame is written into the tap fd, so it enters the RX + * path where XDP stores metadata. Routing then forwards it back out the same + * tap through an encapsulating route that prepends outer header(s). The TC + * egress program checks that the pushed header did not silently corrupt + * metadata. + */ +#define LWT_PIN_PATH "/sys/fs/bpf/xdp_context_lwt_xmit" + +enum lwt_encap_type { + LWT_ENCAP_BPF, + LWT_ENCAP_MPLS, + LWT_ENCAP_SEG6, + LWT_ENCAP_IOAM6, +}; + +static void test_lwt_encap(struct test_xdp_meta *skel, + enum lwt_encap_type type) +{ + LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_EGRESS); + LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1); + struct bpf_program *lwt_prog = NULL; + struct netns_obj *ns = NULL; + const char *encap; + bool pinned = false; + int tap_ifindex; + int tap_fd = -1; + int ret; + + skel->bss->test_pass = false; + + switch (type) { + case LWT_ENCAP_BPF: + encap = "encap bpf xmit pinned " LWT_PIN_PATH " via fd00::2"; + lwt_prog = skel->progs.dummy_lwt_xmit; + break; + case LWT_ENCAP_MPLS: + encap = "encap mpls 100 via inet6 fd00::2"; + break; + case LWT_ENCAP_SEG6: + encap = "encap seg6 mode encap segs fd00::2"; + break; + case LWT_ENCAP_IOAM6: + encap = "encap ioam6 mode encap tundst fd00::2 " + "trace prealloc type 0x800000 ns 0 size 4 via fd00::2"; + break; + default: + return; + } + + if (lwt_prog) { + unlink(LWT_PIN_PATH); + ret = bpf_program__pin(lwt_prog, LWT_PIN_PATH); + if (!ASSERT_OK(ret, "pin lwt prog")) + return; + pinned = true; + } + + ns = netns_new(LWT_NETNS, true); + if (!ASSERT_OK_PTR(ns, "netns_new")) + goto close; + + tap_fd = open_tuntap(TAP_NAME, true); + if (!ASSERT_GE(tap_fd, 0, "open_tuntap")) + goto close; + + SYS(close, "ip link set dev " TAP_NAME " address " RX_MAC); + SYS(close, "sysctl -wq net.ipv6.conf.all.forwarding=1"); + SYS(close, "ip addr add fd00::1/64 dev " TAP_NAME " nodad"); + SYS(close, "ip link set dev " TAP_NAME " up"); + SYS(close, "ip neigh add fd00::2 lladdr " TX_MAC " nud permanent dev " TAP_NAME); + SYS(close, "ip -6 route add fd00:1::/64 %s dev %s", encap, TAP_NAME); + + tap_ifindex = if_nametoindex(TAP_NAME); + if (!ASSERT_GE(tap_ifindex, 0, "if_nametoindex")) + goto close; + + ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(skel->progs.ing_xdp), + 0, NULL); + if (!ASSERT_GE(ret, 0, "bpf_xdp_attach")) + goto close; + + tc_hook.ifindex = tap_ifindex; + ret = bpf_tc_hook_create(&tc_hook); + if (!ASSERT_OK(ret, "bpf_tc_hook_create")) + goto close; + + tc_opts.prog_fd = bpf_program__fd(skel->progs.tc_is_meta_empty); + ret = bpf_tc_attach(&tc_hook, &tc_opts); + if (!ASSERT_OK(ret, "bpf_tc_attach")) + goto close; + + ret = write_test_packet_udp(tap_fd); + if (!ASSERT_OK(ret, "write_test_packet_udp")) + goto close; + + if (!ASSERT_TRUE(skel->bss->test_pass, "test_pass")) + dump_err_stream(skel->progs.tc_is_meta_empty); + +close: + if (tap_fd >= 0) + close(tap_fd); + netns_free(ns); + if (pinned) + unlink(LWT_PIN_PATH); +} + +void test_xdp_context_lwt_encap(void) +{ + struct test_xdp_meta *skel; + + skel = test_xdp_meta__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open and load skeleton")) + return; + + if (test__start_subtest("bpf_encap")) + test_lwt_encap(skel, LWT_ENCAP_BPF); + if (test__start_subtest("mpls_encap")) + test_lwt_encap(skel, LWT_ENCAP_MPLS); + if (test__start_subtest("seg6_encap")) + test_lwt_encap(skel, LWT_ENCAP_SEG6); + if (test__start_subtest("ioam6_encap")) + test_lwt_encap(skel, LWT_ENCAP_IOAM6); + + test_xdp_meta__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c index fa73b17cb9993..08b03be0b891c 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c @@ -21,10 +21,6 @@ bool test_pass; -static const __u8 smac_want[ETH_ALEN] = { - 0x12, 0x34, 0xDE, 0xAD, 0xBE, 0xEF, -}; - static const __u8 meta_want[META_SIZE] = { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, @@ -32,11 +28,6 @@ static const __u8 meta_want[META_SIZE] = { 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, }; -static bool check_smac(const struct ethhdr *eth) -{ - return !__builtin_memcmp(eth->h_source, smac_want, ETH_ALEN); -} - static bool check_metadata(const char *file, int line, __u8 *meta_have) { if (!__builtin_memcmp(meta_have, meta_want, META_SIZE)) @@ -280,18 +271,47 @@ fail: return TC_ACT_SHOT; } +/* Test packets carry test metadata pattern as payload. */ +static bool is_test_packet_xdp(struct xdp_md *ctx) +{ + __u8 meta_have[META_SIZE]; + __u32 len; + + len = bpf_xdp_get_buff_len(ctx); + if (len < META_SIZE) + return false; + if (bpf_xdp_load_bytes(ctx, len - META_SIZE, meta_have, META_SIZE)) + return false; + if (__builtin_memcmp(meta_have, meta_want, META_SIZE)) + return false; + + return true; +} + +/* Test packets carry test metadata pattern as payload. */ +static bool is_test_packet_tc(struct __sk_buff *ctx) +{ + __u8 meta_have[META_SIZE]; + + if (ctx->len < META_SIZE) + return false; + if (bpf_skb_load_bytes(ctx, ctx->len - META_SIZE, meta_have, META_SIZE)) + return false; + if (__builtin_memcmp(meta_have, meta_want, META_SIZE)) + return false; + + return true; +} + /* Reserve and clear space for metadata but don't populate it */ SEC("xdp") int ing_xdp_zalloc_meta(struct xdp_md *ctx) { - struct ethhdr *eth = ctx_ptr(ctx, data); __u8 *meta; int ret; /* Drop any non-test packets */ - if (eth + 1 > ctx_ptr(ctx, data_end)) - return XDP_DROP; - if (!check_smac(eth)) + if (!is_test_packet_xdp(ctx)) return XDP_DROP; ret = bpf_xdp_adjust_meta(ctx, -META_SIZE); @@ -310,33 +330,24 @@ int ing_xdp_zalloc_meta(struct xdp_md *ctx) SEC("xdp") int ing_xdp(struct xdp_md *ctx) { - __u8 *data, *data_meta, *data_end, *payload; - struct ethhdr *eth; + __u8 *data, *data_meta; int ret; + /* Drop any non-test packets */ + if (!is_test_packet_xdp(ctx)) + return XDP_DROP; + ret = bpf_xdp_adjust_meta(ctx, -META_SIZE); if (ret < 0) return XDP_DROP; data_meta = ctx_ptr(ctx, data_meta); - data_end = ctx_ptr(ctx, data_end); data = ctx_ptr(ctx, data); - eth = (struct ethhdr *)data; - payload = data + sizeof(struct ethhdr); - - if (payload + META_SIZE > data_end || - data_meta + META_SIZE > data) + if (data_meta + META_SIZE > data) return XDP_DROP; - /* The Linux networking stack may send other packets on the test - * interface that interfere with the test. Just drop them. - * The test packets can be recognized by their source MAC address. - */ - if (!check_smac(eth)) - return XDP_DROP; - - __builtin_memcpy(data_meta, payload, META_SIZE); + __builtin_memcpy(data_meta, meta_want, META_SIZE); return XDP_PASS; } @@ -353,7 +364,7 @@ int clone_data_meta_survives_data_write(struct __sk_buff *ctx) if (eth + 1 > ctx_ptr(ctx, data_end)) goto out; /* Ignore non-test packets */ - if (!check_smac(eth)) + if (!is_test_packet_tc(ctx)) goto out; if (meta_have + META_SIZE > eth) @@ -383,7 +394,7 @@ int clone_data_meta_survives_meta_write(struct __sk_buff *ctx) if (eth + 1 > ctx_ptr(ctx, data_end)) goto out; /* Ignore non-test packets */ - if (!check_smac(eth)) + if (!is_test_packet_tc(ctx)) goto out; if (meta_have + META_SIZE > eth) @@ -416,7 +427,7 @@ int clone_meta_dynptr_survives_data_slice_write(struct __sk_buff *ctx) if (!eth) goto out; /* Ignore non-test packets */ - if (!check_smac(eth)) + if (!is_test_packet_tc(ctx)) goto out; bpf_dynptr_from_skb_meta(ctx, 0, &meta); @@ -436,16 +447,11 @@ out: SEC("tc") int clone_meta_dynptr_survives_meta_slice_write(struct __sk_buff *ctx) { - struct bpf_dynptr data, meta; - const struct ethhdr *eth; + struct bpf_dynptr meta; __u8 *meta_have; - bpf_dynptr_from_skb(ctx, 0, &data); - eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth)); - if (!eth) - goto out; /* Ignore non-test packets */ - if (!check_smac(eth)) + if (!is_test_packet_tc(ctx)) goto out; bpf_dynptr_from_skb_meta(ctx, 0, &meta); @@ -471,15 +477,10 @@ int clone_meta_dynptr_rw_before_data_dynptr_write(struct __sk_buff *ctx) { struct bpf_dynptr data, meta; __u8 meta_have[META_SIZE]; - const struct ethhdr *eth; int err; - bpf_dynptr_from_skb(ctx, 0, &data); - eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth)); - if (!eth) - goto out; /* Ignore non-test packets */ - if (!check_smac(eth)) + if (!is_test_packet_tc(ctx)) goto out; /* Expect read-write metadata before unclone */ @@ -492,6 +493,7 @@ int clone_meta_dynptr_rw_before_data_dynptr_write(struct __sk_buff *ctx) goto out; /* Helper write to payload will unclone the packet */ + bpf_dynptr_from_skb(ctx, 0, &data); bpf_dynptr_write(&data, offsetof(struct ethhdr, h_proto), "x", 1, 0); err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0); @@ -511,17 +513,12 @@ out: SEC("tc") int clone_meta_dynptr_rw_before_meta_dynptr_write(struct __sk_buff *ctx) { - struct bpf_dynptr data, meta; + struct bpf_dynptr meta; __u8 meta_have[META_SIZE]; - const struct ethhdr *eth; int err; - bpf_dynptr_from_skb(ctx, 0, &data); - eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth)); - if (!eth) - goto out; /* Ignore non-test packets */ - if (!check_smac(eth)) + if (!is_test_packet_tc(ctx)) goto out; /* Expect read-write metadata before unclone */ @@ -545,6 +542,28 @@ out: return TC_ACT_SHOT; } +SEC("lwt_xmit") +int dummy_lwt_xmit(struct __sk_buff *ctx) +{ + if (bpf_skb_change_head(ctx, sizeof(struct ipv6hdr), 0)) + return BPF_DROP; + + return BPF_OK; +} + +SEC("tc") +int tc_is_meta_empty(struct __sk_buff *ctx) +{ + if (!is_test_packet_tc(ctx)) + return TC_ACT_OK; + + if (ctx->data_meta != ctx->data) + return TC_ACT_OK; + + test_pass = true; + return TC_ACT_OK; +} + SEC("tc") int helper_skb_vlan_push_pop(struct __sk_buff *ctx) {