From: Greg Kroah-Hartman Date: Sun, 27 Aug 2023 07:55:17 +0000 (+0200) Subject: 6.1-stable patches X-Git-Tag: v6.1.49~32 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=8891ebeb748da6a304198abd443950d0399bed1a;p=thirdparty%2Fkernel%2Fstable-queue.git 6.1-stable patches added patches: nfs-use-vfs-setgid-helper.patch nfsd-use-vfs-setgid-helper.patch selftests-net-mv-bpf-nat6to4.c-to-net-folder.patch --- diff --git a/queue-6.1/nfs-use-vfs-setgid-helper.patch b/queue-6.1/nfs-use-vfs-setgid-helper.patch new file mode 100644 index 00000000000..42f970cc875 --- /dev/null +++ b/queue-6.1/nfs-use-vfs-setgid-helper.patch @@ -0,0 +1,79 @@ +From 4f704d9a8352f5c0a8fcdb6213b934630342bd44 Mon Sep 17 00:00:00 2001 +From: Christian Brauner +Date: Tue, 14 Mar 2023 12:51:10 +0100 +Subject: nfs: use vfs setgid helper + +From: Christian Brauner + +commit 4f704d9a8352f5c0a8fcdb6213b934630342bd44 upstream. + +We've aligned setgid behavior over multiple kernel releases. The details +can be found in the following two merge messages: +cf619f891971 ("Merge tag 'fs.ovl.setgid.v6.2') +426b4ca2d6a5 ("Merge tag 'fs.setgid.v6.0') +Consistent setgid stripping behavior is now encapsulated in the +setattr_should_drop_sgid() helper which is used by all filesystems that +strip setgid bits outside of vfs proper. Switch nfs to rely on this +helper as well. Without this patch the setgid stripping tests in +xfstests will fail. + +Signed-off-by: Christian Brauner (Microsoft) +Reviewed-by: Christoph Hellwig +Message-Id: <20230313-fs-nfs-setgid-v2-1-9a59f436cfc0@kernel.org> +Signed-off-by: Christian Brauner +[ Harshit: backport to 6.1.y: + fs/internal.h -- minor conflict due to code change differences. 
+ include/linux/fs.h -- Used struct user_namespace *mnt_userns + instead of struct mnt_idmap *idmap + fs/nfs/inode.c -- Used init_user_ns instead of nop_mnt_idmap ] +Signed-off-by: Harshit Mogalapalli +Signed-off-by: Greg Kroah-Hartman +--- + fs/attr.c | 1 + + fs/internal.h | 2 -- + fs/nfs/inode.c | 4 +--- + include/linux/fs.h | 2 ++ + 4 files changed, 4 insertions(+), 5 deletions(-) + +--- a/fs/attr.c ++++ b/fs/attr.c +@@ -47,6 +47,7 @@ int setattr_should_drop_sgid(struct user + return ATTR_KILL_SGID; + return 0; + } ++EXPORT_SYMBOL(setattr_should_drop_sgid); + + /** + * setattr_should_drop_suidgid - determine whether the set{g,u}id bit needs to +--- a/fs/internal.h ++++ b/fs/internal.h +@@ -242,5 +242,3 @@ ssize_t __kernel_write_iter(struct file + /* + * fs/attr.c + */ +-int setattr_should_drop_sgid(struct user_namespace *mnt_userns, +- const struct inode *inode); +--- a/fs/nfs/inode.c ++++ b/fs/nfs/inode.c +@@ -717,9 +717,7 @@ void nfs_setattr_update_inode(struct ino + if ((attr->ia_valid & ATTR_KILL_SUID) != 0 && + inode->i_mode & S_ISUID) + inode->i_mode &= ~S_ISUID; +- if ((attr->ia_valid & ATTR_KILL_SGID) != 0 && +- (inode->i_mode & (S_ISGID | S_IXGRP)) == +- (S_ISGID | S_IXGRP)) ++ if (setattr_should_drop_sgid(&init_user_ns, inode)) + inode->i_mode &= ~S_ISGID; + if ((attr->ia_valid & ATTR_MODE) != 0) { + int mode = attr->ia_mode & S_IALLUGO; +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -3120,6 +3120,8 @@ extern struct inode *new_inode(struct su + extern void free_inode_nonrcu(struct inode *inode); + extern int setattr_should_drop_suidgid(struct user_namespace *, struct inode *); + extern int file_remove_privs(struct file *); ++int setattr_should_drop_sgid(struct user_namespace *mnt_userns, ++ const struct inode *inode); + + /* + * This must be used for allocating filesystems specific inodes to set diff --git a/queue-6.1/nfsd-use-vfs-setgid-helper.patch b/queue-6.1/nfsd-use-vfs-setgid-helper.patch new file mode 100644 index 
00000000000..063a17afb19 --- /dev/null +++ b/queue-6.1/nfsd-use-vfs-setgid-helper.patch @@ -0,0 +1,67 @@ +From 2d8ae8c417db284f598dffb178cc01e7db0f1821 Mon Sep 17 00:00:00 2001 +From: Christian Brauner +Date: Tue, 2 May 2023 15:36:02 +0200 +Subject: nfsd: use vfs setgid helper + +From: Christian Brauner + +commit 2d8ae8c417db284f598dffb178cc01e7db0f1821 upstream. + +We've aligned setgid behavior over multiple kernel releases. The details +can be found in commit cf619f891971 ("Merge tag 'fs.ovl.setgid.v6.2' of +git://git.kernel.org/pub/scm/linux/kernel/git/vfs/idmapping") and +commit 426b4ca2d6a5 ("Merge tag 'fs.setgid.v6.0' of +git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux"). +Consistent setgid stripping behavior is now encapsulated in the +setattr_should_drop_sgid() helper which is used by all filesystems that +strip setgid bits outside of vfs proper. Usually ATTR_KILL_SGID is +raised in e.g., chown_common() and is subject to the +setattr_should_drop_sgid() check to determine whether the setgid bit can +be retained. Since nfsd is raising ATTR_KILL_SGID unconditionally it +will cause notify_change() to strip it even if the caller had the +necessary privileges to retain it. Ensure that nfsd only raises +ATTR_KILL_SGID if the caller lacks the necessary privileges to retain the +setgid bit. + +Without this patch the setgid stripping tests in LTP will fail: + +> As you can see, the problem is S_ISGID (0002000) was dropped on a +> non-group-executable file while chown was invoked by super-user, while + +[...] + +> fchown02.c:66: TFAIL: testfile2: wrong mode permissions 0100700, expected 0102700 + +[...] + +> chown02.c:57: TFAIL: testfile2: wrong mode permissions 0100700, expected 0102700 + +With this patch all tests pass. 
+ +Reported-by: Sherry Yang +Signed-off-by: Christian Brauner +Reviewed-by: Jeff Layton +Cc: +Signed-off-by: Chuck Lever +[Harshit: backport to 6.1.y: + Use init_user_ns instead of nop_mnt_idmap as we don't have + commit abf08576afe3 ("fs: port vfs_*() helpers to struct mnt_idmap")] +Signed-off-by: Harshit Mogalapalli +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfsd/vfs.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/fs/nfsd/vfs.c ++++ b/fs/nfsd/vfs.c +@@ -321,7 +321,9 @@ nfsd_sanitize_attrs(struct inode *inode, + iap->ia_mode &= ~S_ISGID; + } else { + /* set ATTR_KILL_* bits and let VFS handle it */ +- iap->ia_valid |= (ATTR_KILL_SUID | ATTR_KILL_SGID); ++ iap->ia_valid |= ATTR_KILL_SUID; ++ iap->ia_valid |= ++ setattr_should_drop_sgid(&init_user_ns, inode); + } + } + } diff --git a/queue-6.1/selftests-net-mv-bpf-nat6to4.c-to-net-folder.patch b/queue-6.1/selftests-net-mv-bpf-nat6to4.c-to-net-folder.patch new file mode 100644 index 00000000000..87fd0c9da27 --- /dev/null +++ b/queue-6.1/selftests-net-mv-bpf-nat6to4.c-to-net-folder.patch @@ -0,0 +1,729 @@ +From 3c107f36db061603bee7564fbd6388b1f1879fd3 Mon Sep 17 00:00:00 2001 +From: Hangbin Liu +Date: Wed, 18 Jan 2023 10:09:27 +0800 +Subject: selftests/net: mv bpf/nat6to4.c to net folder +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Hangbin Liu + +commit 3c107f36db061603bee7564fbd6388b1f1879fd3 upstream. + +There are some issues with the bpf/nat6to4.c building. + +1. It uses TEST_CUSTOM_PROGS, which will add the nat6to4.o to + kselftest-list file and run by common run_tests. +2. When building the test via `make -C tools/testing/selftests/ + TARGETS="net"`, the nat6to4.o will be built in selftests/net/bpf/ + folder. But in test udpgro_frglist.sh it refers to ../bpf/nat6to4.o. + The correct path should be ./bpf/nat6to4.o. +3. If building the test via `make -C tools/testing/selftests/ TARGETS="net" + install`. 
The nat6to4.o will be installed to kselftest_install/net/ + folder. Then the udpgro_frglist.sh should refer to ./nat6to4.o. + +To fix the confusing test path, let's just move the nat6to4.c to net folder +and build it as TEST_GEN_FILES. + +Fixes: edae34a3ed92 ("selftests net: add UDP GRO fraglist + bpf self-tests") +Tested-by: Björn Töpel +Signed-off-by: Hangbin Liu +Link: https://lore.kernel.org/r/20230118020927.3971864-1-liuhangbin@gmail.com +Signed-off-by: Paolo Abeni +Signed-off-by: Hardik Garg +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/net/Makefile | 50 ++++++++++++++++++- + tools/testing/selftests/net/bpf/Makefile | 14 ------ + .../testing/selftests/net/{bpf => }/nat6to4.c | 0 + tools/testing/selftests/net/Makefile | 50 ++++ + tools/testing/selftests/net/bpf/Makefile | 14 - + tools/testing/selftests/net/bpf/nat6to4.c | 285 -------------------------- + tools/testing/selftests/net/nat6to4.c | 285 ++++++++++++++++++++++++++ + tools/testing/selftests/net/udpgro_frglist.sh | 8 + 5 files changed, 337 insertions(+), 305 deletions(-) + delete mode 100644 tools/testing/selftests/net/bpf/Makefile + rename tools/testing/selftests/net/{bpf => }/nat6to4.c (100%) + +--- a/tools/testing/selftests/net/Makefile ++++ b/tools/testing/selftests/net/Makefile +@@ -71,14 +71,60 @@ TEST_GEN_FILES += bind_bhash + TEST_GEN_PROGS += sk_bind_sendto_listen + TEST_GEN_PROGS += sk_connect_zero_addr + TEST_PROGS += test_ingress_egress_chaining.sh ++TEST_GEN_FILES += nat6to4.o + + TEST_FILES := settings + + include ../lib.mk + +-include bpf/Makefile +- + $(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma + $(OUTPUT)/tcp_mmap: LDLIBS += -lpthread + $(OUTPUT)/tcp_inq: LDLIBS += -lpthread + $(OUTPUT)/bind_bhash: LDLIBS += -lpthread ++ ++# Rules to generate bpf obj nat6to4.o ++CLANG ?= clang ++SCRATCH_DIR := $(OUTPUT)/tools ++BUILD_DIR := $(SCRATCH_DIR)/build ++BPFDIR := $(abspath ../../../lib/bpf) ++APIDIR := $(abspath ../../../include/uapi) ++ ++CCINCLUDE += -I../bpf 
++CCINCLUDE += -I../../../../usr/include/ ++CCINCLUDE += -I$(SCRATCH_DIR)/include ++ ++BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a ++ ++MAKE_DIRS := $(BUILD_DIR)/libbpf ++$(MAKE_DIRS): ++ mkdir -p $@ ++ ++# Get Clang's default includes on this system, as opposed to those seen by ++# '-target bpf'. This fixes "missing" files on some architectures/distros, ++# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc. ++# ++# Use '-idirafter': Don't interfere with include mechanics except where the ++# build would have failed anyways. ++define get_sys_includes ++$(shell $(1) $(2) -v -E - &1 \ ++ | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \ ++$(shell $(1) $(2) -dM -E - +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +- +-#include +- +-#include +-#include +- +-#define IP_DF 0x4000 // Flag: "Don't Fragment" +- +-SEC("schedcls/ingress6/nat_6") +-int sched_cls_ingress6_nat_6_prog(struct __sk_buff *skb) +-{ +- const int l2_header_size = sizeof(struct ethhdr); +- void *data = (void *)(long)skb->data; +- const void *data_end = (void *)(long)skb->data_end; +- const struct ethhdr * const eth = data; // used iff is_ethernet +- const struct ipv6hdr * const ip6 = (void *)(eth + 1); +- +- // Require ethernet dst mac address to be our unicast address. 
+- if (skb->pkt_type != PACKET_HOST) +- return TC_ACT_OK; +- +- // Must be meta-ethernet IPv6 frame +- if (skb->protocol != bpf_htons(ETH_P_IPV6)) +- return TC_ACT_OK; +- +- // Must have (ethernet and) ipv6 header +- if (data + l2_header_size + sizeof(*ip6) > data_end) +- return TC_ACT_OK; +- +- // Ethertype - if present - must be IPv6 +- if (eth->h_proto != bpf_htons(ETH_P_IPV6)) +- return TC_ACT_OK; +- +- // IP version must be 6 +- if (ip6->version != 6) +- return TC_ACT_OK; +- // Maximum IPv6 payload length that can be translated to IPv4 +- if (bpf_ntohs(ip6->payload_len) > 0xFFFF - sizeof(struct iphdr)) +- return TC_ACT_OK; +- switch (ip6->nexthdr) { +- case IPPROTO_TCP: // For TCP & UDP the checksum neutrality of the chosen IPv6 +- case IPPROTO_UDP: // address means there is no need to update their checksums. +- case IPPROTO_GRE: // We do not need to bother looking at GRE/ESP headers, +- case IPPROTO_ESP: // since there is never a checksum to update. +- break; +- default: // do not know how to handle anything else +- return TC_ACT_OK; +- } +- +- struct ethhdr eth2; // used iff is_ethernet +- +- eth2 = *eth; // Copy over the ethernet header (src/dst mac) +- eth2.h_proto = bpf_htons(ETH_P_IP); // But replace the ethertype +- +- struct iphdr ip = { +- .version = 4, // u4 +- .ihl = sizeof(struct iphdr) / sizeof(__u32), // u4 +- .tos = (ip6->priority << 4) + (ip6->flow_lbl[0] >> 4), // u8 +- .tot_len = bpf_htons(bpf_ntohs(ip6->payload_len) + sizeof(struct iphdr)), // u16 +- .id = 0, // u16 +- .frag_off = bpf_htons(IP_DF), // u16 +- .ttl = ip6->hop_limit, // u8 +- .protocol = ip6->nexthdr, // u8 +- .check = 0, // u16 +- .saddr = 0x0201a8c0, // u32 +- .daddr = 0x0101a8c0, // u32 +- }; +- +- // Calculate the IPv4 one's complement checksum of the IPv4 header. 
+- __wsum sum4 = 0; +- +- for (int i = 0; i < sizeof(ip) / sizeof(__u16); ++i) +- sum4 += ((__u16 *)&ip)[i]; +- +- // Note that sum4 is guaranteed to be non-zero by virtue of ip.version == 4 +- sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse u32 into range 1 .. 0x1FFFE +- sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse any potential carry into u16 +- ip.check = (__u16)~sum4; // sum4 cannot be zero, so this is never 0xFFFF +- +- // Calculate the *negative* IPv6 16-bit one's complement checksum of the IPv6 header. +- __wsum sum6 = 0; +- // We'll end up with a non-zero sum due to ip6->version == 6 (which has '0' bits) +- for (int i = 0; i < sizeof(*ip6) / sizeof(__u16); ++i) +- sum6 += ~((__u16 *)ip6)[i]; // note the bitwise negation +- +- // Note that there is no L4 checksum update: we are relying on the checksum neutrality +- // of the ipv6 address chosen by netd's ClatdController. +- +- // Packet mutations begin - point of no return, but if this first modification fails +- // the packet is probably still pristine, so let clatd handle it. +- if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IP), 0)) +- return TC_ACT_OK; +- bpf_csum_update(skb, sum6); +- +- data = (void *)(long)skb->data; +- data_end = (void *)(long)skb->data_end; +- if (data + l2_header_size + sizeof(struct iphdr) > data_end) +- return TC_ACT_SHOT; +- +- struct ethhdr *new_eth = data; +- +- // Copy over the updated ethernet header +- *new_eth = eth2; +- +- // Copy over the new ipv4 header. 
+- *(struct iphdr *)(new_eth + 1) = ip; +- return bpf_redirect(skb->ifindex, BPF_F_INGRESS); +-} +- +-SEC("schedcls/egress4/snat4") +-int sched_cls_egress4_snat4_prog(struct __sk_buff *skb) +-{ +- const int l2_header_size = sizeof(struct ethhdr); +- void *data = (void *)(long)skb->data; +- const void *data_end = (void *)(long)skb->data_end; +- const struct ethhdr *const eth = data; // used iff is_ethernet +- const struct iphdr *const ip4 = (void *)(eth + 1); +- +- // Must be meta-ethernet IPv4 frame +- if (skb->protocol != bpf_htons(ETH_P_IP)) +- return TC_ACT_OK; +- +- // Must have ipv4 header +- if (data + l2_header_size + sizeof(struct ipv6hdr) > data_end) +- return TC_ACT_OK; +- +- // Ethertype - if present - must be IPv4 +- if (eth->h_proto != bpf_htons(ETH_P_IP)) +- return TC_ACT_OK; +- +- // IP version must be 4 +- if (ip4->version != 4) +- return TC_ACT_OK; +- +- // We cannot handle IP options, just standard 20 byte == 5 dword minimal IPv4 header +- if (ip4->ihl != 5) +- return TC_ACT_OK; +- +- // Maximum IPv6 payload length that can be translated to IPv4 +- if (bpf_htons(ip4->tot_len) > 0xFFFF - sizeof(struct ipv6hdr)) +- return TC_ACT_OK; +- +- // Calculate the IPv4 one's complement checksum of the IPv4 header. +- __wsum sum4 = 0; +- +- for (int i = 0; i < sizeof(*ip4) / sizeof(__u16); ++i) +- sum4 += ((__u16 *)ip4)[i]; +- +- // Note that sum4 is guaranteed to be non-zero by virtue of ip4->version == 4 +- sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse u32 into range 1 .. 
0x1FFFE +- sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse any potential carry into u16 +- // for a correct checksum we should get *a* zero, but sum4 must be positive, ie 0xFFFF +- if (sum4 != 0xFFFF) +- return TC_ACT_OK; +- +- // Minimum IPv4 total length is the size of the header +- if (bpf_ntohs(ip4->tot_len) < sizeof(*ip4)) +- return TC_ACT_OK; +- +- // We are incapable of dealing with IPv4 fragments +- if (ip4->frag_off & ~bpf_htons(IP_DF)) +- return TC_ACT_OK; +- +- switch (ip4->protocol) { +- case IPPROTO_TCP: // For TCP & UDP the checksum neutrality of the chosen IPv6 +- case IPPROTO_GRE: // address means there is no need to update their checksums. +- case IPPROTO_ESP: // We do not need to bother looking at GRE/ESP headers, +- break; // since there is never a checksum to update. +- +- case IPPROTO_UDP: // See above comment, but must also have UDP header... +- if (data + sizeof(*ip4) + sizeof(struct udphdr) > data_end) +- return TC_ACT_OK; +- const struct udphdr *uh = (const struct udphdr *)(ip4 + 1); +- // If IPv4/UDP checksum is 0 then fallback to clatd so it can calculate the +- // checksum. Otherwise the network or more likely the NAT64 gateway might +- // drop the packet because in most cases IPv6/UDP packets with a zero checksum +- // are invalid. See RFC 6935. 
TODO: calculate checksum via bpf_csum_diff() +- if (!uh->check) +- return TC_ACT_OK; +- break; +- +- default: // do not know how to handle anything else +- return TC_ACT_OK; +- } +- struct ethhdr eth2; // used iff is_ethernet +- +- eth2 = *eth; // Copy over the ethernet header (src/dst mac) +- eth2.h_proto = bpf_htons(ETH_P_IPV6); // But replace the ethertype +- +- struct ipv6hdr ip6 = { +- .version = 6, // __u8:4 +- .priority = ip4->tos >> 4, // __u8:4 +- .flow_lbl = {(ip4->tos & 0xF) << 4, 0, 0}, // __u8[3] +- .payload_len = bpf_htons(bpf_ntohs(ip4->tot_len) - 20), // __be16 +- .nexthdr = ip4->protocol, // __u8 +- .hop_limit = ip4->ttl, // __u8 +- }; +- ip6.saddr.in6_u.u6_addr32[0] = bpf_htonl(0x20010db8); +- ip6.saddr.in6_u.u6_addr32[1] = 0; +- ip6.saddr.in6_u.u6_addr32[2] = 0; +- ip6.saddr.in6_u.u6_addr32[3] = bpf_htonl(1); +- ip6.daddr.in6_u.u6_addr32[0] = bpf_htonl(0x20010db8); +- ip6.daddr.in6_u.u6_addr32[1] = 0; +- ip6.daddr.in6_u.u6_addr32[2] = 0; +- ip6.daddr.in6_u.u6_addr32[3] = bpf_htonl(2); +- +- // Calculate the IPv6 16-bit one's complement checksum of the IPv6 header. +- __wsum sum6 = 0; +- // We'll end up with a non-zero sum due to ip6.version == 6 +- for (int i = 0; i < sizeof(ip6) / sizeof(__u16); ++i) +- sum6 += ((__u16 *)&ip6)[i]; +- +- // Packet mutations begin - point of no return, but if this first modification fails +- // the packet is probably still pristine, so let clatd handle it. +- if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IPV6), 0)) +- return TC_ACT_OK; +- +- // This takes care of updating the skb->csum field for a CHECKSUM_COMPLETE packet. +- // In such a case, skb->csum is a 16-bit one's complement sum of the entire payload, +- // thus we need to subtract out the ipv4 header's sum, and add in the ipv6 header's sum. +- // However, we've already verified the ipv4 checksum is correct and thus 0. +- // Thus we only need to add the ipv6 header's sum. 
+- // +- // bpf_csum_update() always succeeds if the skb is CHECKSUM_COMPLETE and returns an error +- // (-ENOTSUPP) if it isn't. So we just ignore the return code (see above for more details). +- bpf_csum_update(skb, sum6); +- +- // bpf_skb_change_proto() invalidates all pointers - reload them. +- data = (void *)(long)skb->data; +- data_end = (void *)(long)skb->data_end; +- +- // I cannot think of any valid way for this error condition to trigger, however I do +- // believe the explicit check is required to keep the in kernel ebpf verifier happy. +- if (data + l2_header_size + sizeof(ip6) > data_end) +- return TC_ACT_SHOT; +- +- struct ethhdr *new_eth = data; +- +- // Copy over the updated ethernet header +- *new_eth = eth2; +- // Copy over the new ipv4 header. +- *(struct ipv6hdr *)(new_eth + 1) = ip6; +- return TC_ACT_OK; +-} +- +-char _license[] SEC("license") = ("GPL"); +--- /dev/null ++++ b/tools/testing/selftests/net/nat6to4.c +@@ -0,0 +1,285 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * This code is taken from the Android Open Source Project and the author ++ * (Maciej Żenczykowski) has gave permission to relicense it under the ++ * GPLv2. Therefore this program is free software; ++ * You can redistribute it and/or modify it under the terms of the GNU ++ * General Public License version 2 as published by the Free Software ++ * Foundation ++ ++ * The original headers, including the original license headers, are ++ * included below for completeness. ++ * ++ * Copyright (C) 2019 The Android Open Source Project ++ * ++ * Licensed under the Apache License, Version 2.0 (the "License"); ++ * you may not use this file except in compliance with the License. 
++ * You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++ ++#include ++ ++#include ++#include ++ ++#define IP_DF 0x4000 // Flag: "Don't Fragment" ++ ++SEC("schedcls/ingress6/nat_6") ++int sched_cls_ingress6_nat_6_prog(struct __sk_buff *skb) ++{ ++ const int l2_header_size = sizeof(struct ethhdr); ++ void *data = (void *)(long)skb->data; ++ const void *data_end = (void *)(long)skb->data_end; ++ const struct ethhdr * const eth = data; // used iff is_ethernet ++ const struct ipv6hdr * const ip6 = (void *)(eth + 1); ++ ++ // Require ethernet dst mac address to be our unicast address. ++ if (skb->pkt_type != PACKET_HOST) ++ return TC_ACT_OK; ++ ++ // Must be meta-ethernet IPv6 frame ++ if (skb->protocol != bpf_htons(ETH_P_IPV6)) ++ return TC_ACT_OK; ++ ++ // Must have (ethernet and) ipv6 header ++ if (data + l2_header_size + sizeof(*ip6) > data_end) ++ return TC_ACT_OK; ++ ++ // Ethertype - if present - must be IPv6 ++ if (eth->h_proto != bpf_htons(ETH_P_IPV6)) ++ return TC_ACT_OK; ++ ++ // IP version must be 6 ++ if (ip6->version != 6) ++ return TC_ACT_OK; ++ // Maximum IPv6 payload length that can be translated to IPv4 ++ if (bpf_ntohs(ip6->payload_len) > 0xFFFF - sizeof(struct iphdr)) ++ return TC_ACT_OK; ++ switch (ip6->nexthdr) { ++ case IPPROTO_TCP: // For TCP & UDP the checksum neutrality of the chosen IPv6 ++ case IPPROTO_UDP: // address means there is no need to update their checksums. 
++ case IPPROTO_GRE: // We do not need to bother looking at GRE/ESP headers, ++ case IPPROTO_ESP: // since there is never a checksum to update. ++ break; ++ default: // do not know how to handle anything else ++ return TC_ACT_OK; ++ } ++ ++ struct ethhdr eth2; // used iff is_ethernet ++ ++ eth2 = *eth; // Copy over the ethernet header (src/dst mac) ++ eth2.h_proto = bpf_htons(ETH_P_IP); // But replace the ethertype ++ ++ struct iphdr ip = { ++ .version = 4, // u4 ++ .ihl = sizeof(struct iphdr) / sizeof(__u32), // u4 ++ .tos = (ip6->priority << 4) + (ip6->flow_lbl[0] >> 4), // u8 ++ .tot_len = bpf_htons(bpf_ntohs(ip6->payload_len) + sizeof(struct iphdr)), // u16 ++ .id = 0, // u16 ++ .frag_off = bpf_htons(IP_DF), // u16 ++ .ttl = ip6->hop_limit, // u8 ++ .protocol = ip6->nexthdr, // u8 ++ .check = 0, // u16 ++ .saddr = 0x0201a8c0, // u32 ++ .daddr = 0x0101a8c0, // u32 ++ }; ++ ++ // Calculate the IPv4 one's complement checksum of the IPv4 header. ++ __wsum sum4 = 0; ++ ++ for (int i = 0; i < sizeof(ip) / sizeof(__u16); ++i) ++ sum4 += ((__u16 *)&ip)[i]; ++ ++ // Note that sum4 is guaranteed to be non-zero by virtue of ip.version == 4 ++ sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse u32 into range 1 .. 0x1FFFE ++ sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse any potential carry into u16 ++ ip.check = (__u16)~sum4; // sum4 cannot be zero, so this is never 0xFFFF ++ ++ // Calculate the *negative* IPv6 16-bit one's complement checksum of the IPv6 header. ++ __wsum sum6 = 0; ++ // We'll end up with a non-zero sum due to ip6->version == 6 (which has '0' bits) ++ for (int i = 0; i < sizeof(*ip6) / sizeof(__u16); ++i) ++ sum6 += ~((__u16 *)ip6)[i]; // note the bitwise negation ++ ++ // Note that there is no L4 checksum update: we are relying on the checksum neutrality ++ // of the ipv6 address chosen by netd's ClatdController. 
++ ++ // Packet mutations begin - point of no return, but if this first modification fails ++ // the packet is probably still pristine, so let clatd handle it. ++ if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IP), 0)) ++ return TC_ACT_OK; ++ bpf_csum_update(skb, sum6); ++ ++ data = (void *)(long)skb->data; ++ data_end = (void *)(long)skb->data_end; ++ if (data + l2_header_size + sizeof(struct iphdr) > data_end) ++ return TC_ACT_SHOT; ++ ++ struct ethhdr *new_eth = data; ++ ++ // Copy over the updated ethernet header ++ *new_eth = eth2; ++ ++ // Copy over the new ipv4 header. ++ *(struct iphdr *)(new_eth + 1) = ip; ++ return bpf_redirect(skb->ifindex, BPF_F_INGRESS); ++} ++ ++SEC("schedcls/egress4/snat4") ++int sched_cls_egress4_snat4_prog(struct __sk_buff *skb) ++{ ++ const int l2_header_size = sizeof(struct ethhdr); ++ void *data = (void *)(long)skb->data; ++ const void *data_end = (void *)(long)skb->data_end; ++ const struct ethhdr *const eth = data; // used iff is_ethernet ++ const struct iphdr *const ip4 = (void *)(eth + 1); ++ ++ // Must be meta-ethernet IPv4 frame ++ if (skb->protocol != bpf_htons(ETH_P_IP)) ++ return TC_ACT_OK; ++ ++ // Must have ipv4 header ++ if (data + l2_header_size + sizeof(struct ipv6hdr) > data_end) ++ return TC_ACT_OK; ++ ++ // Ethertype - if present - must be IPv4 ++ if (eth->h_proto != bpf_htons(ETH_P_IP)) ++ return TC_ACT_OK; ++ ++ // IP version must be 4 ++ if (ip4->version != 4) ++ return TC_ACT_OK; ++ ++ // We cannot handle IP options, just standard 20 byte == 5 dword minimal IPv4 header ++ if (ip4->ihl != 5) ++ return TC_ACT_OK; ++ ++ // Maximum IPv6 payload length that can be translated to IPv4 ++ if (bpf_htons(ip4->tot_len) > 0xFFFF - sizeof(struct ipv6hdr)) ++ return TC_ACT_OK; ++ ++ // Calculate the IPv4 one's complement checksum of the IPv4 header. 
++ __wsum sum4 = 0; ++ ++ for (int i = 0; i < sizeof(*ip4) / sizeof(__u16); ++i) ++ sum4 += ((__u16 *)ip4)[i]; ++ ++ // Note that sum4 is guaranteed to be non-zero by virtue of ip4->version == 4 ++ sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse u32 into range 1 .. 0x1FFFE ++ sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse any potential carry into u16 ++ // for a correct checksum we should get *a* zero, but sum4 must be positive, ie 0xFFFF ++ if (sum4 != 0xFFFF) ++ return TC_ACT_OK; ++ ++ // Minimum IPv4 total length is the size of the header ++ if (bpf_ntohs(ip4->tot_len) < sizeof(*ip4)) ++ return TC_ACT_OK; ++ ++ // We are incapable of dealing with IPv4 fragments ++ if (ip4->frag_off & ~bpf_htons(IP_DF)) ++ return TC_ACT_OK; ++ ++ switch (ip4->protocol) { ++ case IPPROTO_TCP: // For TCP & UDP the checksum neutrality of the chosen IPv6 ++ case IPPROTO_GRE: // address means there is no need to update their checksums. ++ case IPPROTO_ESP: // We do not need to bother looking at GRE/ESP headers, ++ break; // since there is never a checksum to update. ++ ++ case IPPROTO_UDP: // See above comment, but must also have UDP header... ++ if (data + sizeof(*ip4) + sizeof(struct udphdr) > data_end) ++ return TC_ACT_OK; ++ const struct udphdr *uh = (const struct udphdr *)(ip4 + 1); ++ // If IPv4/UDP checksum is 0 then fallback to clatd so it can calculate the ++ // checksum. Otherwise the network or more likely the NAT64 gateway might ++ // drop the packet because in most cases IPv6/UDP packets with a zero checksum ++ // are invalid. See RFC 6935. 
TODO: calculate checksum via bpf_csum_diff() ++ if (!uh->check) ++ return TC_ACT_OK; ++ break; ++ ++ default: // do not know how to handle anything else ++ return TC_ACT_OK; ++ } ++ struct ethhdr eth2; // used iff is_ethernet ++ ++ eth2 = *eth; // Copy over the ethernet header (src/dst mac) ++ eth2.h_proto = bpf_htons(ETH_P_IPV6); // But replace the ethertype ++ ++ struct ipv6hdr ip6 = { ++ .version = 6, // __u8:4 ++ .priority = ip4->tos >> 4, // __u8:4 ++ .flow_lbl = {(ip4->tos & 0xF) << 4, 0, 0}, // __u8[3] ++ .payload_len = bpf_htons(bpf_ntohs(ip4->tot_len) - 20), // __be16 ++ .nexthdr = ip4->protocol, // __u8 ++ .hop_limit = ip4->ttl, // __u8 ++ }; ++ ip6.saddr.in6_u.u6_addr32[0] = bpf_htonl(0x20010db8); ++ ip6.saddr.in6_u.u6_addr32[1] = 0; ++ ip6.saddr.in6_u.u6_addr32[2] = 0; ++ ip6.saddr.in6_u.u6_addr32[3] = bpf_htonl(1); ++ ip6.daddr.in6_u.u6_addr32[0] = bpf_htonl(0x20010db8); ++ ip6.daddr.in6_u.u6_addr32[1] = 0; ++ ip6.daddr.in6_u.u6_addr32[2] = 0; ++ ip6.daddr.in6_u.u6_addr32[3] = bpf_htonl(2); ++ ++ // Calculate the IPv6 16-bit one's complement checksum of the IPv6 header. ++ __wsum sum6 = 0; ++ // We'll end up with a non-zero sum due to ip6.version == 6 ++ for (int i = 0; i < sizeof(ip6) / sizeof(__u16); ++i) ++ sum6 += ((__u16 *)&ip6)[i]; ++ ++ // Packet mutations begin - point of no return, but if this first modification fails ++ // the packet is probably still pristine, so let clatd handle it. ++ if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IPV6), 0)) ++ return TC_ACT_OK; ++ ++ // This takes care of updating the skb->csum field for a CHECKSUM_COMPLETE packet. ++ // In such a case, skb->csum is a 16-bit one's complement sum of the entire payload, ++ // thus we need to subtract out the ipv4 header's sum, and add in the ipv6 header's sum. ++ // However, we've already verified the ipv4 checksum is correct and thus 0. ++ // Thus we only need to add the ipv6 header's sum. 
++ // ++ // bpf_csum_update() always succeeds if the skb is CHECKSUM_COMPLETE and returns an error ++ // (-ENOTSUPP) if it isn't. So we just ignore the return code (see above for more details). ++ bpf_csum_update(skb, sum6); ++ ++ // bpf_skb_change_proto() invalidates all pointers - reload them. ++ data = (void *)(long)skb->data; ++ data_end = (void *)(long)skb->data_end; ++ ++ // I cannot think of any valid way for this error condition to trigger, however I do ++ // believe the explicit check is required to keep the in kernel ebpf verifier happy. ++ if (data + l2_header_size + sizeof(ip6) > data_end) ++ return TC_ACT_SHOT; ++ ++ struct ethhdr *new_eth = data; ++ ++ // Copy over the updated ethernet header ++ *new_eth = eth2; ++ // Copy over the new ipv4 header. ++ *(struct ipv6hdr *)(new_eth + 1) = ip6; ++ return TC_ACT_OK; ++} ++ ++char _license[] SEC("license") = ("GPL"); +--- a/tools/testing/selftests/net/udpgro_frglist.sh ++++ b/tools/testing/selftests/net/udpgro_frglist.sh +@@ -40,8 +40,8 @@ run_one() { + + ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp + tc -n "${PEER_NS}" qdisc add dev veth1 clsact +- tc -n "${PEER_NS}" filter add dev veth1 ingress prio 4 protocol ipv6 bpf object-file ../bpf/nat6to4.o section schedcls/ingress6/nat_6 direct-action +- tc -n "${PEER_NS}" filter add dev veth1 egress prio 4 protocol ip bpf object-file ../bpf/nat6to4.o section schedcls/egress4/snat4 direct-action ++ tc -n "${PEER_NS}" filter add dev veth1 ingress prio 4 protocol ipv6 bpf object-file nat6to4.o section schedcls/ingress6/nat_6 direct-action ++ tc -n "${PEER_NS}" filter add dev veth1 egress prio 4 protocol ip bpf object-file nat6to4.o section schedcls/egress4/snat4 direct-action + echo ${rx_args} + ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r & + +@@ -88,8 +88,8 @@ if [ ! -f ${BPF_FILE} ]; then + exit -1 + fi + +-if [ ! -f bpf/nat6to4.o ]; then +- echo "Missing nat6to4 helper. Build bpfnat6to4.o selftest first" ++if [ ! 
-f nat6to4.o ]; then ++ echo "Missing nat6to4 helper. Build bpf nat6to4.o selftest first" + exit -1 + fi + diff --git a/queue-6.1/series b/queue-6.1/series index 9fa2a9b6afb..0792de319cb 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -99,3 +99,6 @@ drm-display-dp-fix-the-dp-dsc-receiver-cap-size.patch x86-fpu-invalidate-fpu-state-correctly-on-exec.patch x86-fpu-set-x86_feature_osxsave-feature-after-enabling-osxsave-in-cr4.patch hwmon-aquacomputer_d5next-add-selective-200ms-delay-after-sending-ctrl-report.patch +selftests-net-mv-bpf-nat6to4.c-to-net-folder.patch +nfs-use-vfs-setgid-helper.patch +nfsd-use-vfs-setgid-helper.patch