From: Greg Kroah-Hartman Date: Fri, 10 Dec 2021 15:40:02 +0000 (+0100) Subject: 5.15-stable patches X-Git-Tag: v4.4.295~60 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=8c21cba736f0757e8d7f6c4b2e59821244c8abc1;p=thirdparty%2Fkernel%2Fstable-queue.git 5.15-stable patches added patches: bpf-sockmap-attach-map-progs-to-psock-early-for-feature-probes.patch bpf-x86-fix-no-previous-prototype-warning.patch nfc-fix-potential-null-pointer-deref-in-nfc_genl_dump_ses_done.patch selftests-netfilter-add-a-vrf-conntrack-testcase.patch vrf-don-t-run-conntrack-on-vrf-with-dflt-qdisc.patch --- diff --git a/queue-5.15/bpf-sockmap-attach-map-progs-to-psock-early-for-feature-probes.patch b/queue-5.15/bpf-sockmap-attach-map-progs-to-psock-early-for-feature-probes.patch new file mode 100644 index 00000000000..d9e861d034d --- /dev/null +++ b/queue-5.15/bpf-sockmap-attach-map-progs-to-psock-early-for-feature-probes.patch @@ -0,0 +1,74 @@ +From 38207a5e81230d6ffbdd51e5fa5681be5116dcae Mon Sep 17 00:00:00 2001 +From: John Fastabend +Date: Fri, 19 Nov 2021 10:14:17 -0800 +Subject: bpf, sockmap: Attach map progs to psock early for feature probes + +From: John Fastabend + +commit 38207a5e81230d6ffbdd51e5fa5681be5116dcae upstream. + +When a TCP socket is added to a sock map we look at the programs attached +to the map to determine what proto op hooks need to be changed. Before +the patch in the 'fixes' tag there were only two categories -- the empty +set of programs or a TX policy. In any case the base set handled the +receive case. + +After the fix we have an optimized program for receive that closes a small, +but possible, race on receive. This program is loaded only when the map the +psock is being added to includes a RX policy. Otherwise, the race is not +possible so we don't need to handle the race condition. + +In order for the call to sk_psock_init() to correctly evaluate the above +conditions all progs need to be set in the psock before the call. 
However, +in the current code this is not the case. We end up evaluating the +requirements on the old prog state. If your psock is attached to multiple +maps -- for example a tx map and rx map -- then the second update would pull +in the correct maps. But, the other pattern with a single rx enabled map +the correct receive hooks are not used. The result is the race fixed by the +patch in the fixes tag below may still be seen in this case. + +To fix we simply set all psock->progs before doing the call into +sock_map_init(). With this the init() call gets the full list of programs +and chooses the correct proto ops on the first iteration instead of +requiring the second update to pull them in. This fixes the race case when +only a single map is used. + +Fixes: c5d2177a72a16 ("bpf, sockmap: Fix race in ingress receive verdict with redirect to self") +Signed-off-by: John Fastabend +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20211119181418.353932-2-john.fastabend@gmail.com +Signed-off-by: Greg Kroah-Hartman +--- + net/core/sock_map.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +--- a/net/core/sock_map.c ++++ b/net/core/sock_map.c +@@ -282,6 +282,12 @@ static int sock_map_link(struct bpf_map + + if (msg_parser) + psock_set_prog(&psock->progs.msg_parser, msg_parser); ++ if (stream_parser) ++ psock_set_prog(&psock->progs.stream_parser, stream_parser); ++ if (stream_verdict) ++ psock_set_prog(&psock->progs.stream_verdict, stream_verdict); ++ if (skb_verdict) ++ psock_set_prog(&psock->progs.skb_verdict, skb_verdict); + + ret = sock_map_init_proto(sk, psock); + if (ret < 0) +@@ -292,14 +298,10 @@ static int sock_map_link(struct bpf_map + ret = sk_psock_init_strp(sk, psock); + if (ret) + goto out_unlock_drop; +- psock_set_prog(&psock->progs.stream_verdict, stream_verdict); +- psock_set_prog(&psock->progs.stream_parser, stream_parser); + sk_psock_start_strp(sk, psock); + } else if (!stream_parser && stream_verdict && 
!psock->saved_data_ready) { +- psock_set_prog(&psock->progs.stream_verdict, stream_verdict); + sk_psock_start_verdict(sk,psock); + } else if (!stream_verdict && skb_verdict && !psock->saved_data_ready) { +- psock_set_prog(&psock->progs.skb_verdict, skb_verdict); + sk_psock_start_verdict(sk, psock); + } + write_unlock_bh(&sk->sk_callback_lock); diff --git a/queue-5.15/bpf-x86-fix-no-previous-prototype-warning.patch b/queue-5.15/bpf-x86-fix-no-previous-prototype-warning.patch new file mode 100644 index 00000000000..f1f9ac4e027 --- /dev/null +++ b/queue-5.15/bpf-x86-fix-no-previous-prototype-warning.patch @@ -0,0 +1,43 @@ +From f45b2974cc0ae959a4c503a071e38a56bd64372f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= +Date: Wed, 17 Nov 2021 13:57:08 +0100 +Subject: bpf, x86: Fix "no previous prototype" warning +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Björn Töpel + +commit f45b2974cc0ae959a4c503a071e38a56bd64372f upstream. + +The arch_prepare_bpf_dispatcher function does not have a prototype, and +yields the following warning when W=1 is enabled for the kernel build. + + >> arch/x86/net/bpf_jit_comp.c:2188:5: warning: no previous \ + prototype for 'arch_prepare_bpf_dispatcher' [-Wmissing-prototypes] + 2188 | int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, \ + int num_funcs) + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Remove the warning by adding a function declaration to include/linux/bpf.h. 
+ +Fixes: 75ccbef6369e ("bpf: Introduce BPF dispatcher") +Reported-by: kernel test robot +Signed-off-by: Björn Töpel +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20211117125708.769168-1-bjorn@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/bpf.h | 1 + + 1 file changed, 1 insertion(+) + +--- a/include/linux/bpf.h ++++ b/include/linux/bpf.h +@@ -723,6 +723,7 @@ int bpf_trampoline_unlink_prog(struct bp + struct bpf_trampoline *bpf_trampoline_get(u64 key, + struct bpf_attach_target_info *tgt_info); + void bpf_trampoline_put(struct bpf_trampoline *tr); ++int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs); + #define BPF_DISPATCHER_INIT(_name) { \ + .mutex = __MUTEX_INITIALIZER(_name.mutex), \ + .func = &_name##_func, \ diff --git a/queue-5.15/nfc-fix-potential-null-pointer-deref-in-nfc_genl_dump_ses_done.patch b/queue-5.15/nfc-fix-potential-null-pointer-deref-in-nfc_genl_dump_ses_done.patch new file mode 100644 index 00000000000..28a60f5a964 --- /dev/null +++ b/queue-5.15/nfc-fix-potential-null-pointer-deref-in-nfc_genl_dump_ses_done.patch @@ -0,0 +1,37 @@ +From 4cd8371a234d051f9c9557fcbb1f8c523b1c0d10 Mon Sep 17 00:00:00 2001 +From: Krzysztof Kozlowski +Date: Thu, 9 Dec 2021 09:13:07 +0100 +Subject: nfc: fix potential NULL pointer deref in nfc_genl_dump_ses_done + +From: Krzysztof Kozlowski + +commit 4cd8371a234d051f9c9557fcbb1f8c523b1c0d10 upstream. + +The done() netlink callback nfc_genl_dump_ses_done() should check if +received argument is non-NULL, because its allocation could fail earlier +in dumpit() (nfc_genl_dump_ses()). 
+ +Fixes: ac22ac466a65 ("NFC: Add a GET_SE netlink API") +Signed-off-by: Krzysztof Kozlowski +Link: https://lore.kernel.org/r/20211209081307.57337-1-krzysztof.kozlowski@canonical.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/nfc/netlink.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/net/nfc/netlink.c ++++ b/net/nfc/netlink.c +@@ -1392,8 +1392,10 @@ static int nfc_genl_dump_ses_done(struct + { + struct class_dev_iter *iter = (struct class_dev_iter *) cb->args[0]; + +- nfc_device_iter_exit(iter); +- kfree(iter); ++ if (iter) { ++ nfc_device_iter_exit(iter); ++ kfree(iter); ++ } + + return 0; + } diff --git a/queue-5.15/selftests-netfilter-add-a-vrf-conntrack-testcase.patch b/queue-5.15/selftests-netfilter-add-a-vrf-conntrack-testcase.patch new file mode 100644 index 00000000000..e2da970b3f8 --- /dev/null +++ b/queue-5.15/selftests-netfilter-add-a-vrf-conntrack-testcase.patch @@ -0,0 +1,268 @@ +From 33b8aad21ac175eba9577a73eb62b0aa141c241c Mon Sep 17 00:00:00 2001 +From: Florian Westphal +Date: Mon, 18 Oct 2021 14:38:13 +0200 +Subject: selftests: netfilter: add a vrf+conntrack testcase + +From: Florian Westphal + +commit 33b8aad21ac175eba9577a73eb62b0aa141c241c upstream. + +Rework the reproducer for the vrf+conntrack regression reported +by Eugene into a selftest and also add a test for ip masquerading +that Lahav fixed recently. + +With net or net-next tree, the first test fails and the latter +two pass. + +With 09e856d54bda5f28 ("vrf: Reset skb conntrack connection on VRF rcv") +reverted first test passes but the last two fail. + +A proper fix needs more work, for time being a revert seems to be +the best choice, snat/masquerade did not work before the fix. 
+ +Link: https://lore.kernel.org/netdev/378ca299-4474-7e9a-3d36-2350c8c98995@gmail.com/T/#m95358a31810df7392f541f99d187227bc75c9963 +Reported-by: Eugene Crosser +Cc: Lahav Schlesinger +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/netfilter/Makefile | 3 + tools/testing/selftests/netfilter/conntrack_vrf.sh | 219 +++++++++++++++++++++ + 2 files changed, 221 insertions(+), 1 deletion(-) + create mode 100755 tools/testing/selftests/netfilter/conntrack_vrf.sh + +--- a/tools/testing/selftests/netfilter/Makefile ++++ b/tools/testing/selftests/netfilter/Makefile +@@ -5,7 +5,8 @@ TEST_PROGS := nft_trans_stress.sh nft_fi + conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \ + nft_concat_range.sh nft_conntrack_helper.sh \ + nft_queue.sh nft_meta.sh nf_nat_edemux.sh \ +- ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh ++ ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \ ++ conntrack_vrf.sh + + LDLIBS = -lmnl + TEST_GEN_FILES = nf-queue +--- /dev/null ++++ b/tools/testing/selftests/netfilter/conntrack_vrf.sh +@@ -0,0 +1,219 @@ ++#!/bin/sh ++ ++# This script demonstrates interaction of conntrack and vrf. ++# The vrf driver calls the netfilter hooks again, with oif/iif ++# pointing at the VRF device. ++# ++# For ingress, this means first iteration has iifname of lower/real ++# device. In this script, that's veth0. ++# Second iteration is iifname set to vrf device, tvrf in this script. ++# ++# For egress, this is reversed: first iteration has the vrf device, ++# second iteration is done with the lower/real/veth0 device. ++# ++# test_ct_zone_in demonstrates unexpected change of nftables ++# behavior caused by commit 09e856d54bda5f28 "vrf: Reset skb conntrack ++# connection on VRF rcv" ++# ++# It was possible to assign conntrack zone to a packet (or mark it for ++# `notracking`) in the prerouting chain before conntrack, based on real iif. 
++# ++# After the change, the zone assignment is lost and the zone is assigned based ++# on the VRF master interface (in case such a rule exists). ++# The assignment based on the `iif` matching the real device no longer applies. ++# Thus it is impossible to distinguish packets based on the original ++# interface. ++# ++# test_masquerade_vrf and test_masquerade_veth demonstrate the problem ++# that was supposed to be fixed by the commit mentioned above to make sure ++# that any fix to test case 1 won't break masquerade again. ++ ++ksft_skip=4 ++ ++IP0=172.30.30.1 ++IP1=172.30.30.2 ++PFXL=30 ++ret=0 ++ ++sfx=$(mktemp -u "XXXXXXXX") ++ns0="ns0-$sfx" ++ns1="ns1-$sfx" ++ ++cleanup() ++{ ++ ip netns pids $ns0 | xargs kill 2>/dev/null ++ ip netns pids $ns1 | xargs kill 2>/dev/null ++ ++ ip netns del $ns0 $ns1 ++} ++ ++nft --version > /dev/null 2>&1 ++if [ $? -ne 0 ];then ++ echo "SKIP: Could not run test without nft tool" ++ exit $ksft_skip ++fi ++ ++ip -Version > /dev/null 2>&1 ++if [ $? -ne 0 ];then ++ echo "SKIP: Could not run test without ip tool" ++ exit $ksft_skip ++fi ++ ++ip netns add "$ns0" ++if [ $? -ne 0 ];then ++ echo "SKIP: Could not create net namespace $ns0" ++ exit $ksft_skip ++fi ++ip netns add "$ns1" ++ ++trap cleanup EXIT ++ ++ip netns exec $ns0 sysctl -q -w net.ipv4.conf.default.rp_filter=0 ++ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0 ++ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0 ++ ++ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1 ++if [ $? -ne 0 ];then ++ echo "SKIP: Could not add veth device" ++ exit $ksft_skip ++fi ++ ++ip -net $ns0 li add tvrf type vrf table 9876 ++if [ $? 
-ne 0 ];then ++ echo "SKIP: Could not add vrf device" ++ exit $ksft_skip ++fi ++ ++ip -net $ns0 li set lo up ++ ++ip -net $ns0 li set veth0 master tvrf ++ip -net $ns0 li set tvrf up ++ip -net $ns0 li set veth0 up ++ip -net $ns1 li set veth0 up ++ ++ip -net $ns0 addr add $IP0/$PFXL dev veth0 ++ip -net $ns1 addr add $IP1/$PFXL dev veth0 ++ ++ip netns exec $ns1 iperf3 -s > /dev/null 2>&1& ++if [ $? -ne 0 ];then ++ echo "SKIP: Could not start iperf3" ++ exit $ksft_skip ++fi ++ ++# test vrf ingress handling. ++# The incoming connection should be placed in conntrack zone 1, ++# as decided by the first iteration of the ruleset. ++test_ct_zone_in() ++{ ++ip netns exec $ns0 nft -f - < /dev/null ++ ++ # should be in zone 1, not zone 2 ++ count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 1 2>/dev/null | wc -l) ++ if [ $count -eq 1 ]; then ++ echo "PASS: entry found in conntrack zone 1" ++ else ++ echo "FAIL: entry not found in conntrack zone 1" ++ count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 2 2> /dev/null | wc -l) ++ if [ $count -eq 1 ]; then ++ echo "FAIL: entry found in zone 2 instead" ++ else ++ echo "FAIL: entry not in zone 1 or 2, dumping table" ++ ip netns exec $ns0 conntrack -L ++ ip netns exec $ns0 nft list ruleset ++ fi ++ fi ++} ++ ++# add masq rule that gets evaluated w. outif set to vrf device. ++# This tests the first iteration of the packet through conntrack, ++# oifname is the vrf device. ++test_masquerade_vrf() ++{ ++ ip netns exec $ns0 conntrack -F 2>/dev/null ++ ++ip netns exec $ns0 nft -f - </dev/null ++ if [ $? -ne 0 ]; then ++ echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on vrf device" ++ ret=1 ++ return ++ fi ++ ++ # must also check that nat table was evaluated on second (lower device) iteration. ++ ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2' ++ if [ $? 
-eq 0 ]; then ++ echo "PASS: iperf3 connect with masquerade + sport rewrite on vrf device" ++ else ++ echo "FAIL: vrf masq rule has unexpected counter value" ++ ret=1 ++ fi ++} ++ ++# add masq rule that gets evaluated w. outif set to veth device. ++# This tests the 2nd iteration of the packet through conntrack, ++# oifname is the lower device (veth0 in this case). ++test_masquerade_veth() ++{ ++ ip netns exec $ns0 conntrack -F 2>/dev/null ++ip netns exec $ns0 nft -f - < /dev/null ++ if [ $? -ne 0 ]; then ++ echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on veth device" ++ ret=1 ++ return ++ fi ++ ++ # must also check that nat table was evaluated on second (lower device) iteration. ++ ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2' ++ if [ $? -eq 0 ]; then ++ echo "PASS: iperf3 connect with masquerade + sport rewrite on veth device" ++ else ++ echo "FAIL: vrf masq rule has unexpected counter value" ++ ret=1 ++ fi ++} ++ ++test_ct_zone_in ++test_masquerade_vrf ++test_masquerade_veth ++ ++exit $ret diff --git a/queue-5.15/series b/queue-5.15/series index f9ba4873b4d..54316f08e10 100644 --- a/queue-5.15/series +++ b/queue-5.15/series @@ -30,3 +30,8 @@ net-dsa-mv88e6xxx-fix-don-t-use-phy_detect-on-internal-phy-s.patch net-dsa-mv88e6xxx-allow-use-of-phys-on-cpu-and-dsa-ports.patch x86-sme-explicitly-map-new-efi-memmap-table-as-encrypted.patch platform-x86-amd-pmc-fix-s2idle-failures-on-certain-amd-laptops.patch +nfc-fix-potential-null-pointer-deref-in-nfc_genl_dump_ses_done.patch +selftests-netfilter-add-a-vrf-conntrack-testcase.patch +vrf-don-t-run-conntrack-on-vrf-with-dflt-qdisc.patch +bpf-x86-fix-no-previous-prototype-warning.patch +bpf-sockmap-attach-map-progs-to-psock-early-for-feature-probes.patch diff --git a/queue-5.15/vrf-don-t-run-conntrack-on-vrf-with-dflt-qdisc.patch b/queue-5.15/vrf-don-t-run-conntrack-on-vrf-with-dflt-qdisc.patch new file mode 100644 index 00000000000..6145476e8fc --- /dev/null +++ 
b/queue-5.15/vrf-don-t-run-conntrack-on-vrf-with-dflt-qdisc.patch @@ -0,0 +1,133 @@ +From d43b75fbc23f0ac1ef9c14a5a166d3ccb761a451 Mon Sep 17 00:00:00 2001 +From: Nicolas Dichtel +Date: Fri, 26 Nov 2021 15:36:12 +0100 +Subject: vrf: don't run conntrack on vrf with !dflt qdisc + +From: Nicolas Dichtel + +commit d43b75fbc23f0ac1ef9c14a5a166d3ccb761a451 upstream. + +After the below patch, the conntrack attached to skb is set to "notrack" in +the context of vrf device, for locally generated packets. +But this is true only when the default qdisc is set to the vrf device. When +changing the qdisc, notrack is not set anymore. +In fact, there is a shortcut in the vrf driver, when the default qdisc is +set, see commit dcdd43c41e60 ("net: vrf: performance improvements for +IPv4") for more details. + +This patch ensures that the behavior is always the same, whatever the qdisc +is. + +To demonstrate the difference, a new test is added in conntrack_vrf.sh. + +Fixes: 8c9c296adfae ("vrf: run conntrack only in context of lower/physdev for locally generated packets") +Signed-off-by: Nicolas Dichtel +Acked-by: Florian Westphal +Reviewed-by: David Ahern +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/vrf.c | 8 ++--- + tools/testing/selftests/netfilter/conntrack_vrf.sh | 30 ++++++++++++++++++--- + 2 files changed, 30 insertions(+), 8 deletions(-) + +--- a/drivers/net/vrf.c ++++ b/drivers/net/vrf.c +@@ -770,8 +770,6 @@ static struct sk_buff *vrf_ip6_out_direc + + skb->dev = vrf_dev; + +- vrf_nf_set_untracked(skb); +- + err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, + skb, NULL, vrf_dev, vrf_ip6_out_direct_finish); + +@@ -792,6 +790,8 @@ static struct sk_buff *vrf_ip6_out(struc + if (rt6_need_strict(&ipv6_hdr(skb)->daddr)) + return skb; + ++ vrf_nf_set_untracked(skb); ++ + if (qdisc_tx_is_default(vrf_dev) || + IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) + return vrf_ip6_out_direct(vrf_dev, sk, skb); +@@ -1000,8 +1000,6 @@ static struct 
sk_buff *vrf_ip_out_direct + + skb->dev = vrf_dev; + +- vrf_nf_set_untracked(skb); +- + err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk, + skb, NULL, vrf_dev, vrf_ip_out_direct_finish); + +@@ -1023,6 +1021,8 @@ static struct sk_buff *vrf_ip_out(struct + ipv4_is_lbcast(ip_hdr(skb)->daddr)) + return skb; + ++ vrf_nf_set_untracked(skb); ++ + if (qdisc_tx_is_default(vrf_dev) || + IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) + return vrf_ip_out_direct(vrf_dev, sk, skb); +--- a/tools/testing/selftests/netfilter/conntrack_vrf.sh ++++ b/tools/testing/selftests/netfilter/conntrack_vrf.sh +@@ -150,11 +150,27 @@ EOF + # oifname is the vrf device. + test_masquerade_vrf() + { ++ local qdisc=$1 ++ ++ if [ "$qdisc" != "default" ]; then ++ tc -net $ns0 qdisc add dev tvrf root $qdisc ++ fi ++ + ip netns exec $ns0 conntrack -F 2>/dev/null + + ip netns exec $ns0 nft -f - <