From: Greg Kroah-Hartman Date: Fri, 10 Dec 2021 15:54:42 +0000 (+0100) Subject: 5.4-stable patches X-Git-Tag: v4.4.295~55 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=3c287108ac64a59f8ca0951da02dff83dc74dc9d;p=thirdparty%2Fkernel%2Fstable-queue.git 5.4-stable patches added patches: bonding-make-tx_rebalance_counter-an-atomic.patch bpf-fix-the-off-by-two-error-in-range-markings.patch ice-ignore-dropped-packets-during-init.patch nfc-fix-potential-null-pointer-deref-in-nfc_genl_dump_ses_done.patch nfp-fix-memory-leak-in-nfp_cpp_area_cache_add.patch selftests-netfilter-add-a-vrf-conntrack-testcase.patch vrf-don-t-run-conntrack-on-vrf-with-dflt-qdisc.patch --- diff --git a/queue-5.4/bonding-make-tx_rebalance_counter-an-atomic.patch b/queue-5.4/bonding-make-tx_rebalance_counter-an-atomic.patch new file mode 100644 index 00000000000..851ccf5c596 --- /dev/null +++ b/queue-5.4/bonding-make-tx_rebalance_counter-an-atomic.patch @@ -0,0 +1,132 @@ +From dac8e00fb640e9569cdeefd3ce8a75639e5d0711 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Thu, 2 Dec 2021 18:27:18 -0800 +Subject: bonding: make tx_rebalance_counter an atomic + +From: Eric Dumazet + +commit dac8e00fb640e9569cdeefd3ce8a75639e5d0711 upstream. + +KCSAN reported a data-race [1] around tx_rebalance_counter +which can be accessed from different contexts, without +the protection of a lock/mutex. 
+ +[1] +BUG: KCSAN: data-race in bond_alb_init_slave / bond_alb_monitor + +write to 0xffff888157e8ca24 of 4 bytes by task 7075 on cpu 0: + bond_alb_init_slave+0x713/0x860 drivers/net/bonding/bond_alb.c:1613 + bond_enslave+0xd94/0x3010 drivers/net/bonding/bond_main.c:1949 + do_set_master net/core/rtnetlink.c:2521 [inline] + __rtnl_newlink net/core/rtnetlink.c:3475 [inline] + rtnl_newlink+0x1298/0x13b0 net/core/rtnetlink.c:3506 + rtnetlink_rcv_msg+0x745/0x7e0 net/core/rtnetlink.c:5571 + netlink_rcv_skb+0x14e/0x250 net/netlink/af_netlink.c:2491 + rtnetlink_rcv+0x18/0x20 net/core/rtnetlink.c:5589 + netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] + netlink_unicast+0x5fc/0x6c0 net/netlink/af_netlink.c:1345 + netlink_sendmsg+0x6e1/0x7d0 net/netlink/af_netlink.c:1916 + sock_sendmsg_nosec net/socket.c:704 [inline] + sock_sendmsg net/socket.c:724 [inline] + ____sys_sendmsg+0x39a/0x510 net/socket.c:2409 + ___sys_sendmsg net/socket.c:2463 [inline] + __sys_sendmsg+0x195/0x230 net/socket.c:2492 + __do_sys_sendmsg net/socket.c:2501 [inline] + __se_sys_sendmsg net/socket.c:2499 [inline] + __x64_sys_sendmsg+0x42/0x50 net/socket.c:2499 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x44/0xae + +read to 0xffff888157e8ca24 of 4 bytes by task 1082 on cpu 1: + bond_alb_monitor+0x8f/0xc00 drivers/net/bonding/bond_alb.c:1511 + process_one_work+0x3fc/0x980 kernel/workqueue.c:2298 + worker_thread+0x616/0xa70 kernel/workqueue.c:2445 + kthread+0x2c7/0x2e0 kernel/kthread.c:327 + ret_from_fork+0x1f/0x30 + +value changed: 0x00000001 -> 0x00000064 + +Reported by Kernel Concurrency Sanitizer on: +CPU: 1 PID: 1082 Comm: kworker/u4:3 Not tainted 5.16.0-rc3-syzkaller #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Workqueue: bond1 bond_alb_monitor + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Eric Dumazet +Reported-by: syzbot 
+Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_alb.c | 14 ++++++++------ + include/net/bond_alb.h | 2 +- + 2 files changed, 9 insertions(+), 7 deletions(-) + +--- a/drivers/net/bonding/bond_alb.c ++++ b/drivers/net/bonding/bond_alb.c +@@ -1514,14 +1514,14 @@ void bond_alb_monitor(struct work_struct + struct slave *slave; + + if (!bond_has_slaves(bond)) { +- bond_info->tx_rebalance_counter = 0; ++ atomic_set(&bond_info->tx_rebalance_counter, 0); + bond_info->lp_counter = 0; + goto re_arm; + } + + rcu_read_lock(); + +- bond_info->tx_rebalance_counter++; ++ atomic_inc(&bond_info->tx_rebalance_counter); + bond_info->lp_counter++; + + /* send learning packets */ +@@ -1543,7 +1543,7 @@ void bond_alb_monitor(struct work_struct + } + + /* rebalance tx traffic */ +- if (bond_info->tx_rebalance_counter >= BOND_TLB_REBALANCE_TICKS) { ++ if (atomic_read(&bond_info->tx_rebalance_counter) >= BOND_TLB_REBALANCE_TICKS) { + bond_for_each_slave_rcu(bond, slave, iter) { + tlb_clear_slave(bond, slave, 1); + if (slave == rcu_access_pointer(bond->curr_active_slave)) { +@@ -1553,7 +1553,7 @@ void bond_alb_monitor(struct work_struct + bond_info->unbalanced_load = 0; + } + } +- bond_info->tx_rebalance_counter = 0; ++ atomic_set(&bond_info->tx_rebalance_counter, 0); + } + + if (bond_info->rlb_enabled) { +@@ -1623,7 +1623,8 @@ int bond_alb_init_slave(struct bonding * + tlb_init_slave(slave); + + /* order a rebalance ASAP */ +- bond->alb_info.tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS; ++ atomic_set(&bond->alb_info.tx_rebalance_counter, ++ BOND_TLB_REBALANCE_TICKS); + + if (bond->alb_info.rlb_enabled) + bond->alb_info.rlb_rebalance = 1; +@@ -1660,7 +1661,8 @@ void bond_alb_handle_link_change(struct + rlb_clear_slave(bond, slave); + } else if (link == BOND_LINK_UP) { + /* order a rebalance ASAP */ +- bond_info->tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS; ++ atomic_set(&bond_info->tx_rebalance_counter, ++ BOND_TLB_REBALANCE_TICKS); + 
if (bond->alb_info.rlb_enabled) { + bond->alb_info.rlb_rebalance = 1; + /* If the updelay module parameter is smaller than the +--- a/include/net/bond_alb.h ++++ b/include/net/bond_alb.h +@@ -126,7 +126,7 @@ struct tlb_slave_info { + struct alb_bond_info { + struct tlb_client_info *tx_hashtbl; /* Dynamically allocated */ + u32 unbalanced_load; +- int tx_rebalance_counter; ++ atomic_t tx_rebalance_counter; + int lp_counter; + /* -------- rlb parameters -------- */ + int rlb_enabled; diff --git a/queue-5.4/bpf-fix-the-off-by-two-error-in-range-markings.patch b/queue-5.4/bpf-fix-the-off-by-two-error-in-range-markings.patch new file mode 100644 index 00000000000..174f56b9beb --- /dev/null +++ b/queue-5.4/bpf-fix-the-off-by-two-error-in-range-markings.patch @@ -0,0 +1,166 @@ +From 2fa7d94afc1afbb4d702760c058dc2d7ed30f226 Mon Sep 17 00:00:00 2001 +From: Maxim Mikityanskiy +Date: Tue, 30 Nov 2021 20:16:07 +0200 +Subject: bpf: Fix the off-by-two error in range markings + +From: Maxim Mikityanskiy + +commit 2fa7d94afc1afbb4d702760c058dc2d7ed30f226 upstream. + +The first commit cited below attempts to fix the off-by-one error that +appeared in some comparisons with an open range. Due to this error, +arithmetically equivalent pieces of code could get different verdicts +from the verifier, for example (pseudocode): + + // 1. Passes the verifier: + if (data + 8 > data_end) + return early + read *(u64 *)data, i.e. [data; data+7] + + // 2. Rejected by the verifier (should still pass): + if (data + 7 >= data_end) + return early + read *(u64 *)data, i.e. [data; data+7] + +The attempted fix, however, shifts the range by one in a wrong +direction, so the bug not only remains, but also such piece of code +starts failing in the verifier: + + // 3. Rejected by the verifier, but the check is stricter than in #1. + if (data + 8 >= data_end) + return early + read *(u64 *)data, i.e. [data; data+7] + +The change performed by that fix converted an off-by-one bug into +off-by-two. 
The second commit cited below added the BPF selftests +written to ensure that code chunks like #3 are rejected, however, +they should be accepted. + +This commit fixes the off-by-two error by adjusting new_range in the +right direction and fixes the tests by changing the range into the +one that should actually fail. + +Fixes: fb2a311a31d3 ("bpf: fix off by one for range markings with L{T, E} patterns") +Fixes: b37242c773b2 ("bpf: add test cases to bpf selftests to cover all access tests") +Signed-off-by: Maxim Mikityanskiy +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20211130181607.593149-1-maximmi@nvidia.com +Signed-off-by: Greg Kroah-Hartman +--- + kernel/bpf/verifier.c | 2 + tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c | 32 +++++----- + 2 files changed, 17 insertions(+), 17 deletions(-) + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -5372,7 +5372,7 @@ static void find_good_pkt_pointers(struc + + new_range = dst_reg->off; + if (range_right_open) +- new_range--; ++ new_range++; + + /* Examples for register markings: + * +--- a/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c ++++ b/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c +@@ -112,10 +112,10 @@ + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), +- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), ++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), +- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), ++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, +@@ -167,10 +167,10 @@ + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), +- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), ++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 
0, 1), +- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), ++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, +@@ -274,9 +274,9 @@ + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), +- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), ++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), + BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), +- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), ++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, +@@ -437,9 +437,9 @@ + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), +- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), ++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), +- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), ++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, +@@ -544,10 +544,10 @@ + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), +- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), ++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), +- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), ++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, +@@ -599,10 +599,10 @@ + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), +- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), ++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), +- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), ++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), 
+ }, +@@ -706,9 +706,9 @@ + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), +- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), ++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), + BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), +- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), ++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, +@@ -869,9 +869,9 @@ + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), +- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), ++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), +- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), ++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, diff --git a/queue-5.4/ice-ignore-dropped-packets-during-init.patch b/queue-5.4/ice-ignore-dropped-packets-during-init.patch new file mode 100644 index 00000000000..03d0625f287 --- /dev/null +++ b/queue-5.4/ice-ignore-dropped-packets-during-init.patch @@ -0,0 +1,39 @@ +From 28dc1b86f8ea9fd6f4c9e0b363db73ecabf84e22 Mon Sep 17 00:00:00 2001 +From: Jesse Brandeburg +Date: Fri, 22 Oct 2021 17:28:17 -0700 +Subject: ice: ignore dropped packets during init + +From: Jesse Brandeburg + +commit 28dc1b86f8ea9fd6f4c9e0b363db73ecabf84e22 upstream. + +If the hardware is constantly receiving unicast or broadcast packets +during driver load, the device previously counted many GLV_RDPC (VSI +dropped packets) events during init. This causes confusing dropped +packet statistics during driver load. The dropped packets counter +incrementing does stop once the driver finishes loading. + +Avoid this problem by baselining our statistics at the end of driver +open instead of the end of probe. 
+ +Fixes: cdedef59deb0 ("ice: Configure VSIs for Tx/Rx") +Signed-off-by: Jesse Brandeburg +Tested-by: Gurucharan G +Signed-off-by: Tony Nguyen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/intel/ice/ice_main.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/net/ethernet/intel/ice/ice_main.c ++++ b/drivers/net/ethernet/intel/ice/ice_main.c +@@ -3561,6 +3561,9 @@ static int ice_up_complete(struct ice_vs + netif_carrier_on(vsi->netdev); + } + ++ /* clear this now, and the first stats read will be used as baseline */ ++ vsi->stat_offsets_loaded = false; ++ + ice_service_task_schedule(pf); + + return 0; diff --git a/queue-5.4/nfc-fix-potential-null-pointer-deref-in-nfc_genl_dump_ses_done.patch b/queue-5.4/nfc-fix-potential-null-pointer-deref-in-nfc_genl_dump_ses_done.patch new file mode 100644 index 00000000000..19d99e4a3c9 --- /dev/null +++ b/queue-5.4/nfc-fix-potential-null-pointer-deref-in-nfc_genl_dump_ses_done.patch @@ -0,0 +1,37 @@ +From 4cd8371a234d051f9c9557fcbb1f8c523b1c0d10 Mon Sep 17 00:00:00 2001 +From: Krzysztof Kozlowski +Date: Thu, 9 Dec 2021 09:13:07 +0100 +Subject: nfc: fix potential NULL pointer deref in nfc_genl_dump_ses_done + +From: Krzysztof Kozlowski + +commit 4cd8371a234d051f9c9557fcbb1f8c523b1c0d10 upstream. + +The done() netlink callback nfc_genl_dump_ses_done() should check if +received argument is non-NULL, because its allocation could fail earlier +in dumpit() (nfc_genl_dump_ses()). 
+ +Fixes: ac22ac466a65 ("NFC: Add a GET_SE netlink API") +Signed-off-by: Krzysztof Kozlowski +Link: https://lore.kernel.org/r/20211209081307.57337-1-krzysztof.kozlowski@canonical.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/nfc/netlink.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/net/nfc/netlink.c ++++ b/net/nfc/netlink.c +@@ -1400,8 +1400,10 @@ static int nfc_genl_dump_ses_done(struct + { + struct class_dev_iter *iter = (struct class_dev_iter *) cb->args[0]; + +- nfc_device_iter_exit(iter); +- kfree(iter); ++ if (iter) { ++ nfc_device_iter_exit(iter); ++ kfree(iter); ++ } + + return 0; + } diff --git a/queue-5.4/nfp-fix-memory-leak-in-nfp_cpp_area_cache_add.patch b/queue-5.4/nfp-fix-memory-leak-in-nfp_cpp_area_cache_add.patch new file mode 100644 index 00000000000..5220ad371fd --- /dev/null +++ b/queue-5.4/nfp-fix-memory-leak-in-nfp_cpp_area_cache_add.patch @@ -0,0 +1,60 @@ +From c56c96303e9289cc34716b1179597b6f470833de Mon Sep 17 00:00:00 2001 +From: Jianglei Nie +Date: Thu, 9 Dec 2021 14:15:11 +0800 +Subject: nfp: Fix memory leak in nfp_cpp_area_cache_add() + +From: Jianglei Nie + +commit c56c96303e9289cc34716b1179597b6f470833de upstream. + +In line 800 (#1), nfp_cpp_area_alloc() allocates and initializes a +CPP area structure. But in line 807 (#2), when the cache allocation +fails, this CPP area structure is not freed, which will result in a +memory leak. + +We can fix it by freeing the CPP area when the cache allocation +fails (#2). 
+ +792 int nfp_cpp_area_cache_add(struct nfp_cpp *cpp, size_t size) +793 { +794 struct nfp_cpp_area_cache *cache; +795 struct nfp_cpp_area *area; + +800 area = nfp_cpp_area_alloc(cpp, NFP_CPP_ID(7, NFP_CPP_ACTION_RW, 0), +801 0, size); + // #1: allocates and initializes + +802 if (!area) +803 return -ENOMEM; + +805 cache = kzalloc(sizeof(*cache), GFP_KERNEL); +806 if (!cache) +807 return -ENOMEM; // #2: missing free + +817 return 0; +818 } + +Fixes: 4cb584e0ee7d ("nfp: add CPP access core") +Signed-off-by: Jianglei Nie +Acked-by: Simon Horman +Link: https://lore.kernel.org/r/20211209061511.122535-1-niejianglei2021@163.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c ++++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c +@@ -803,8 +803,10 @@ int nfp_cpp_area_cache_add(struct nfp_cp + return -ENOMEM; + + cache = kzalloc(sizeof(*cache), GFP_KERNEL); +- if (!cache) ++ if (!cache) { ++ nfp_cpp_area_free(area); + return -ENOMEM; ++ } + + cache->id = 0; + cache->addr = 0; diff --git a/queue-5.4/selftests-netfilter-add-a-vrf-conntrack-testcase.patch b/queue-5.4/selftests-netfilter-add-a-vrf-conntrack-testcase.patch new file mode 100644 index 00000000000..b3ce9462a1e --- /dev/null +++ b/queue-5.4/selftests-netfilter-add-a-vrf-conntrack-testcase.patch @@ -0,0 +1,255 @@ +From 33b8aad21ac175eba9577a73eb62b0aa141c241c Mon Sep 17 00:00:00 2001 +From: Florian Westphal +Date: Mon, 18 Oct 2021 14:38:13 +0200 +Subject: selftests: netfilter: add a vrf+conntrack testcase + +From: Florian Westphal + +commit 33b8aad21ac175eba9577a73eb62b0aa141c241c upstream. + +Rework the reproducer for the vrf+conntrack regression reported +by Eugene into a selftest and also add a test for ip masquerading +that Lahav fixed recently. 
+ +With net or net-next tree, the first test fails and the latter +two pass. + +With 09e856d54bda5f28 ("vrf: Reset skb conntrack connection on VRF rcv") +reverted first test passes but the last two fail. + +A proper fix needs more work, for time being a revert seems to be +the best choice, snat/masquerade did not work before the fix. + +Link: https://lore.kernel.org/netdev/378ca299-4474-7e9a-3d36-2350c8c98995@gmail.com/T/#m95358a31810df7392f541f99d187227bc75c9963 +Reported-by: Eugene Crosser +Cc: Lahav Schlesinger +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/netfilter/conntrack_vrf.sh | 219 +++++++++++++++++++++ + 1 file changed, 219 insertions(+) + create mode 100755 tools/testing/selftests/netfilter/conntrack_vrf.sh + +--- /dev/null ++++ b/tools/testing/selftests/netfilter/conntrack_vrf.sh +@@ -0,0 +1,219 @@ ++#!/bin/sh ++ ++# This script demonstrates interaction of conntrack and vrf. ++# The vrf driver calls the netfilter hooks again, with oif/iif ++# pointing at the VRF device. ++# ++# For ingress, this means first iteration has iifname of lower/real ++# device. In this script, thats veth0. ++# Second iteration is iifname set to vrf device, tvrf in this script. ++# ++# For egress, this is reversed: first iteration has the vrf device, ++# second iteration is done with the lower/real/veth0 device. ++# ++# test_ct_zone_in demonstrates unexpected change of nftables ++# behavior # caused by commit 09e856d54bda5f28 "vrf: Reset skb conntrack ++# connection on VRF rcv" ++# ++# It was possible to assign conntrack zone to a packet (or mark it for ++# `notracking`) in the prerouting chain before conntrack, based on real iif. ++# ++# After the change, the zone assignment is lost and the zone is assigned based ++# on the VRF master interface (in case such a rule exists). ++# assignment is lost. 
Instead, assignment based on the `iif` matching ++# Thus it is impossible to distinguish packets based on the original ++# interface. ++# ++# test_masquerade_vrf and test_masquerade_veth0 demonstrate the problem ++# that was supposed to be fixed by the commit mentioned above to make sure ++# that any fix to test case 1 won't break masquerade again. ++ ++ksft_skip=4 ++ ++IP0=172.30.30.1 ++IP1=172.30.30.2 ++PFXL=30 ++ret=0 ++ ++sfx=$(mktemp -u "XXXXXXXX") ++ns0="ns0-$sfx" ++ns1="ns1-$sfx" ++ ++cleanup() ++{ ++ ip netns pids $ns0 | xargs kill 2>/dev/null ++ ip netns pids $ns1 | xargs kill 2>/dev/null ++ ++ ip netns del $ns0 $ns1 ++} ++ ++nft --version > /dev/null 2>&1 ++if [ $? -ne 0 ];then ++ echo "SKIP: Could not run test without nft tool" ++ exit $ksft_skip ++fi ++ ++ip -Version > /dev/null 2>&1 ++if [ $? -ne 0 ];then ++ echo "SKIP: Could not run test without ip tool" ++ exit $ksft_skip ++fi ++ ++ip netns add "$ns0" ++if [ $? -ne 0 ];then ++ echo "SKIP: Could not create net namespace $ns0" ++ exit $ksft_skip ++fi ++ip netns add "$ns1" ++ ++trap cleanup EXIT ++ ++ip netns exec $ns0 sysctl -q -w net.ipv4.conf.default.rp_filter=0 ++ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0 ++ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0 ++ ++ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1 ++if [ $? -ne 0 ];then ++ echo "SKIP: Could not add veth device" ++ exit $ksft_skip ++fi ++ ++ip -net $ns0 li add tvrf type vrf table 9876 ++if [ $? -ne 0 ];then ++ echo "SKIP: Could not add vrf device" ++ exit $ksft_skip ++fi ++ ++ip -net $ns0 li set lo up ++ ++ip -net $ns0 li set veth0 master tvrf ++ip -net $ns0 li set tvrf up ++ip -net $ns0 li set veth0 up ++ip -net $ns1 li set veth0 up ++ ++ip -net $ns0 addr add $IP0/$PFXL dev veth0 ++ip -net $ns1 addr add $IP1/$PFXL dev veth0 ++ ++ip netns exec $ns1 iperf3 -s > /dev/null 2>&1& ++if [ $? 
-ne 0 ];then ++ echo "SKIP: Could not start iperf3" ++ exit $ksft_skip ++fi ++ ++# test vrf ingress handling. ++# The incoming connection should be placed in conntrack zone 1, ++# as decided by the first iteration of the ruleset. ++test_ct_zone_in() ++{ ++ip netns exec $ns0 nft -f - < /dev/null ++ ++ # should be in zone 1, not zone 2 ++ count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 1 2>/dev/null | wc -l) ++ if [ $count -eq 1 ]; then ++ echo "PASS: entry found in conntrack zone 1" ++ else ++ echo "FAIL: entry not found in conntrack zone 1" ++ count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 2 2> /dev/null | wc -l) ++ if [ $count -eq 1 ]; then ++ echo "FAIL: entry found in zone 2 instead" ++ else ++ echo "FAIL: entry not in zone 1 or 2, dumping table" ++ ip netns exec $ns0 conntrack -L ++ ip netns exec $ns0 nft list ruleset ++ fi ++ fi ++} ++ ++# add masq rule that gets evaluated w. outif set to vrf device. ++# This tests the first iteration of the packet through conntrack, ++# oifname is the vrf device. ++test_masquerade_vrf() ++{ ++ ip netns exec $ns0 conntrack -F 2>/dev/null ++ ++ip netns exec $ns0 nft -f - </dev/null ++ if [ $? -ne 0 ]; then ++ echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on vrf device" ++ ret=1 ++ return ++ fi ++ ++ # must also check that nat table was evaluated on second (lower device) iteration. ++ ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2' ++ if [ $? -eq 0 ]; then ++ echo "PASS: iperf3 connect with masquerade + sport rewrite on vrf device" ++ else ++ echo "FAIL: vrf masq rule has unexpected counter value" ++ ret=1 ++ fi ++} ++ ++# add masq rule that gets evaluated w. outif set to veth device. ++# This tests the 2nd iteration of the packet through conntrack, ++# oifname is the lower device (veth0 in this case). ++test_masquerade_veth() ++{ ++ ip netns exec $ns0 conntrack -F 2>/dev/null ++ip netns exec $ns0 nft -f - < /dev/null ++ if [ $? 
-ne 0 ]; then ++ echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on veth device" ++ ret=1 ++ return ++ fi ++ ++ # must also check that nat table was evaluated on second (lower device) iteration. ++ ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2' ++ if [ $? -eq 0 ]; then ++ echo "PASS: iperf3 connect with masquerade + sport rewrite on veth device" ++ else ++ echo "FAIL: vrf masq rule has unexpected counter value" ++ ret=1 ++ fi ++} ++ ++test_ct_zone_in ++test_masquerade_vrf ++test_masquerade_veth ++ ++exit $ret diff --git a/queue-5.4/series b/queue-5.4/series index a5748933cfa..02f05bea87f 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -12,3 +12,10 @@ hid-check-for-valid-usb-device-for-many-hid-drivers.patch can-kvaser_usb-get-can-clock-frequency-from-device.patch can-kvaser_pciefd-kvaser_pciefd_rx_error_frame-increase-correct-stats-rx-tx-_errors-counter.patch can-sja1000-fix-use-after-free-in-ems_pcmcia_add_card.patch +nfc-fix-potential-null-pointer-deref-in-nfc_genl_dump_ses_done.patch +selftests-netfilter-add-a-vrf-conntrack-testcase.patch +vrf-don-t-run-conntrack-on-vrf-with-dflt-qdisc.patch +bpf-fix-the-off-by-two-error-in-range-markings.patch +ice-ignore-dropped-packets-during-init.patch +bonding-make-tx_rebalance_counter-an-atomic.patch +nfp-fix-memory-leak-in-nfp_cpp_area_cache_add.patch diff --git a/queue-5.4/vrf-don-t-run-conntrack-on-vrf-with-dflt-qdisc.patch b/queue-5.4/vrf-don-t-run-conntrack-on-vrf-with-dflt-qdisc.patch new file mode 100644 index 00000000000..76e6d18c305 --- /dev/null +++ b/queue-5.4/vrf-don-t-run-conntrack-on-vrf-with-dflt-qdisc.patch @@ -0,0 +1,133 @@ +From d43b75fbc23f0ac1ef9c14a5a166d3ccb761a451 Mon Sep 17 00:00:00 2001 +From: Nicolas Dichtel +Date: Fri, 26 Nov 2021 15:36:12 +0100 +Subject: vrf: don't run conntrack on vrf with !dflt qdisc + +From: Nicolas Dichtel + +commit d43b75fbc23f0ac1ef9c14a5a166d3ccb761a451 upstream. 
+ +After the below patch, the conntrack attached to skb is set to "notrack" in +the context of vrf device, for locally generated packets. +But this is true only when the default qdisc is set to the vrf device. When +changing the qdisc, notrack is not set anymore. +In fact, there is a shortcut in the vrf driver, when the default qdisc is +set, see commit dcdd43c41e60 ("net: vrf: performance improvements for +IPv4") for more details. + +This patch ensures that the behavior is always the same, whatever the qdisc +is. + +To demonstrate the difference, a new test is added in conntrack_vrf.sh. + +Fixes: 8c9c296adfae ("vrf: run conntrack only in context of lower/physdev for locally generated packets") +Signed-off-by: Nicolas Dichtel +Acked-by: Florian Westphal +Reviewed-by: David Ahern +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/vrf.c | 8 ++--- + tools/testing/selftests/netfilter/conntrack_vrf.sh | 30 ++++++++++++++++++--- + 2 files changed, 30 insertions(+), 8 deletions(-) + +--- a/drivers/net/vrf.c ++++ b/drivers/net/vrf.c +@@ -495,8 +495,6 @@ static struct sk_buff *vrf_ip6_out_direc + + skb->dev = vrf_dev; + +- vrf_nf_set_untracked(skb); +- + err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, + skb, NULL, vrf_dev, vrf_ip6_out_direct_finish); + +@@ -517,6 +515,8 @@ static struct sk_buff *vrf_ip6_out(struc + if (rt6_need_strict(&ipv6_hdr(skb)->daddr)) + return skb; + ++ vrf_nf_set_untracked(skb); ++ + if (qdisc_tx_is_default(vrf_dev) || + IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) + return vrf_ip6_out_direct(vrf_dev, sk, skb); +@@ -732,8 +732,6 @@ static struct sk_buff *vrf_ip_out_direct + + skb->dev = vrf_dev; + +- vrf_nf_set_untracked(skb); +- + err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk, + skb, NULL, vrf_dev, vrf_ip_out_direct_finish); + +@@ -755,6 +753,8 @@ static struct sk_buff *vrf_ip_out(struct + ipv4_is_lbcast(ip_hdr(skb)->daddr)) + return skb; + ++ vrf_nf_set_untracked(skb); ++ + if 
(qdisc_tx_is_default(vrf_dev) || + IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) + return vrf_ip_out_direct(vrf_dev, sk, skb); +--- a/tools/testing/selftests/netfilter/conntrack_vrf.sh ++++ b/tools/testing/selftests/netfilter/conntrack_vrf.sh +@@ -150,11 +150,27 @@ EOF + # oifname is the vrf device. + test_masquerade_vrf() + { ++ local qdisc=$1 ++ ++ if [ "$qdisc" != "default" ]; then ++ tc -net $ns0 qdisc add dev tvrf root $qdisc ++ fi ++ + ip netns exec $ns0 conntrack -F 2>/dev/null + + ip netns exec $ns0 nft -f - <