--- /dev/null
+From dac8e00fb640e9569cdeefd3ce8a75639e5d0711 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 2 Dec 2021 18:27:18 -0800
+Subject: bonding: make tx_rebalance_counter an atomic
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit dac8e00fb640e9569cdeefd3ce8a75639e5d0711 upstream.
+
+KCSAN reported a data-race [1] around tx_rebalance_counter
+which can be accessed from different contexts, without
+the protection of a lock/mutex.
+
+[1]
+BUG: KCSAN: data-race in bond_alb_init_slave / bond_alb_monitor
+
+write to 0xffff888157e8ca24 of 4 bytes by task 7075 on cpu 0:
+ bond_alb_init_slave+0x713/0x860 drivers/net/bonding/bond_alb.c:1613
+ bond_enslave+0xd94/0x3010 drivers/net/bonding/bond_main.c:1949
+ do_set_master net/core/rtnetlink.c:2521 [inline]
+ __rtnl_newlink net/core/rtnetlink.c:3475 [inline]
+ rtnl_newlink+0x1298/0x13b0 net/core/rtnetlink.c:3506
+ rtnetlink_rcv_msg+0x745/0x7e0 net/core/rtnetlink.c:5571
+ netlink_rcv_skb+0x14e/0x250 net/netlink/af_netlink.c:2491
+ rtnetlink_rcv+0x18/0x20 net/core/rtnetlink.c:5589
+ netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline]
+ netlink_unicast+0x5fc/0x6c0 net/netlink/af_netlink.c:1345
+ netlink_sendmsg+0x6e1/0x7d0 net/netlink/af_netlink.c:1916
+ sock_sendmsg_nosec net/socket.c:704 [inline]
+ sock_sendmsg net/socket.c:724 [inline]
+ ____sys_sendmsg+0x39a/0x510 net/socket.c:2409
+ ___sys_sendmsg net/socket.c:2463 [inline]
+ __sys_sendmsg+0x195/0x230 net/socket.c:2492
+ __do_sys_sendmsg net/socket.c:2501 [inline]
+ __se_sys_sendmsg net/socket.c:2499 [inline]
+ __x64_sys_sendmsg+0x42/0x50 net/socket.c:2499
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+read to 0xffff888157e8ca24 of 4 bytes by task 1082 on cpu 1:
+ bond_alb_monitor+0x8f/0xc00 drivers/net/bonding/bond_alb.c:1511
+ process_one_work+0x3fc/0x980 kernel/workqueue.c:2298
+ worker_thread+0x616/0xa70 kernel/workqueue.c:2445
+ kthread+0x2c7/0x2e0 kernel/kthread.c:327
+ ret_from_fork+0x1f/0x30
+
+value changed: 0x00000001 -> 0x00000064
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 1 PID: 1082 Comm: kworker/u4:3 Not tainted 5.16.0-rc3-syzkaller #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Workqueue: bond1 bond_alb_monitor
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/bonding/bond_alb.c | 14 ++++++++------
+ include/net/bond_alb.h | 2 +-
+ 2 files changed, 9 insertions(+), 7 deletions(-)
+
+--- a/drivers/net/bonding/bond_alb.c
++++ b/drivers/net/bonding/bond_alb.c
+@@ -1514,14 +1514,14 @@ void bond_alb_monitor(struct work_struct
+ struct slave *slave;
+
+ if (!bond_has_slaves(bond)) {
+- bond_info->tx_rebalance_counter = 0;
++ atomic_set(&bond_info->tx_rebalance_counter, 0);
+ bond_info->lp_counter = 0;
+ goto re_arm;
+ }
+
+ rcu_read_lock();
+
+- bond_info->tx_rebalance_counter++;
++ atomic_inc(&bond_info->tx_rebalance_counter);
+ bond_info->lp_counter++;
+
+ /* send learning packets */
+@@ -1543,7 +1543,7 @@ void bond_alb_monitor(struct work_struct
+ }
+
+ /* rebalance tx traffic */
+- if (bond_info->tx_rebalance_counter >= BOND_TLB_REBALANCE_TICKS) {
++ if (atomic_read(&bond_info->tx_rebalance_counter) >= BOND_TLB_REBALANCE_TICKS) {
+ bond_for_each_slave_rcu(bond, slave, iter) {
+ tlb_clear_slave(bond, slave, 1);
+ if (slave == rcu_access_pointer(bond->curr_active_slave)) {
+@@ -1553,7 +1553,7 @@ void bond_alb_monitor(struct work_struct
+ bond_info->unbalanced_load = 0;
+ }
+ }
+- bond_info->tx_rebalance_counter = 0;
++ atomic_set(&bond_info->tx_rebalance_counter, 0);
+ }
+
+ if (bond_info->rlb_enabled) {
+@@ -1623,7 +1623,8 @@ int bond_alb_init_slave(struct bonding *
+ tlb_init_slave(slave);
+
+ /* order a rebalance ASAP */
+- bond->alb_info.tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS;
++ atomic_set(&bond->alb_info.tx_rebalance_counter,
++ BOND_TLB_REBALANCE_TICKS);
+
+ if (bond->alb_info.rlb_enabled)
+ bond->alb_info.rlb_rebalance = 1;
+@@ -1660,7 +1661,8 @@ void bond_alb_handle_link_change(struct
+ rlb_clear_slave(bond, slave);
+ } else if (link == BOND_LINK_UP) {
+ /* order a rebalance ASAP */
+- bond_info->tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS;
++ atomic_set(&bond_info->tx_rebalance_counter,
++ BOND_TLB_REBALANCE_TICKS);
+ if (bond->alb_info.rlb_enabled) {
+ bond->alb_info.rlb_rebalance = 1;
+ /* If the updelay module parameter is smaller than the
+--- a/include/net/bond_alb.h
++++ b/include/net/bond_alb.h
+@@ -126,7 +126,7 @@ struct tlb_slave_info {
+ struct alb_bond_info {
+ struct tlb_client_info *tx_hashtbl; /* Dynamically allocated */
+ u32 unbalanced_load;
+- int tx_rebalance_counter;
++ atomic_t tx_rebalance_counter;
+ int lp_counter;
+ /* -------- rlb parameters -------- */
+ int rlb_enabled;
--- /dev/null
+From 2fa7d94afc1afbb4d702760c058dc2d7ed30f226 Mon Sep 17 00:00:00 2001
+From: Maxim Mikityanskiy <maximmi@nvidia.com>
+Date: Tue, 30 Nov 2021 20:16:07 +0200
+Subject: bpf: Fix the off-by-two error in range markings
+
+From: Maxim Mikityanskiy <maximmi@nvidia.com>
+
+commit 2fa7d94afc1afbb4d702760c058dc2d7ed30f226 upstream.
+
+The first commit cited below attempts to fix the off-by-one error that
+appeared in some comparisons with an open range. Due to this error,
+arithmetically equivalent pieces of code could get different verdicts
+from the verifier, for example (pseudocode):
+
+ // 1. Passes the verifier:
+ if (data + 8 > data_end)
+ return early
+ read *(u64 *)data, i.e. [data; data+7]
+
+ // 2. Rejected by the verifier (should still pass):
+ if (data + 7 >= data_end)
+ return early
+ read *(u64 *)data, i.e. [data; data+7]
+
+The attempted fix, however, shifts the range by one in a wrong
+direction, so the bug not only remains, but also such piece of code
+starts failing in the verifier:
+
+ // 3. Rejected by the verifier, but the check is stricter than in #1.
+ if (data + 8 >= data_end)
+ return early
+ read *(u64 *)data, i.e. [data; data+7]
+
+The change performed by that fix converted an off-by-one bug into
+off-by-two. The second commit cited below added the BPF selftests
+written to ensure than code chunks like #3 are rejected, however,
+they should be accepted.
+
+This commit fixes the off-by-two error by adjusting new_range in the
+right direction and fixes the tests by changing the range into the
+one that should actually fail.
+
+Fixes: fb2a311a31d3 ("bpf: fix off by one for range markings with L{T, E} patterns")
+Fixes: b37242c773b2 ("bpf: add test cases to bpf selftests to cover all access tests")
+Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Link: https://lore.kernel.org/bpf/20211130181607.593149-1-maximmi@nvidia.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/verifier.c | 2
+ tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c | 32 +++++-----
+ 2 files changed, 17 insertions(+), 17 deletions(-)
+
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -5372,7 +5372,7 @@ static void find_good_pkt_pointers(struc
+
+ new_range = dst_reg->off;
+ if (range_right_open)
+- new_range--;
++ new_range++;
+
+ /* Examples for register markings:
+ *
+--- a/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c
++++ b/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c
+@@ -112,10 +112,10 @@
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+@@ -167,10 +167,10 @@
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+@@ -274,9 +274,9 @@
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1),
+- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+@@ -437,9 +437,9 @@
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1),
+- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+@@ -544,10 +544,10 @@
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+@@ -599,10 +599,10 @@
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+@@ -706,9 +706,9 @@
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1),
+- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+@@ -869,9 +869,9 @@
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1),
+- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
--- /dev/null
+From 28dc1b86f8ea9fd6f4c9e0b363db73ecabf84e22 Mon Sep 17 00:00:00 2001
+From: Jesse Brandeburg <jesse.brandeburg@intel.com>
+Date: Fri, 22 Oct 2021 17:28:17 -0700
+Subject: ice: ignore dropped packets during init
+
+From: Jesse Brandeburg <jesse.brandeburg@intel.com>
+
+commit 28dc1b86f8ea9fd6f4c9e0b363db73ecabf84e22 upstream.
+
+If the hardware is constantly receiving unicast or broadcast packets
+during driver load, the device previously counted many GLV_RDPC (VSI
+dropped packets) events during init. This causes confusing dropped
+packet statistics during driver load. The dropped packets counter
+incrementing does stop once the driver finishes loading.
+
+Avoid this problem by baselining our statistics at the end of driver
+open instead of the end of probe.
+
+Fixes: cdedef59deb0 ("ice: Configure VSIs for Tx/Rx")
+Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
+Tested-by: Gurucharan G <gurucharanx.g@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/intel/ice/ice_main.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/net/ethernet/intel/ice/ice_main.c
++++ b/drivers/net/ethernet/intel/ice/ice_main.c
+@@ -3561,6 +3561,9 @@ static int ice_up_complete(struct ice_vs
+ netif_carrier_on(vsi->netdev);
+ }
+
++ /* clear this now, and the first stats read will be used as baseline */
++ vsi->stat_offsets_loaded = false;
++
+ ice_service_task_schedule(pf);
+
+ return 0;
--- /dev/null
+From 4cd8371a234d051f9c9557fcbb1f8c523b1c0d10 Mon Sep 17 00:00:00 2001
+From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+Date: Thu, 9 Dec 2021 09:13:07 +0100
+Subject: nfc: fix potential NULL pointer deref in nfc_genl_dump_ses_done
+
+From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+
+commit 4cd8371a234d051f9c9557fcbb1f8c523b1c0d10 upstream.
+
+The done() netlink callback nfc_genl_dump_ses_done() should check if
+received argument is non-NULL, because its allocation could fail earlier
+in dumpit() (nfc_genl_dump_ses()).
+
+Fixes: ac22ac466a65 ("NFC: Add a GET_SE netlink API")
+Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+Link: https://lore.kernel.org/r/20211209081307.57337-1-krzysztof.kozlowski@canonical.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/nfc/netlink.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/net/nfc/netlink.c
++++ b/net/nfc/netlink.c
+@@ -1400,8 +1400,10 @@ static int nfc_genl_dump_ses_done(struct
+ {
+ struct class_dev_iter *iter = (struct class_dev_iter *) cb->args[0];
+
+- nfc_device_iter_exit(iter);
+- kfree(iter);
++ if (iter) {
++ nfc_device_iter_exit(iter);
++ kfree(iter);
++ }
+
+ return 0;
+ }
--- /dev/null
+From c56c96303e9289cc34716b1179597b6f470833de Mon Sep 17 00:00:00 2001
+From: Jianglei Nie <niejianglei2021@163.com>
+Date: Thu, 9 Dec 2021 14:15:11 +0800
+Subject: nfp: Fix memory leak in nfp_cpp_area_cache_add()
+
+From: Jianglei Nie <niejianglei2021@163.com>
+
+commit c56c96303e9289cc34716b1179597b6f470833de upstream.
+
+In line 800 (#1), nfp_cpp_area_alloc() allocates and initializes a
+CPP area structure. But in line 807 (#2), when the cache is allocated
+failed, this CPP area structure is not freed, which will result in
+memory leak.
+
+We can fix it by freeing the CPP area when the cache is allocated
+failed (#2).
+
+792 int nfp_cpp_area_cache_add(struct nfp_cpp *cpp, size_t size)
+793 {
+794 struct nfp_cpp_area_cache *cache;
+795 struct nfp_cpp_area *area;
+
+800 area = nfp_cpp_area_alloc(cpp, NFP_CPP_ID(7, NFP_CPP_ACTION_RW, 0),
+801 0, size);
+ // #1: allocates and initializes
+
+802 if (!area)
+803 return -ENOMEM;
+
+805 cache = kzalloc(sizeof(*cache), GFP_KERNEL);
+806 if (!cache)
+807 return -ENOMEM; // #2: missing free
+
+817 return 0;
+818 }
+
+Fixes: 4cb584e0ee7d ("nfp: add CPP access core")
+Signed-off-by: Jianglei Nie <niejianglei2021@163.com>
+Acked-by: Simon Horman <simon.horman@corigine.com>
+Link: https://lore.kernel.org/r/20211209061511.122535-1-niejianglei2021@163.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c
++++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c
+@@ -803,8 +803,10 @@ int nfp_cpp_area_cache_add(struct nfp_cp
+ return -ENOMEM;
+
+ cache = kzalloc(sizeof(*cache), GFP_KERNEL);
+- if (!cache)
++ if (!cache) {
++ nfp_cpp_area_free(area);
+ return -ENOMEM;
++ }
+
+ cache->id = 0;
+ cache->addr = 0;
--- /dev/null
+From 33b8aad21ac175eba9577a73eb62b0aa141c241c Mon Sep 17 00:00:00 2001
+From: Florian Westphal <fw@strlen.de>
+Date: Mon, 18 Oct 2021 14:38:13 +0200
+Subject: selftests: netfilter: add a vrf+conntrack testcase
+
+From: Florian Westphal <fw@strlen.de>
+
+commit 33b8aad21ac175eba9577a73eb62b0aa141c241c upstream.
+
+Rework the reproducer for the vrf+conntrack regression reported
+by Eugene into a selftest and also add a test for ip masquerading
+that Lahav fixed recently.
+
+With net or net-next tree, the first test fails and the latter
+two pass.
+
+With 09e856d54bda5f28 ("vrf: Reset skb conntrack connection on VRF rcv")
+reverted first test passes but the last two fail.
+
+A proper fix needs more work, for time being a revert seems to be
+the best choice, snat/masquerade did not work before the fix.
+
+Link: https://lore.kernel.org/netdev/378ca299-4474-7e9a-3d36-2350c8c98995@gmail.com/T/#m95358a31810df7392f541f99d187227bc75c9963
+Reported-by: Eugene Crosser <crosser@average.org>
+Cc: Lahav Schlesinger <lschlesinger@drivenets.com>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/netfilter/conntrack_vrf.sh | 219 +++++++++++++++++++++
+ 1 file changed, 219 insertions(+)
+ create mode 100755 tools/testing/selftests/netfilter/conntrack_vrf.sh
+
+--- /dev/null
++++ b/tools/testing/selftests/netfilter/conntrack_vrf.sh
+@@ -0,0 +1,219 @@
++#!/bin/sh
++
++# This script demonstrates interaction of conntrack and vrf.
++# The vrf driver calls the netfilter hooks again, with oif/iif
++# pointing at the VRF device.
++#
++# For ingress, this means first iteration has iifname of lower/real
++# device. In this script, thats veth0.
++# Second iteration is iifname set to vrf device, tvrf in this script.
++#
++# For egress, this is reversed: first iteration has the vrf device,
++# second iteration is done with the lower/real/veth0 device.
++#
++# test_ct_zone_in demonstrates unexpected change of nftables
++# behavior # caused by commit 09e856d54bda5f28 "vrf: Reset skb conntrack
++# connection on VRF rcv"
++#
++# It was possible to assign conntrack zone to a packet (or mark it for
++# `notracking`) in the prerouting chain before conntrack, based on real iif.
++#
++# After the change, the zone assignment is lost and the zone is assigned based
++# on the VRF master interface (in case such a rule exists).
++# assignment is lost. Instead, assignment based on the `iif` matching
++# Thus it is impossible to distinguish packets based on the original
++# interface.
++#
++# test_masquerade_vrf and test_masquerade_veth0 demonstrate the problem
++# that was supposed to be fixed by the commit mentioned above to make sure
++# that any fix to test case 1 won't break masquerade again.
++
++ksft_skip=4
++
++IP0=172.30.30.1
++IP1=172.30.30.2
++PFXL=30
++ret=0
++
++sfx=$(mktemp -u "XXXXXXXX")
++ns0="ns0-$sfx"
++ns1="ns1-$sfx"
++
++cleanup()
++{
++ ip netns pids $ns0 | xargs kill 2>/dev/null
++ ip netns pids $ns1 | xargs kill 2>/dev/null
++
++ ip netns del $ns0 $ns1
++}
++
++nft --version > /dev/null 2>&1
++if [ $? -ne 0 ];then
++ echo "SKIP: Could not run test without nft tool"
++ exit $ksft_skip
++fi
++
++ip -Version > /dev/null 2>&1
++if [ $? -ne 0 ];then
++ echo "SKIP: Could not run test without ip tool"
++ exit $ksft_skip
++fi
++
++ip netns add "$ns0"
++if [ $? -ne 0 ];then
++ echo "SKIP: Could not create net namespace $ns0"
++ exit $ksft_skip
++fi
++ip netns add "$ns1"
++
++trap cleanup EXIT
++
++ip netns exec $ns0 sysctl -q -w net.ipv4.conf.default.rp_filter=0
++ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0
++ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0
++
++ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1
++if [ $? -ne 0 ];then
++ echo "SKIP: Could not add veth device"
++ exit $ksft_skip
++fi
++
++ip -net $ns0 li add tvrf type vrf table 9876
++if [ $? -ne 0 ];then
++ echo "SKIP: Could not add vrf device"
++ exit $ksft_skip
++fi
++
++ip -net $ns0 li set lo up
++
++ip -net $ns0 li set veth0 master tvrf
++ip -net $ns0 li set tvrf up
++ip -net $ns0 li set veth0 up
++ip -net $ns1 li set veth0 up
++
++ip -net $ns0 addr add $IP0/$PFXL dev veth0
++ip -net $ns1 addr add $IP1/$PFXL dev veth0
++
++ip netns exec $ns1 iperf3 -s > /dev/null 2>&1&
++if [ $? -ne 0 ];then
++ echo "SKIP: Could not start iperf3"
++ exit $ksft_skip
++fi
++
++# test vrf ingress handling.
++# The incoming connection should be placed in conntrack zone 1,
++# as decided by the first iteration of the ruleset.
++test_ct_zone_in()
++{
++ip netns exec $ns0 nft -f - <<EOF
++table testct {
++ chain rawpre {
++ type filter hook prerouting priority raw;
++
++ iif { veth0, tvrf } counter meta nftrace set 1
++ iif veth0 counter ct zone set 1 counter return
++ iif tvrf counter ct zone set 2 counter return
++ ip protocol icmp counter
++ notrack counter
++ }
++
++ chain rawout {
++ type filter hook output priority raw;
++
++ oif veth0 counter ct zone set 1 counter return
++ oif tvrf counter ct zone set 2 counter return
++ notrack counter
++ }
++}
++EOF
++ ip netns exec $ns1 ping -W 1 -c 1 -I veth0 $IP0 > /dev/null
++
++ # should be in zone 1, not zone 2
++ count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 1 2>/dev/null | wc -l)
++ if [ $count -eq 1 ]; then
++ echo "PASS: entry found in conntrack zone 1"
++ else
++ echo "FAIL: entry not found in conntrack zone 1"
++ count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 2 2> /dev/null | wc -l)
++ if [ $count -eq 1 ]; then
++ echo "FAIL: entry found in zone 2 instead"
++ else
++ echo "FAIL: entry not in zone 1 or 2, dumping table"
++ ip netns exec $ns0 conntrack -L
++ ip netns exec $ns0 nft list ruleset
++ fi
++ fi
++}
++
++# add masq rule that gets evaluated w. outif set to vrf device.
++# This tests the first iteration of the packet through conntrack,
++# oifname is the vrf device.
++test_masquerade_vrf()
++{
++ ip netns exec $ns0 conntrack -F 2>/dev/null
++
++ip netns exec $ns0 nft -f - <<EOF
++flush ruleset
++table ip nat {
++ chain postrouting {
++ type nat hook postrouting priority 0;
++ # NB: masquerade should always be combined with 'oif(name) bla',
++ # lack of this is intentional here, we want to exercise double-snat.
++ ip saddr 172.30.30.0/30 counter masquerade random
++ }
++}
++EOF
++ ip netns exec $ns0 ip vrf exec tvrf iperf3 -t 1 -c $IP1 >/dev/null
++ if [ $? -ne 0 ]; then
++ echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on vrf device"
++ ret=1
++ return
++ fi
++
++ # must also check that nat table was evaluated on second (lower device) iteration.
++ ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2'
++ if [ $? -eq 0 ]; then
++ echo "PASS: iperf3 connect with masquerade + sport rewrite on vrf device"
++ else
++ echo "FAIL: vrf masq rule has unexpected counter value"
++ ret=1
++ fi
++}
++
++# add masq rule that gets evaluated w. outif set to veth device.
++# This tests the 2nd iteration of the packet through conntrack,
++# oifname is the lower device (veth0 in this case).
++test_masquerade_veth()
++{
++ ip netns exec $ns0 conntrack -F 2>/dev/null
++ip netns exec $ns0 nft -f - <<EOF
++flush ruleset
++table ip nat {
++ chain postrouting {
++ type nat hook postrouting priority 0;
++ meta oif veth0 ip saddr 172.30.30.0/30 counter masquerade random
++ }
++}
++EOF
++ ip netns exec $ns0 ip vrf exec tvrf iperf3 -t 1 -c $IP1 > /dev/null
++ if [ $? -ne 0 ]; then
++ echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on veth device"
++ ret=1
++ return
++ fi
++
++ # must also check that nat table was evaluated on second (lower device) iteration.
++ ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2'
++ if [ $? -eq 0 ]; then
++ echo "PASS: iperf3 connect with masquerade + sport rewrite on veth device"
++ else
++ echo "FAIL: vrf masq rule has unexpected counter value"
++ ret=1
++ fi
++}
++
++test_ct_zone_in
++test_masquerade_vrf
++test_masquerade_veth
++
++exit $ret
can-kvaser_usb-get-can-clock-frequency-from-device.patch
can-kvaser_pciefd-kvaser_pciefd_rx_error_frame-increase-correct-stats-rx-tx-_errors-counter.patch
can-sja1000-fix-use-after-free-in-ems_pcmcia_add_card.patch
+nfc-fix-potential-null-pointer-deref-in-nfc_genl_dump_ses_done.patch
+selftests-netfilter-add-a-vrf-conntrack-testcase.patch
+vrf-don-t-run-conntrack-on-vrf-with-dflt-qdisc.patch
+bpf-fix-the-off-by-two-error-in-range-markings.patch
+ice-ignore-dropped-packets-during-init.patch
+bonding-make-tx_rebalance_counter-an-atomic.patch
+nfp-fix-memory-leak-in-nfp_cpp_area_cache_add.patch
--- /dev/null
+From d43b75fbc23f0ac1ef9c14a5a166d3ccb761a451 Mon Sep 17 00:00:00 2001
+From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Date: Fri, 26 Nov 2021 15:36:12 +0100
+Subject: vrf: don't run conntrack on vrf with !dflt qdisc
+
+From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+
+commit d43b75fbc23f0ac1ef9c14a5a166d3ccb761a451 upstream.
+
+After the below patch, the conntrack attached to skb is set to "notrack" in
+the context of vrf device, for locally generated packets.
+But this is true only when the default qdisc is set to the vrf device. When
+changing the qdisc, notrack is not set anymore.
+In fact, there is a shortcut in the vrf driver, when the default qdisc is
+set, see commit dcdd43c41e60 ("net: vrf: performance improvements for
+IPv4") for more details.
+
+This patch ensures that the behavior is always the same, whatever the qdisc
+is.
+
+To demonstrate the difference, a new test is added in conntrack_vrf.sh.
+
+Fixes: 8c9c296adfae ("vrf: run conntrack only in context of lower/physdev for locally generated packets")
+Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Acked-by: Florian Westphal <fw@strlen.de>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/vrf.c | 8 ++---
+ tools/testing/selftests/netfilter/conntrack_vrf.sh | 30 ++++++++++++++++++---
+ 2 files changed, 30 insertions(+), 8 deletions(-)
+
+--- a/drivers/net/vrf.c
++++ b/drivers/net/vrf.c
+@@ -495,8 +495,6 @@ static struct sk_buff *vrf_ip6_out_direc
+
+ skb->dev = vrf_dev;
+
+- vrf_nf_set_untracked(skb);
+-
+ err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk,
+ skb, NULL, vrf_dev, vrf_ip6_out_direct_finish);
+
+@@ -517,6 +515,8 @@ static struct sk_buff *vrf_ip6_out(struc
+ if (rt6_need_strict(&ipv6_hdr(skb)->daddr))
+ return skb;
+
++ vrf_nf_set_untracked(skb);
++
+ if (qdisc_tx_is_default(vrf_dev) ||
+ IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED)
+ return vrf_ip6_out_direct(vrf_dev, sk, skb);
+@@ -732,8 +732,6 @@ static struct sk_buff *vrf_ip_out_direct
+
+ skb->dev = vrf_dev;
+
+- vrf_nf_set_untracked(skb);
+-
+ err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk,
+ skb, NULL, vrf_dev, vrf_ip_out_direct_finish);
+
+@@ -755,6 +753,8 @@ static struct sk_buff *vrf_ip_out(struct
+ ipv4_is_lbcast(ip_hdr(skb)->daddr))
+ return skb;
+
++ vrf_nf_set_untracked(skb);
++
+ if (qdisc_tx_is_default(vrf_dev) ||
+ IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED)
+ return vrf_ip_out_direct(vrf_dev, sk, skb);
+--- a/tools/testing/selftests/netfilter/conntrack_vrf.sh
++++ b/tools/testing/selftests/netfilter/conntrack_vrf.sh
+@@ -150,11 +150,27 @@ EOF
+ # oifname is the vrf device.
+ test_masquerade_vrf()
+ {
++ local qdisc=$1
++
++ if [ "$qdisc" != "default" ]; then
++ tc -net $ns0 qdisc add dev tvrf root $qdisc
++ fi
++
+ ip netns exec $ns0 conntrack -F 2>/dev/null
+
+ ip netns exec $ns0 nft -f - <<EOF
+ flush ruleset
+ table ip nat {
++ chain rawout {
++ type filter hook output priority raw;
++
++ oif tvrf ct state untracked counter
++ }
++ chain postrouting2 {
++ type filter hook postrouting priority mangle;
++
++ oif tvrf ct state untracked counter
++ }
+ chain postrouting {
+ type nat hook postrouting priority 0;
+ # NB: masquerade should always be combined with 'oif(name) bla',
+@@ -171,13 +187,18 @@ EOF
+ fi
+
+ # must also check that nat table was evaluated on second (lower device) iteration.
+- ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2'
++ ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2' &&
++ ip netns exec $ns0 nft list table ip nat |grep -q 'untracked counter packets [1-9]'
+ if [ $? -eq 0 ]; then
+- echo "PASS: iperf3 connect with masquerade + sport rewrite on vrf device"
++ echo "PASS: iperf3 connect with masquerade + sport rewrite on vrf device ($qdisc qdisc)"
+ else
+- echo "FAIL: vrf masq rule has unexpected counter value"
++ echo "FAIL: vrf rules have unexpected counter value"
+ ret=1
+ fi
++
++ if [ "$qdisc" != "default" ]; then
++ tc -net $ns0 qdisc del dev tvrf root
++ fi
+ }
+
+ # add masq rule that gets evaluated w. outif set to veth device.
+@@ -213,7 +234,8 @@ EOF
+ }
+
+ test_ct_zone_in
+-test_masquerade_vrf
++test_masquerade_vrf "default"
++test_masquerade_vrf "pfifo"
+ test_masquerade_veth
+
+ exit $ret