--- /dev/null
+From fb5dd59dcee96c6e83b29515d03c793d670f454e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 Aug 2021 14:42:40 -0700
+Subject: bnxt: disable napi before canceling DIM
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit 01cca6b9330ac7460de44eeeb3a0607f8aae69ff ]
+
+napi schedules DIM, napi has to be disabled first,
+then DIM canceled.
+
+Noticed while reading the code.
+
+Fixes: 0bc0b97fca73 ("bnxt_en: cleanup DIM work on device shutdown")
+Fixes: 6a8788f25625 ("bnxt_en: add support for software dynamic interrupt moderation")
+Reviewed-by: Michael Chan <michael.chan@broadcom.com>
+Reviewed-by: Edwin Peer <edwin.peer@broadcom.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+index 8111aefb2411..1b5839ad97b6 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -8346,10 +8346,9 @@ static void bnxt_disable_napi(struct bnxt *bp)
+ for (i = 0; i < bp->cp_nr_rings; i++) {
+ struct bnxt_cp_ring_info *cpr = &bp->bnapi[i]->cp_ring;
+
++ napi_disable(&bp->bnapi[i]->napi);
+ if (bp->bnapi[i]->rx_ring)
+ cancel_work_sync(&cpr->dim.work);
+-
+- napi_disable(&bp->bnapi[i]->napi);
+ }
+ }
+
+--
+2.30.2
+
--- /dev/null
+From 0b25b021a2020f746ea39d86ba542c199bf63927 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 Aug 2021 14:42:39 -0700
+Subject: bnxt: don't lock the tx queue from napi poll
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit 3c603136c9f82833813af77185618de5af67676c ]
+
+We can't take the tx lock from the napi poll routine, because
+netpoll can poll napi at any moment, including with the tx lock
+already held.
+
+The tx lock is protecting against two paths - the disable
+path, and (as Michael points out) the NETDEV_TX_BUSY case
+which may occur if NAPI completions race with start_xmit
+and both decide to re-enable the queue.
+
+For the disable/ifdown path use synchronize_net() to make sure
+closing the device does not race with restarting the queues.
+Annotate accesses to dev_state against data races.
+
+For the NAPI cleanup vs start_xmit path - appropriate barriers
+are already in place in the main spot where Tx queue is stopped
+but we need to do the same careful dance in the TX_BUSY case.
+
+Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.")
+Reviewed-by: Michael Chan <michael.chan@broadcom.com>
+Reviewed-by: Edwin Peer <edwin.peer@broadcom.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 54 ++++++++++++++---------
+ 1 file changed, 32 insertions(+), 22 deletions(-)
+
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+index 287ea792922a..8111aefb2411 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -360,6 +360,26 @@ static u16 bnxt_xmit_get_cfa_action(struct sk_buff *skb)
+ return md_dst->u.port_info.port_id;
+ }
+
++static bool bnxt_txr_netif_try_stop_queue(struct bnxt *bp,
++ struct bnxt_tx_ring_info *txr,
++ struct netdev_queue *txq)
++{
++ netif_tx_stop_queue(txq);
++
++ /* netif_tx_stop_queue() must be done before checking
++ * tx index in bnxt_tx_avail() below, because in
++ * bnxt_tx_int(), we update tx index before checking for
++ * netif_tx_queue_stopped().
++ */
++ smp_mb();
++ if (bnxt_tx_avail(bp, txr) > bp->tx_wake_thresh) {
++ netif_tx_wake_queue(txq);
++ return false;
++ }
++
++ return true;
++}
++
+ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ {
+ struct bnxt *bp = netdev_priv(dev);
+@@ -387,8 +407,8 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
+
+ free_size = bnxt_tx_avail(bp, txr);
+ if (unlikely(free_size < skb_shinfo(skb)->nr_frags + 2)) {
+- netif_tx_stop_queue(txq);
+- return NETDEV_TX_BUSY;
++ if (bnxt_txr_netif_try_stop_queue(bp, txr, txq))
++ return NETDEV_TX_BUSY;
+ }
+
+ length = skb->len;
+@@ -597,16 +617,7 @@ tx_done:
+ if (netdev_xmit_more() && !tx_buf->is_push)
+ bnxt_db_write(bp, &txr->tx_db, prod);
+
+- netif_tx_stop_queue(txq);
+-
+- /* netif_tx_stop_queue() must be done before checking
+- * tx index in bnxt_tx_avail() below, because in
+- * bnxt_tx_int(), we update tx index before checking for
+- * netif_tx_queue_stopped().
+- */
+- smp_mb();
+- if (bnxt_tx_avail(bp, txr) > bp->tx_wake_thresh)
+- netif_tx_wake_queue(txq);
++ bnxt_txr_netif_try_stop_queue(bp, txr, txq);
+ }
+ return NETDEV_TX_OK;
+
+@@ -690,14 +701,9 @@ next_tx_int:
+ smp_mb();
+
+ if (unlikely(netif_tx_queue_stopped(txq)) &&
+- (bnxt_tx_avail(bp, txr) > bp->tx_wake_thresh)) {
+- __netif_tx_lock(txq, smp_processor_id());
+- if (netif_tx_queue_stopped(txq) &&
+- bnxt_tx_avail(bp, txr) > bp->tx_wake_thresh &&
+- txr->dev_state != BNXT_DEV_STATE_CLOSING)
+- netif_tx_wake_queue(txq);
+- __netif_tx_unlock(txq);
+- }
++ bnxt_tx_avail(bp, txr) > bp->tx_wake_thresh &&
++ READ_ONCE(txr->dev_state) != BNXT_DEV_STATE_CLOSING)
++ netif_tx_wake_queue(txq);
+ }
+
+ static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping,
+@@ -8371,9 +8377,11 @@ void bnxt_tx_disable(struct bnxt *bp)
+ if (bp->tx_ring) {
+ for (i = 0; i < bp->tx_nr_rings; i++) {
+ txr = &bp->tx_ring[i];
+- txr->dev_state = BNXT_DEV_STATE_CLOSING;
++ WRITE_ONCE(txr->dev_state, BNXT_DEV_STATE_CLOSING);
+ }
+ }
++ /* Make sure napi polls see @dev_state change */
++ synchronize_net();
+ /* Drop carrier first to prevent TX timeout */
+ netif_carrier_off(bp->dev);
+ /* Stop all TX queues */
+@@ -8387,8 +8395,10 @@ void bnxt_tx_enable(struct bnxt *bp)
+
+ for (i = 0; i < bp->tx_nr_rings; i++) {
+ txr = &bp->tx_ring[i];
+- txr->dev_state = 0;
++ WRITE_ONCE(txr->dev_state, 0);
+ }
++ /* Make sure napi polls see @dev_state change */
++ synchronize_net();
+ netif_tx_wake_all_queues(bp->dev);
+ if (bp->link_info.link_up)
+ netif_carrier_on(bp->dev);
+--
+2.30.2
+
--- /dev/null
+From a6ad1b5453c70185001ef64440f2953e322ff991 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 15 Aug 2021 16:15:37 -0400
+Subject: bnxt_en: Add missing DMA memory barriers
+
+From: Michael Chan <michael.chan@broadcom.com>
+
+[ Upstream commit 828affc27ed43441bd1efdaf4e07e96dd43a0362 ]
+
+Each completion ring entry has a valid bit to indicate that the entry
+contains a valid completion event. The driver's main poll loop
+__bnxt_poll_work() has the proper dma_rmb() to make sure the valid
+bit of the next entry has been checked before proceeding further.
+But when we call bnxt_rx_pkt() to process the RX event, the RX
+completion event consists of two completion entries and only the
+first entry has been checked to be valid. We need the same barrier
+after checking the next completion entry. Add missing dma_rmb()
+barriers in bnxt_rx_pkt() and other similar locations.
+
+Fixes: 67a95e2022c7 ("bnxt_en: Need memory barrier when processing the completion ring.")
+Reported-by: Lance Richardson <lance.richardson@broadcom.com>
+Reviewed-by: Andy Gospodarek <gospo@broadcom.com>
+Reviewed-by: Lance Richardson <lance.richardson@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+index 1b5839ad97b6..e67f07faca78 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -1724,6 +1724,10 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
+ if (!RX_CMP_VALID(rxcmp1, tmp_raw_cons))
+ return -EBUSY;
+
++ /* The valid test of the entry must be done first before
++ * reading any further.
++ */
++ dma_rmb();
+ prod = rxr->rx_prod;
+
+ if (cmp_type == CMP_TYPE_RX_L2_TPA_START_CMP) {
+@@ -1918,6 +1922,10 @@ static int bnxt_force_rx_discard(struct bnxt *bp,
+ if (!RX_CMP_VALID(rxcmp1, tmp_raw_cons))
+ return -EBUSY;
+
++ /* The valid test of the entry must be done first before
++ * reading any further.
++ */
++ dma_rmb();
+ cmp_type = RX_CMP_TYPE(rxcmp);
+ if (cmp_type == CMP_TYPE_RX_L2_CMP) {
+ rxcmp1->rx_cmp_cfa_code_errors_v2 |=
+@@ -2314,6 +2322,10 @@ static int bnxt_poll_nitroa0(struct napi_struct *napi, int budget)
+ if (!TX_CMP_VALID(txcmp, raw_cons))
+ break;
+
++ /* The valid test of the entry must be done first before
++ * reading any further.
++ */
++ dma_rmb();
+ if ((TX_CMP_TYPE(txcmp) & 0x30) == 0x10) {
+ tmp_raw_cons = NEXT_RAW_CMP(raw_cons);
+ cp_cons = RING_CMP(tmp_raw_cons);
+--
+2.30.2
+
--- /dev/null
+From c2c3d8d0a10e43195e67431bdf1a5431a5546ce9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 Aug 2021 17:18:10 +0200
+Subject: bpf: Clear zext_dst of dead insns
+
+From: Ilya Leoshkevich <iii@linux.ibm.com>
+
+[ Upstream commit 45c709f8c71b525b51988e782febe84ce933e7e0 ]
+
+"access skb fields ok" verifier test fails on s390 with the "verifier
+bug. zext_dst is set, but no reg is defined" message. The first insns
+of the test prog are ...
+
+ 0: 61 01 00 00 00 00 00 00 ldxw %r0,[%r1+0]
+ 8: 35 00 00 01 00 00 00 00 jge %r0,0,1
+ 10: 61 01 00 08 00 00 00 00 ldxw %r0,[%r1+8]
+
+... and the 3rd one is dead (this does not look intentional to me, but
+this is a separate topic).
+
+sanitize_dead_code() converts dead insns into "ja -1", but keeps
+zext_dst. When opt_subreg_zext_lo32_rnd_hi32() tries to parse such
+an insn, it sees this discrepancy and bails. This problem can be seen
+only with JITs whose bpf_jit_needs_zext() returns true.
+
+Fix by clearing dead insns' zext_dst.
+
+The commits that contributed to this problem are:
+
+1. 5aa5bd14c5f8 ("bpf: add initial suite for selftests"), which
+ introduced the test with the dead code.
+2. 5327ed3d44b7 ("bpf: verifier: mark verified-insn with
+ sub-register zext flag"), which introduced the zext_dst flag.
+3. 83a2881903f3 ("bpf: Account for BPF_FETCH in
+ insn_has_def32()"), which introduced the sanity check.
+4. 9183671af6db ("bpf: Fix leakage under speculation on
+ mispredicted branches"), which bisect points to.
+
+It's best to fix this on stable branches that contain the second one,
+since that's the point where the inconsistency was introduced.
+
+Fixes: 5327ed3d44b7 ("bpf: verifier: mark verified-insn with sub-register zext flag")
+Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Link: https://lore.kernel.org/bpf/20210812151811.184086-2-iii@linux.ibm.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/verifier.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
+index 52c2b11a0b47..0b5a446ee59c 100644
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -8586,6 +8586,7 @@ static void sanitize_dead_code(struct bpf_verifier_env *env)
+ if (aux_data[i].seen)
+ continue;
+ memcpy(insn + i, &trap, sizeof(trap));
++ aux_data[i].zext_dst = false;
+ }
+ }
+
+--
+2.30.2
+
--- /dev/null
+From c78c895f3773d925a51bbe89e33799e27293a6c0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 1 Jul 2021 00:56:01 +0200
+Subject: cpufreq: armada-37xx: forbid cpufreq for 1.2 GHz variant
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Marek Behún <kabel@kernel.org>
+
+[ Upstream commit 484f2b7c61b9ae58cc00c5127bcbcd9177af8dfe ]
+
+The 1.2 GHz variant of the Armada 3720 SOC is unstable with DVFS: when
+the SOC boots, the WTMI firmware sets clocks and AVS values that work
+correctly with 1.2 GHz CPU frequency, but random crashes occur once
+cpufreq driver starts scaling.
+
+We do not know currently what is the reason:
+- it may be that the voltage value for L0 for 1.2 GHz variant provided
+ by the vendor in the OTP is simply incorrect when scaling is used,
+- it may be that some delay is needed somewhere,
+- it may be something else.
+
+The most sane solution now seems to be to simply forbid the cpufreq
+driver on 1.2 GHz variant.
+
+Signed-off-by: Marek Behún <kabel@kernel.org>
+Fixes: 92ce45fb875d ("cpufreq: Add DVFS support for Armada 37xx")
+Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/cpufreq/armada-37xx-cpufreq.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/cpufreq/armada-37xx-cpufreq.c b/drivers/cpufreq/armada-37xx-cpufreq.c
+index e4782f562e7a..2de7fd18f66a 100644
+--- a/drivers/cpufreq/armada-37xx-cpufreq.c
++++ b/drivers/cpufreq/armada-37xx-cpufreq.c
+@@ -102,7 +102,11 @@ struct armada_37xx_dvfs {
+ };
+
+ static struct armada_37xx_dvfs armada_37xx_dvfs[] = {
+- {.cpu_freq_max = 1200*1000*1000, .divider = {1, 2, 4, 6} },
++ /*
++ * The cpufreq scaling for 1.2 GHz variant of the SOC is currently
++ * unstable because we do not know how to configure it properly.
++ */
++ /* {.cpu_freq_max = 1200*1000*1000, .divider = {1, 2, 4, 6} }, */
+ {.cpu_freq_max = 1000*1000*1000, .divider = {1, 2, 4, 5} },
+ {.cpu_freq_max = 800*1000*1000, .divider = {1, 2, 3, 4} },
+ {.cpu_freq_max = 600*1000*1000, .divider = {2, 4, 5, 6} },
+--
+2.30.2
+
--- /dev/null
+From 2eada32d4174313977c057de3747d7f2ce665fa8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 8 Aug 2021 16:04:40 -0700
+Subject: dccp: add do-while-0 stubs for dccp_pr_debug macros
+
+From: Randy Dunlap <rdunlap@infradead.org>
+
+[ Upstream commit 86aab09a4870bb8346c9579864588c3d7f555299 ]
+
+GCC complains about empty macros in an 'if' statement, so convert
+them to 'do {} while (0)' macros.
+
+Fixes these build warnings:
+
+net/dccp/output.c: In function 'dccp_xmit_packet':
+../net/dccp/output.c:283:71: warning: suggest braces around empty body in an 'if' statement [-Wempty-body]
+ 283 | dccp_pr_debug("transmit_skb() returned err=%d\n", err);
+net/dccp/ackvec.c: In function 'dccp_ackvec_update_old':
+../net/dccp/ackvec.c:163:80: warning: suggest braces around empty body in an 'else' statement [-Wempty-body]
+ 163 | (unsigned long long)seqno, state);
+
+Fixes: dc841e30eaea ("dccp: Extend CCID packet dequeueing interface")
+Fixes: 380240864451 ("dccp ccid-2: Update code for the Ack Vector input/registration routine")
+Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
+Cc: dccp@vger.kernel.org
+Cc: "David S. Miller" <davem@davemloft.net>
+Cc: Jakub Kicinski <kuba@kernel.org>
+Cc: Gerrit Renker <gerrit@erg.abdn.ac.uk>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/dccp/dccp.h | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
+index 9c3b27c257bb..cb818617699c 100644
+--- a/net/dccp/dccp.h
++++ b/net/dccp/dccp.h
+@@ -41,9 +41,9 @@ extern bool dccp_debug;
+ #define dccp_pr_debug_cat(format, a...) DCCP_PRINTK(dccp_debug, format, ##a)
+ #define dccp_debug(fmt, a...) dccp_pr_debug_cat(KERN_DEBUG fmt, ##a)
+ #else
+-#define dccp_pr_debug(format, a...)
+-#define dccp_pr_debug_cat(format, a...)
+-#define dccp_debug(format, a...)
++#define dccp_pr_debug(format, a...) do {} while (0)
++#define dccp_pr_debug_cat(format, a...) do {} while (0)
++#define dccp_debug(format, a...) do {} while (0)
+ #endif
+
+ extern struct inet_hashinfo dccp_hashinfo;
+--
+2.30.2
+
--- /dev/null
+From a4228012932c24b1bb7c2646320b4ea29d6c386d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 18 Aug 2021 10:42:16 -0700
+Subject: i40e: Fix ATR queue selection
+
+From: Arkadiusz Kubalewski <arkadiusz.kubalewski@intel.com>
+
+[ Upstream commit a222be597e316389f9f8c26033352c124ce93056 ]
+
+Without this patch, ATR does not work. Receive/transmit uses queue
+selection based on SW DCB hashing method.
+
+If traffic classes are not configured for PF, then use
+netdev_pick_tx function for selecting queue for packet transmission.
+Instead of calling i40e_swdcb_skb_tx_hash, call netdev_pick_tx,
+which ensures that packet is transmitted/received from CPU that is
+running the application.
+
+Reproduction steps:
+1. Load i40e driver
+2. Map each MSI interrupt of i40e port for each CPU
+3. Disable ntuple, enable ATR i.e.:
+ethtool -K $interface ntuple off
+ethtool --set-priv-flags $interface flow-director-atr
+4. Run application that is generating traffic and is bound to a
+single CPU, i.e.:
+taskset -c 9 netperf -H 1.1.1.1 -t TCP_RR -l 10
+5. Observe behavior:
+Application's traffic should be restricted to the CPU provided in
+taskset.
+
+Fixes: 89ec1f0886c1 ("i40e: Fix queue-to-TC mapping on Tx")
+Signed-off-by: Przemyslaw Patynowski <przemyslawx.patynowski@intel.com>
+Signed-off-by: Arkadiusz Kubalewski <arkadiusz.kubalewski@intel.com>
+Tested-by: Dave Switzer <david.switzer@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/i40e/i40e_txrx.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+index 8e38c547b53f..06987913837a 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+@@ -3553,8 +3553,7 @@ u16 i40e_lan_select_queue(struct net_device *netdev,
+
+ /* is DCB enabled at all? */
+ if (vsi->tc_config.numtc == 1)
+- return i40e_swdcb_skb_tx_hash(netdev, skb,
+- netdev->real_num_tx_queues);
++ return netdev_pick_tx(netdev, skb, sb_dev);
+
+ prio = skb->priority;
+ hw = &vsi->back->hw;
+--
+2.30.2
+
--- /dev/null
+From 8d57b41785021e0729054590d7009f7220b41e2e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 18 Aug 2021 10:42:17 -0700
+Subject: iavf: Fix ping is lost after untrusted VF had tried to change MAC
+
+From: Sylwester Dziedziuch <sylwesterx.dziedziuch@intel.com>
+
+[ Upstream commit 8da80c9d50220a8e4190a4eaa0dd6aeefcbbb5bf ]
+
+Make changes to MAC address dependent on the response of PF.
+Disallow changes to HW MAC address and MAC filter from untrusted
+VF, thanks to that ping is not lost if VF tries to change MAC.
+Add a new field in iavf_mac_filter, to indicate whether there
+was response from PF for given filter. Based on this field pass
+or discard the filter.
+If untrusted VF tried to change its address, it's not changed.
+Still filter was changed, because of that ping couldn't go through.
+
+Fixes: c5c922b3e09b ("iavf: fix MAC address setting for VFs when filter is rejected")
+Signed-off-by: Przemyslaw Patynowski <przemyslawx.patynowski@intel.com>
+Signed-off-by: Sylwester Dziedziuch <sylwesterx.dziedziuch@intel.com>
+Signed-off-by: Mateusz Palczewski <mateusz.palczewski@intel.com>
+Tested-by: Gurucharan G <Gurucharanx.g@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/iavf/iavf.h | 1 +
+ drivers/net/ethernet/intel/iavf/iavf_main.c | 1 +
+ .../net/ethernet/intel/iavf/iavf_virtchnl.c | 47 ++++++++++++++++++-
+ 3 files changed, 47 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h
+index 6b9117a350fa..81ca6472937d 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf.h
++++ b/drivers/net/ethernet/intel/iavf/iavf.h
+@@ -134,6 +134,7 @@ struct iavf_q_vector {
+ struct iavf_mac_filter {
+ struct list_head list;
+ u8 macaddr[ETH_ALEN];
++ bool is_new_mac; /* filter is new, wait for PF decision */
+ bool remove; /* filter needs to be removed */
+ bool add; /* filter needs to be added */
+ };
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
+index dc902e371c2c..94a3f000e999 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
+@@ -761,6 +761,7 @@ struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter,
+
+ list_add_tail(&f->list, &adapter->mac_filter_list);
+ f->add = true;
++ f->is_new_mac = true;
+ adapter->aq_required |= IAVF_FLAG_AQ_ADD_MAC_FILTER;
+ } else {
+ f->remove = false;
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+index 9655318803b7..4d471a6f2946 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+@@ -564,6 +564,47 @@ void iavf_del_ether_addrs(struct iavf_adapter *adapter)
+ kfree(veal);
+ }
+
++/**
++ * iavf_mac_add_ok
++ * @adapter: adapter structure
++ *
++ * Submit list of filters based on PF response.
++ **/
++static void iavf_mac_add_ok(struct iavf_adapter *adapter)
++{
++ struct iavf_mac_filter *f, *ftmp;
++
++ spin_lock_bh(&adapter->mac_vlan_list_lock);
++ list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) {
++ f->is_new_mac = false;
++ }
++ spin_unlock_bh(&adapter->mac_vlan_list_lock);
++}
++
++/**
++ * iavf_mac_add_reject
++ * @adapter: adapter structure
++ *
++ * Remove filters from list based on PF response.
++ **/
++static void iavf_mac_add_reject(struct iavf_adapter *adapter)
++{
++ struct net_device *netdev = adapter->netdev;
++ struct iavf_mac_filter *f, *ftmp;
++
++ spin_lock_bh(&adapter->mac_vlan_list_lock);
++ list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) {
++ if (f->remove && ether_addr_equal(f->macaddr, netdev->dev_addr))
++ f->remove = false;
++
++ if (f->is_new_mac) {
++ list_del(&f->list);
++ kfree(f);
++ }
++ }
++ spin_unlock_bh(&adapter->mac_vlan_list_lock);
++}
++
+ /**
+ * iavf_add_vlans
+ * @adapter: adapter structure
+@@ -1316,6 +1357,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
+ case VIRTCHNL_OP_ADD_ETH_ADDR:
+ dev_err(&adapter->pdev->dev, "Failed to add MAC filter, error %s\n",
+ iavf_stat_str(&adapter->hw, v_retval));
++ iavf_mac_add_reject(adapter);
+ /* restore administratively set MAC address */
+ ether_addr_copy(adapter->hw.mac.addr, netdev->dev_addr);
+ break;
+@@ -1385,10 +1427,11 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
+ }
+ }
+ switch (v_opcode) {
+- case VIRTCHNL_OP_ADD_ETH_ADDR: {
++ case VIRTCHNL_OP_ADD_ETH_ADDR:
++ if (!v_retval)
++ iavf_mac_add_ok(adapter);
+ if (!ether_addr_equal(netdev->dev_addr, adapter->hw.mac.addr))
+ ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr);
+- }
+ break;
+ case VIRTCHNL_OP_GET_STATS: {
+ struct iavf_eth_stats *stats =
+--
+2.30.2
+
--- /dev/null
+From 31572e7a3b58674bcd68893db81d12835435fdcd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 31 Jul 2021 09:47:37 +0200
+Subject: iommu: Check if group is NULL before remove device
+
+From: Frank Wunderlich <frank-w@public-files.de>
+
+[ Upstream commit 5aa95d8834e07907e64937d792c12ffef7fb271f ]
+
+If probe_device is failing, iommu_group is not initialized because
+iommu_group_add_device is not reached, so freeing it will result
+in NULL pointer access.
+
+iommu_bus_init
+ ->bus_iommu_probe
+ ->probe_iommu_group in for each:/* return -22 in fail case */
+ ->iommu_probe_device
+ ->__iommu_probe_device /* return -22 here.*/
+ -> ops->probe_device /* return -22 here.*/
+ -> iommu_group_get_for_dev
+ -> ops->device_group
+ -> iommu_group_add_device //good case
+ ->remove_iommu_group //in fail case, it will remove group
+ ->iommu_release_device
+ ->iommu_group_remove_device // here we don't have group
+
+In my case ops->probe_device (mtk_iommu_probe_device from
+mtk_iommu_v1.c) is failing due to an fwspec->ops mismatch.
+
+Fixes: d72e31c93746 ("iommu: IOMMU Groups")
+Signed-off-by: Frank Wunderlich <frank-w@public-files.de>
+Link: https://lore.kernel.org/r/20210731074737.4573-1-linux@fw-web.de
+Signed-off-by: Joerg Roedel <jroedel@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/iommu/iommu.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
+index 9d7232e26ecf..c5758fb696cc 100644
+--- a/drivers/iommu/iommu.c
++++ b/drivers/iommu/iommu.c
+@@ -775,6 +775,9 @@ void iommu_group_remove_device(struct device *dev)
+ struct iommu_group *group = dev->iommu_group;
+ struct group_device *tmp_device, *device = NULL;
+
++ if (!group)
++ return;
++
+ dev_info(dev, "Removing from iommu group %d\n", group->id);
+
+ /* Pre-notify listeners that a device is being removed. */
+--
+2.30.2
+
--- /dev/null
+From f56ebc8fee81e13d5378c9b1789dd4764c03b909 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 13 Aug 2021 18:14:33 +0300
+Subject: net: 6pack: fix slab-out-of-bounds in decode_data
+
+From: Pavel Skripkin <paskripkin@gmail.com>
+
+[ Upstream commit 19d1532a187669ce86d5a2696eb7275310070793 ]
+
+Syzbot reported slab-out-of-bounds write in decode_data().
+The problem was in missing validation checks.
+
+Syzbot's reproducer generated malicious input, which caused
+decode_data() to be called a lot in sixpack_decode(). Since
+rx_count_cooked is only 400 bytes and no one reported before,
+that 400 bytes is not enough, let's just check if input is malicious
+and complain about buffer overrun.
+
+Fail log:
+==================================================================
+BUG: KASAN: slab-out-of-bounds in drivers/net/hamradio/6pack.c:843
+Write of size 1 at addr ffff888087c5544e by task kworker/u4:0/7
+
+CPU: 0 PID: 7 Comm: kworker/u4:0 Not tainted 5.6.0-rc3-syzkaller #0
+...
+Workqueue: events_unbound flush_to_ldisc
+Call Trace:
+ __dump_stack lib/dump_stack.c:77 [inline]
+ dump_stack+0x197/0x210 lib/dump_stack.c:118
+ print_address_description.constprop.0.cold+0xd4/0x30b mm/kasan/report.c:374
+ __kasan_report.cold+0x1b/0x32 mm/kasan/report.c:506
+ kasan_report+0x12/0x20 mm/kasan/common.c:641
+ __asan_report_store1_noabort+0x17/0x20 mm/kasan/generic_report.c:137
+ decode_data.part.0+0x23b/0x270 drivers/net/hamradio/6pack.c:843
+ decode_data drivers/net/hamradio/6pack.c:965 [inline]
+ sixpack_decode drivers/net/hamradio/6pack.c:968 [inline]
+
+Reported-and-tested-by: syzbot+fc8cd9a673d4577fb2e4@syzkaller.appspotmail.com
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
+Reviewed-by: Dan Carpenter <dan.carpenter@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/hamradio/6pack.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c
+index 71d6629e65c9..da13683d52d1 100644
+--- a/drivers/net/hamradio/6pack.c
++++ b/drivers/net/hamradio/6pack.c
+@@ -839,6 +839,12 @@ static void decode_data(struct sixpack *sp, unsigned char inbyte)
+ return;
+ }
+
++ if (sp->rx_count_cooked + 2 >= sizeof(sp->cooked_buf)) {
++ pr_err("6pack: cooked buffer overrun, data loss\n");
++ sp->rx_count = 0;
++ return;
++ }
++
+ buf = sp->raw_buf;
+ sp->cooked_buf[sp->rx_count_cooked++] =
+ buf[0] | ((buf[1] << 2) & 0xc0);
+--
+2.30.2
+
--- /dev/null
+From ef19b1e55f0cd20877624b2fe850bbd725b1fc6c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 17 Aug 2021 20:38:02 -0700
+Subject: net: mdio-mux: Don't ignore memory allocation errors
+
+From: Saravana Kannan <saravanak@google.com>
+
+[ Upstream commit 99d81e942474cc7677d12f673f42a7ea699e2589 ]
+
+If we are seeing memory allocation errors, don't try to continue
+registering child mdiobus devices. It's unlikely they'll succeed.
+
+Fixes: 342fa1964439 ("mdio: mux: make child bus walking more permissive and errors more verbose")
+Signed-off-by: Saravana Kannan <saravanak@google.com>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Acked-by: Marc Zyngier <maz@kernel.org>
+Tested-by: Marc Zyngier <maz@kernel.org>
+Acked-by: Kevin Hilman <khilman@baylibre.com>
+Tested-by: Kevin Hilman <khilman@baylibre.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/phy/mdio-mux.c | 28 ++++++++++++++++++----------
+ 1 file changed, 18 insertions(+), 10 deletions(-)
+
+diff --git a/drivers/net/phy/mdio-mux.c b/drivers/net/phy/mdio-mux.c
+index 6a1d3540210b..c96ef3b3fa3a 100644
+--- a/drivers/net/phy/mdio-mux.c
++++ b/drivers/net/phy/mdio-mux.c
+@@ -82,6 +82,17 @@ out:
+
+ static int parent_count;
+
++static void mdio_mux_uninit_children(struct mdio_mux_parent_bus *pb)
++{
++ struct mdio_mux_child_bus *cb = pb->children;
++
++ while (cb) {
++ mdiobus_unregister(cb->mii_bus);
++ mdiobus_free(cb->mii_bus);
++ cb = cb->next;
++ }
++}
++
+ int mdio_mux_init(struct device *dev,
+ struct device_node *mux_node,
+ int (*switch_fn)(int cur, int desired, void *data),
+@@ -144,7 +155,7 @@ int mdio_mux_init(struct device *dev,
+ cb = devm_kzalloc(dev, sizeof(*cb), GFP_KERNEL);
+ if (!cb) {
+ ret_val = -ENOMEM;
+- continue;
++ goto err_loop;
+ }
+ cb->bus_number = v;
+ cb->parent = pb;
+@@ -152,8 +163,7 @@ int mdio_mux_init(struct device *dev,
+ cb->mii_bus = mdiobus_alloc();
+ if (!cb->mii_bus) {
+ ret_val = -ENOMEM;
+- devm_kfree(dev, cb);
+- continue;
++ goto err_loop;
+ }
+ cb->mii_bus->priv = cb;
+
+@@ -182,6 +192,10 @@ int mdio_mux_init(struct device *dev,
+
+ dev_err(dev, "Error: No acceptable child buses found\n");
+ devm_kfree(dev, pb);
++
++err_loop:
++ mdio_mux_uninit_children(pb);
++ of_node_put(child_bus_node);
+ err_pb_kz:
+ put_device(&parent_bus->dev);
+ err_parent_bus:
+@@ -193,14 +207,8 @@ EXPORT_SYMBOL_GPL(mdio_mux_init);
+ void mdio_mux_uninit(void *mux_handle)
+ {
+ struct mdio_mux_parent_bus *pb = mux_handle;
+- struct mdio_mux_child_bus *cb = pb->children;
+-
+- while (cb) {
+- mdiobus_unregister(cb->mii_bus);
+- mdiobus_free(cb->mii_bus);
+- cb = cb->next;
+- }
+
++ mdio_mux_uninit_children(pb);
+ put_device(&pb->mii_bus->dev);
+ }
+ EXPORT_SYMBOL_GPL(mdio_mux_uninit);
+--
+2.30.2
+
--- /dev/null
+From 24174dfc032da5ba2073500b0c32584372585108 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 17 Aug 2021 20:38:03 -0700
+Subject: net: mdio-mux: Handle -EPROBE_DEFER correctly
+
+From: Saravana Kannan <saravanak@google.com>
+
+[ Upstream commit 7bd0cef5dac685f09ef8b0b2a7748ff42d284dc7 ]
+
+When registering mdiobus children, if we get an -EPROBE_DEFER, we shouldn't
+ignore it and continue registering the rest of the mdiobus children. This
+would permanently prevent the deferring child mdiobus from working instead
+of reattempting it in the future. So, if a child mdiobus needs to be
+reattempted in the future, defer the entire mdio-mux initialization.
+
+This fixes the issue where PHYs sitting under the mdio-mux aren't
+initialized correctly if the PHY's interrupt controller is not yet ready
+when the mdio-mux is being probed. Additional context in the link below.
+
+Fixes: 0ca2997d1452 ("netdev/of/phy: Add MDIO bus multiplexer support.")
+Link: https://lore.kernel.org/lkml/CAGETcx95kHrv8wA-O+-JtfH7H9biJEGJtijuPVN0V5dUKUAB3A@mail.gmail.com/#t
+Signed-off-by: Saravana Kannan <saravanak@google.com>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Acked-by: Marc Zyngier <maz@kernel.org>
+Tested-by: Marc Zyngier <maz@kernel.org>
+Acked-by: Kevin Hilman <khilman@baylibre.com>
+Tested-by: Kevin Hilman <khilman@baylibre.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/phy/mdio-mux.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/phy/mdio-mux.c b/drivers/net/phy/mdio-mux.c
+index c96ef3b3fa3a..ccb3ee704eb1 100644
+--- a/drivers/net/phy/mdio-mux.c
++++ b/drivers/net/phy/mdio-mux.c
+@@ -175,11 +175,15 @@ int mdio_mux_init(struct device *dev,
+ cb->mii_bus->write = mdio_mux_write;
+ r = of_mdiobus_register(cb->mii_bus, child_bus_node);
+ if (r) {
++ mdiobus_free(cb->mii_bus);
++ if (r == -EPROBE_DEFER) {
++ ret_val = r;
++ goto err_loop;
++ }
++ devm_kfree(dev, cb);
+ dev_err(dev,
+ "Error: Failed to register MDIO bus for child %pOF\n",
+ child_bus_node);
+- mdiobus_free(cb->mii_bus);
+- devm_kfree(dev, cb);
+ } else {
+ cb->next = pb->children;
+ pb->children = cb;
+--
+2.30.2
+
--- /dev/null
+From b78272abfa40ddede0063c48d4f62fe41e6c6ec5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 16 Aug 2021 21:14:04 +0800
+Subject: net: qlcnic: add missed unlock in qlcnic_83xx_flash_read32
+
+From: Dinghao Liu <dinghao.liu@zju.edu.cn>
+
+[ Upstream commit 0a298d133893c72c96e2156ed7cb0f0c4a306a3e ]
+
+qlcnic_83xx_unlock_flash() is called on all paths after we call
+qlcnic_83xx_lock_flash(), except for one error path on failure
+of QLCRD32(), which may cause a deadlock. This bug is suggested
+by a static analysis tool, please advise.
+
+Fixes: 81d0aeb0a4fff ("qlcnic: flash template based firmware reset recovery")
+Signed-off-by: Dinghao Liu <dinghao.liu@zju.edu.cn>
+Link: https://lore.kernel.org/r/20210816131405.24024-1-dinghao.liu@zju.edu.cn
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
+index 29b9c728a65e..f2014c10f7c9 100644
+--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
+@@ -3158,8 +3158,10 @@ int qlcnic_83xx_flash_read32(struct qlcnic_adapter *adapter, u32 flash_addr,
+
+ indirect_addr = QLC_83XX_FLASH_DIRECT_DATA(addr);
+ ret = QLCRD32(adapter, indirect_addr, &err);
+- if (err == -EIO)
++ if (err == -EIO) {
++ qlcnic_83xx_unlock_flash(adapter);
+ return err;
++ }
+
+ word = ret;
+ *(u32 *)p_data = word;
+--
+2.30.2
+
--- /dev/null
+From 452fdec58660a92af8b9b6545d80610b98f19438 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 18 Aug 2021 10:22:15 +0800
+Subject: ovs: clear skb->tstamp in forwarding path
+
+From: kaixi.fan <fankaixi.li@bytedance.com>
+
+[ Upstream commit 01634047bf0d5c2d9b7d8095bb4de1663dbeedeb ]
+
+fq qdisc requires tstamp to be cleared in the forwarding path. Now ovs
+doesn't clear skb->tstamp. We encountered a problem with linux
+version 5.4.56 and ovs version 2.14.1, and packets failed to
+dequeue from qdisc when fq qdisc was attached to ovs port.
+
+Fixes: fb420d5d91c1 ("tcp/fq: move back to CLOCK_MONOTONIC")
+Signed-off-by: kaixi.fan <fankaixi.li@bytedance.com>
+Signed-off-by: xiexiaohui <xiexiaohui.xxh@bytedance.com>
+Reviewed-by: Cong Wang <cong.wang@bytedance.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/openvswitch/vport.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
+index 3fc38d16c456..19af0efeb8dc 100644
+--- a/net/openvswitch/vport.c
++++ b/net/openvswitch/vport.c
+@@ -499,6 +499,7 @@ void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto)
+ }
+
+ skb->dev = vport->dev;
++ skb->tstamp = 0;
+ vport->ops->send(skb);
+ return;
+
+--
+2.30.2
+
--- /dev/null
+From 5fe32a02783b01bc75aa09b476bb89fa3fb11f7b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 13 Aug 2021 20:33:27 +0300
+Subject: ptp_pch: Restore dependency on PCI
+
+From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+
+[ Upstream commit 55c8fca1dae1fb0d11deaa21b65a647dedb1bc50 ]
+
+Swapping the dependency on PCH_GBE for selection of PTP_1588_CLOCK_PCH
+incidentally dropped the implicit dependency on PCI. Restore it.
+
+Fixes: 18d359ceb044 ("pch_gbe, ptp_pch: Fix the dependency direction between these drivers")
+Reported-by: kernel test robot <lkp@intel.com>
+Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/ptp/Kconfig | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig
+index 0517272a268e..9fb6f7643ea9 100644
+--- a/drivers/ptp/Kconfig
++++ b/drivers/ptp/Kconfig
+@@ -92,7 +92,8 @@ config DP83640_PHY
+ config PTP_1588_CLOCK_PCH
+ tristate "Intel PCH EG20T as PTP clock"
+ depends on X86_32 || COMPILE_TEST
+- depends on HAS_IOMEM && NET
++ depends on HAS_IOMEM && PCI
++ depends on NET
+ imply PTP_1588_CLOCK
+ help
+ This driver adds support for using the PCH EG20T as a PTP
+--
+2.30.2
+
net-usb-lan78xx-don-t-modify-phy_device-state-concur.patch
drm-amd-display-fix-dynamic-bpp-issue-with-8k30-with.patch
bluetooth-hidp-use-correct-wait-queue-when-removing-.patch
+iommu-check-if-group-is-null-before-remove-device.patch
+cpufreq-armada-37xx-forbid-cpufreq-for-1.2-ghz-varia.patch
+dccp-add-do-while-0-stubs-for-dccp_pr_debug-macros.patch
+virtio-protect-vqs-list-access.patch
+vhost-fix-the-calculation-in-vhost_overflow.patch
+bpf-clear-zext_dst-of-dead-insns.patch
+bnxt-don-t-lock-the-tx-queue-from-napi-poll.patch
+bnxt-disable-napi-before-canceling-dim.patch
+net-6pack-fix-slab-out-of-bounds-in-decode_data.patch
+ptp_pch-restore-dependency-on-pci.patch
+bnxt_en-add-missing-dma-memory-barriers.patch
+vrf-reset-skb-conntrack-connection-on-vrf-rcv.patch
+virtio-net-support-xdp-when-not-more-queues.patch
+virtio-net-use-netif_f_gro_hw-instead-of-netif_f_lro.patch
+net-qlcnic-add-missed-unlock-in-qlcnic_83xx_flash_re.patch
+net-mdio-mux-don-t-ignore-memory-allocation-errors.patch
+net-mdio-mux-handle-eprobe_defer-correctly.patch
+ovs-clear-skb-tstamp-in-forwarding-path.patch
+i40e-fix-atr-queue-selection.patch
+iavf-fix-ping-is-lost-after-untrusted-vf-had-tried-t.patch
--- /dev/null
+From bcb9042bb3f5a18e6885131afb7ddbbffa90a04f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 28 Jul 2021 21:07:56 +0800
+Subject: vhost: Fix the calculation in vhost_overflow()
+
+From: Xie Yongji <xieyongji@bytedance.com>
+
+[ Upstream commit f7ad318ea0ad58ebe0e595e59aed270bb643b29b ]
+
+This fixes the incorrect calculation for integer overflow
+when the last address of iova range is 0xffffffff.
+
+Fixes: ec33d031a14b ("vhost: detect 32 bit integer wrap around")
+Reported-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Link: https://lore.kernel.org/r/20210728130756.97-2-xieyongji@bytedance.com
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/vhost/vhost.c | 10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
+index a279ecacbf60..97be299f0a8d 100644
+--- a/drivers/vhost/vhost.c
++++ b/drivers/vhost/vhost.c
+@@ -702,10 +702,16 @@ static bool log_access_ok(void __user *log_base, u64 addr, unsigned long sz)
+ (sz + VHOST_PAGE_SIZE * 8 - 1) / VHOST_PAGE_SIZE / 8);
+ }
+
++/* Make sure 64 bit math will not overflow. */
+ static bool vhost_overflow(u64 uaddr, u64 size)
+ {
+- /* Make sure 64 bit math will not overflow. */
+- return uaddr > ULONG_MAX || size > ULONG_MAX || uaddr > ULONG_MAX - size;
++ if (uaddr > ULONG_MAX || size > ULONG_MAX)
++ return true;
++
++ if (!size)
++ return false;
++
++ return uaddr > ULONG_MAX - size + 1;
+ }
+
+ /* Caller should have vq mutex and device mutex. */
+--
+2.30.2
+
--- /dev/null
+From 2f3e671182f11df3607fcda06a01f1586cc4df7c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 10 Mar 2021 10:24:45 +0800
+Subject: virtio-net: support XDP when not more queues
+
+From: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
+
+[ Upstream commit 97c2c69e1926260c78c7f1c0b2c987934f1dc7a1 ]
+
+The number of queues implemented by many virtio backends is limited,
+which matters especially on machines with a large number of CPUs. In
+this case it is often impossible to allocate a separate queue for
+XDP_TX/XDP_REDIRECT, so XDP cannot be loaded at all, even if the XDP
+program does not use XDP_TX/XDP_REDIRECT.
+
+This patch allows XDP_TX/XDP_REDIRECT to run by reusing the existing SQ
+with __netif_tx_lock() held when there are not enough queues.
+
+Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
+Reviewed-by: Dust Li <dust.li@linux.alibaba.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/virtio_net.c | 62 +++++++++++++++++++++++++++++++---------
+ 1 file changed, 49 insertions(+), 13 deletions(-)
+
+diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
+index 15453d6fcc23..36f8aeb113a8 100644
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -195,6 +195,9 @@ struct virtnet_info {
+ /* # of XDP queue pairs currently used by the driver */
+ u16 xdp_queue_pairs;
+
++ /* xdp_queue_pairs may be 0, when xdp is already loaded. So add this. */
++ bool xdp_enabled;
++
+ /* I like... big packets and I cannot lie! */
+ bool big_packets;
+
+@@ -485,12 +488,41 @@ static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
+ return 0;
+ }
+
+-static struct send_queue *virtnet_xdp_sq(struct virtnet_info *vi)
+-{
+- unsigned int qp;
+-
+- qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
+- return &vi->sq[qp];
++/* when vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for xdp tx on
++ * the current cpu, so it does not need to be locked.
++ *
++ * Here we use marco instead of inline functions because we have to deal with
++ * three issues at the same time: 1. the choice of sq. 2. judge and execute the
++ * lock/unlock of txq 3. make sparse happy. It is difficult for two inline
++ * functions to perfectly solve these three problems at the same time.
++ */
++#define virtnet_xdp_get_sq(vi) ({ \
++ struct netdev_queue *txq; \
++ typeof(vi) v = (vi); \
++ unsigned int qp; \
++ \
++ if (v->curr_queue_pairs > nr_cpu_ids) { \
++ qp = v->curr_queue_pairs - v->xdp_queue_pairs; \
++ qp += smp_processor_id(); \
++ txq = netdev_get_tx_queue(v->dev, qp); \
++ __netif_tx_acquire(txq); \
++ } else { \
++ qp = smp_processor_id() % v->curr_queue_pairs; \
++ txq = netdev_get_tx_queue(v->dev, qp); \
++ __netif_tx_lock(txq, raw_smp_processor_id()); \
++ } \
++ v->sq + qp; \
++})
++
++#define virtnet_xdp_put_sq(vi, q) { \
++ struct netdev_queue *txq; \
++ typeof(vi) v = (vi); \
++ \
++ txq = netdev_get_tx_queue(v->dev, (q) - v->sq); \
++ if (v->curr_queue_pairs > nr_cpu_ids) \
++ __netif_tx_release(txq); \
++ else \
++ __netif_tx_unlock(txq); \
+ }
+
+ static int virtnet_xdp_xmit(struct net_device *dev,
+@@ -516,7 +548,7 @@ static int virtnet_xdp_xmit(struct net_device *dev,
+ if (!xdp_prog)
+ return -ENXIO;
+
+- sq = virtnet_xdp_sq(vi);
++ sq = virtnet_xdp_get_sq(vi);
+
+ if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
+ ret = -EINVAL;
+@@ -564,12 +596,13 @@ out:
+ sq->stats.kicks += kicks;
+ u64_stats_update_end(&sq->stats.syncp);
+
++ virtnet_xdp_put_sq(vi, sq);
+ return ret;
+ }
+
+ static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
+ {
+- return vi->xdp_queue_pairs ? VIRTIO_XDP_HEADROOM : 0;
++ return vi->xdp_enabled ? VIRTIO_XDP_HEADROOM : 0;
+ }
+
+ /* We copy the packet for XDP in the following cases:
+@@ -1458,12 +1491,13 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
+ xdp_do_flush_map();
+
+ if (xdp_xmit & VIRTIO_XDP_TX) {
+- sq = virtnet_xdp_sq(vi);
++ sq = virtnet_xdp_get_sq(vi);
+ if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
+ u64_stats_update_begin(&sq->stats.syncp);
+ sq->stats.kicks++;
+ u64_stats_update_end(&sq->stats.syncp);
+ }
++ virtnet_xdp_put_sq(vi, sq);
+ }
+
+ return received;
+@@ -2480,10 +2514,9 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
+
+ /* XDP requires extra queues for XDP_TX */
+ if (curr_qp + xdp_qp > vi->max_queue_pairs) {
+- NL_SET_ERR_MSG_MOD(extack, "Too few free TX rings available");
+- netdev_warn(dev, "request %i queues but max is %i\n",
++ netdev_warn(dev, "XDP request %i queues but max is %i. XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n",
+ curr_qp + xdp_qp, vi->max_queue_pairs);
+- return -ENOMEM;
++ xdp_qp = 0;
+ }
+
+ old_prog = rtnl_dereference(vi->rq[0].xdp_prog);
+@@ -2520,11 +2553,14 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
+ vi->xdp_queue_pairs = xdp_qp;
+
+ if (prog) {
++ vi->xdp_enabled = true;
+ for (i = 0; i < vi->max_queue_pairs; i++) {
+ rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
+ if (i == 0 && !old_prog)
+ virtnet_clear_guest_offloads(vi);
+ }
++ } else {
++ vi->xdp_enabled = false;
+ }
+
+ for (i = 0; i < vi->max_queue_pairs; i++) {
+@@ -2609,7 +2645,7 @@ static int virtnet_set_features(struct net_device *dev,
+ int err;
+
+ if ((dev->features ^ features) & NETIF_F_LRO) {
+- if (vi->xdp_queue_pairs)
++ if (vi->xdp_enabled)
+ return -EBUSY;
+
+ if (features & NETIF_F_LRO)
+--
+2.30.2
+
--- /dev/null
+From 382337353db50d281b5994145b84946fb5a93dac Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 17 Aug 2021 16:06:59 +0800
+Subject: virtio-net: use NETIF_F_GRO_HW instead of NETIF_F_LRO
+
+From: Jason Wang <jasowang@redhat.com>
+
+[ Upstream commit dbcf24d153884439dad30484a0e3f02350692e4c ]
+
+Commit a02e8964eaf92 ("virtio-net: ethtool configurable LRO")
+maps LRO to virtio guest offloading features and allows the
+administrator to enable and disable those features via ethtool.
+
+This leads to several issues:
+
+- For a device that doesn't support control guest offloads, the "LRO"
+ can't be disabled triggering WARN in dev_disable_lro() when turning
+ off LRO or when enabling forwarding bridging etc.
+
+- For a device that supports control guest offloads, the guest
+ offloads are disabled in cases of bridging, forwarding etc slowing
+ down the traffic.
+
+Fix this by using NETIF_F_GRO_HW instead. Though the spec does not
+guarantee packets to be re-segmented as the original ones,
+we can add that to the spec, possibly with a flag for devices to
+differentiate between GRO and LRO.
+
+Further, we never advertised LRO historically before a02e8964eaf92
+("virtio-net: ethtool configurable LRO") and so bridged/forwarded
+configs effectively always relied on virtio receive offloads behaving
+like GRO - thus even if this breaks any configs it is at least not
+a regression.
+
+Fixes: a02e8964eaf92 ("virtio-net: ethtool configurable LRO")
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Reported-by: Ivan <ivan@prestigetransportation.com>
+Tested-by: Ivan <ivan@prestigetransportation.com>
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/virtio_net.c | 14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
+index 36f8aeb113a8..37c2cecd1e50 100644
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -63,7 +63,7 @@ static const unsigned long guest_offloads[] = {
+ VIRTIO_NET_F_GUEST_CSUM
+ };
+
+-#define GUEST_OFFLOAD_LRO_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
++#define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
+ (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \
+ (1ULL << VIRTIO_NET_F_GUEST_ECN) | \
+ (1ULL << VIRTIO_NET_F_GUEST_UFO))
+@@ -2493,7 +2493,7 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
+ virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
+ virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) ||
+ virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))) {
+- NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO/CSUM, disable LRO/CSUM first");
++ NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing GRO_HW/CSUM, disable GRO_HW/CSUM first");
+ return -EOPNOTSUPP;
+ }
+
+@@ -2644,15 +2644,15 @@ static int virtnet_set_features(struct net_device *dev,
+ u64 offloads;
+ int err;
+
+- if ((dev->features ^ features) & NETIF_F_LRO) {
++ if ((dev->features ^ features) & NETIF_F_GRO_HW) {
+ if (vi->xdp_enabled)
+ return -EBUSY;
+
+- if (features & NETIF_F_LRO)
++ if (features & NETIF_F_GRO_HW)
+ offloads = vi->guest_offloads_capable;
+ else
+ offloads = vi->guest_offloads_capable &
+- ~GUEST_OFFLOAD_LRO_MASK;
++ ~GUEST_OFFLOAD_GRO_HW_MASK;
+
+ err = virtnet_set_guest_offloads(vi, offloads);
+ if (err)
+@@ -3128,9 +3128,9 @@ static int virtnet_probe(struct virtio_device *vdev)
+ dev->features |= NETIF_F_RXCSUM;
+ if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
+ virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6))
+- dev->features |= NETIF_F_LRO;
++ dev->features |= NETIF_F_GRO_HW;
+ if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS))
+- dev->hw_features |= NETIF_F_LRO;
++ dev->hw_features |= NETIF_F_GRO_HW;
+
+ dev->vlan_features = dev->features;
+
+--
+2.30.2
+
--- /dev/null
+From ec7c95e32fa39841854ad14149368a7a80ee74a7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 21 Jul 2021 17:26:47 +0300
+Subject: virtio: Protect vqs list access
+
+From: Parav Pandit <parav@nvidia.com>
+
+[ Upstream commit 0e566c8f0f2e8325e35f6f97e13cde5356b41814 ]
+
+VQs may be accessed to mark the device broken while they are
+created/destroyed. Hence protect the access to the vqs list.
+
+Fixes: e2dcdfe95c0b ("virtio: virtio_break_device() to mark all virtqueues broken.")
+Signed-off-by: Parav Pandit <parav@nvidia.com>
+Link: https://lore.kernel.org/r/20210721142648.1525924-4-parav@nvidia.com
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/virtio/virtio.c | 1 +
+ drivers/virtio/virtio_ring.c | 8 ++++++++
+ include/linux/virtio.h | 1 +
+ 3 files changed, 10 insertions(+)
+
+diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
+index a977e32a88f2..59a05f1b8105 100644
+--- a/drivers/virtio/virtio.c
++++ b/drivers/virtio/virtio.c
+@@ -342,6 +342,7 @@ int register_virtio_device(struct virtio_device *dev)
+ virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
+
+ INIT_LIST_HEAD(&dev->vqs);
++ spin_lock_init(&dev->vqs_list_lock);
+
+ /*
+ * device_add() causes the bus infrastructure to look for a matching
+diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
+index 97e8a195e18f..f6011c9ed32f 100644
+--- a/drivers/virtio/virtio_ring.c
++++ b/drivers/virtio/virtio_ring.c
+@@ -1668,7 +1668,9 @@ static struct virtqueue *vring_create_virtqueue_packed(
+ cpu_to_le16(vq->packed.event_flags_shadow);
+ }
+
++ spin_lock(&vdev->vqs_list_lock);
+ list_add_tail(&vq->vq.list, &vdev->vqs);
++ spin_unlock(&vdev->vqs_list_lock);
+ return &vq->vq;
+
+ err_desc_extra:
+@@ -2126,7 +2128,9 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
+ memset(vq->split.desc_state, 0, vring.num *
+ sizeof(struct vring_desc_state_split));
+
++ spin_lock(&vdev->vqs_list_lock);
+ list_add_tail(&vq->vq.list, &vdev->vqs);
++ spin_unlock(&vdev->vqs_list_lock);
+ return &vq->vq;
+ }
+ EXPORT_SYMBOL_GPL(__vring_new_virtqueue);
+@@ -2210,7 +2214,9 @@ void vring_del_virtqueue(struct virtqueue *_vq)
+ }
+ if (!vq->packed_ring)
+ kfree(vq->split.desc_state);
++ spin_lock(&vq->vq.vdev->vqs_list_lock);
+ list_del(&_vq->list);
++ spin_unlock(&vq->vq.vdev->vqs_list_lock);
+ kfree(vq);
+ }
+ EXPORT_SYMBOL_GPL(vring_del_virtqueue);
+@@ -2274,10 +2280,12 @@ void virtio_break_device(struct virtio_device *dev)
+ {
+ struct virtqueue *_vq;
+
++ spin_lock(&dev->vqs_list_lock);
+ list_for_each_entry(_vq, &dev->vqs, list) {
+ struct vring_virtqueue *vq = to_vvq(_vq);
+ vq->broken = true;
+ }
++ spin_unlock(&dev->vqs_list_lock);
+ }
+ EXPORT_SYMBOL_GPL(virtio_break_device);
+
+diff --git a/include/linux/virtio.h b/include/linux/virtio.h
+index 15f906e4a748..7c075463c7f2 100644
+--- a/include/linux/virtio.h
++++ b/include/linux/virtio.h
+@@ -111,6 +111,7 @@ struct virtio_device {
+ bool config_enabled;
+ bool config_change_pending;
+ spinlock_t config_lock;
++ spinlock_t vqs_list_lock; /* Protects VQs list access */
+ struct device dev;
+ struct virtio_device_id id;
+ const struct virtio_config_ops *config;
+--
+2.30.2
+
--- /dev/null
+From 45aad27907403d19cf8e78488efd99f1e02b1506 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 15 Aug 2021 12:00:02 +0000
+Subject: vrf: Reset skb conntrack connection on VRF rcv
+
+From: Lahav Schlesinger <lschlesinger@drivenets.com>
+
+[ Upstream commit 09e856d54bda5f288ef8437a90ab2b9b3eab83d1 ]
+
+To fix the "reverse-NAT" for replies.
+
+When a packet is sent over a VRF, the POST_ROUTING hooks are called
+twice: Once from the VRF interface, and once from the "actual"
+interface the packet will be sent from:
+1) First SNAT: l3mdev_l3_out() -> vrf_l3_out() -> .. -> vrf_output_direct()
+ This causes the POST_ROUTING hooks to run.
+2) Second SNAT: 'ip_output()' calls POST_ROUTING hooks again.
+
+Similarly for replies, first ip_rcv() calls PRE_ROUTING hooks, and
+second vrf_l3_rcv() calls them again.
+
+As an example, consider the following SNAT rule:
+> iptables -t nat -A POSTROUTING -p udp -m udp --dport 53 -j SNAT --to-source 2.2.2.2 -o vrf_1
+
+In this case sending over a VRF will create 2 conntrack entries.
+The first is from the VRF interface, which performs the IP SNAT.
+The second will run the SNAT, but since the "expected reply" will remain
+the same, conntrack randomizes the source port of the packet:
+e.g. with a socket bound to 1.1.1.1:10000, sending to 3.3.3.3:53, the conntrack
+rules are:
+udp 17 29 src=2.2.2.2 dst=3.3.3.3 sport=10000 dport=53 packets=1 bytes=68 [UNREPLIED] src=3.3.3.3 dst=2.2.2.2 sport=53 dport=61033 packets=0 bytes=0 mark=0 use=1
+udp 17 29 src=1.1.1.1 dst=3.3.3.3 sport=10000 dport=53 packets=1 bytes=68 [UNREPLIED] src=3.3.3.3 dst=2.2.2.2 sport=53 dport=10000 packets=0 bytes=0 mark=0 use=1
+
+i.e. First SNAT IP from 1.1.1.1 --> 2.2.2.2, and second the src port is
+SNAT-ed from 10000 --> 61033.
+
+But when a reply is sent (3.3.3.3:53 -> 2.2.2.2:61033) only the later
+conntrack entry is matched:
+udp 17 29 src=2.2.2.2 dst=3.3.3.3 sport=10000 dport=53 packets=1 bytes=68 src=3.3.3.3 dst=2.2.2.2 sport=53 dport=61033 packets=1 bytes=49 mark=0 use=1
+udp 17 28 src=1.1.1.1 dst=3.3.3.3 sport=10000 dport=53 packets=1 bytes=68 [UNREPLIED] src=3.3.3.3 dst=2.2.2.2 sport=53 dport=10000 packets=0 bytes=0 mark=0 use=1
+
+And a "port 61033 unreachable" ICMP packet is sent back.
+
+The issue is that when PRE_ROUTING hooks are called from vrf_l3_rcv(),
+the skb already has a conntrack flow attached to it, which means
+nf_conntrack_in() will not resolve the flow again.
+
+This means only the dest port is "reverse-NATed" (61033 -> 10000) but
+the dest IP remains 2.2.2.2, and since the socket is bound to 1.1.1.1 it's
+not received.
+This can be verified by logging the 4-tuple of the packet in '__udp4_lib_rcv()'.
+
+The fix is then to reset the flow when skb is received on a VRF, to let
+conntrack resolve the flow again (which now will hit the earlier flow).
+
+To reproduce: (Without the fix "Got pkt_to_nat_port" will not be printed by
+ running 'bash ./repro.sh'):
+ $ cat run_in_A1.py
+ import logging
+ logging.getLogger("scapy.runtime").setLevel(logging.ERROR)
+ from scapy.all import *
+ import argparse
+
+ def get_packet_to_send(udp_dst_port, msg_name):
+ return Ether(src='11:22:33:44:55:66', dst=iface_mac)/ \
+ IP(src='3.3.3.3', dst='2.2.2.2')/ \
+ UDP(sport=53, dport=udp_dst_port)/ \
+ Raw(f'{msg_name}\x0012345678901234567890')
+
+ parser = argparse.ArgumentParser()
+ parser.add_argument('-iface_mac', dest="iface_mac", type=str, required=True,
+ help="From run_in_A3.py")
+ parser.add_argument('-socket_port', dest="socket_port", type=str,
+ required=True, help="From run_in_A3.py")
+ parser.add_argument('-v1_mac', dest="v1_mac", type=str, required=True,
+ help="From script")
+
+ args, _ = parser.parse_known_args()
+ iface_mac = args.iface_mac
+ socket_port = int(args.socket_port)
+ v1_mac = args.v1_mac
+
+ print(f'Source port before NAT: {socket_port}')
+
+ while True:
+ pkts = sniff(iface='_v0', store=True, count=1, timeout=10)
+ if 0 == len(pkts):
+ print('Something failed, rerun the script :(', flush=True)
+ break
+ pkt = pkts[0]
+ if not pkt.haslayer('UDP'):
+ continue
+
+ pkt_sport = pkt.getlayer('UDP').sport
+ print(f'Source port after NAT: {pkt_sport}', flush=True)
+
+ pkt_to_send = get_packet_to_send(pkt_sport, 'pkt_to_nat_port')
+ sendp(pkt_to_send, '_v0', verbose=False) # Will not be received
+
+ pkt_to_send = get_packet_to_send(socket_port, 'pkt_to_socket_port')
+ sendp(pkt_to_send, '_v0', verbose=False)
+ break
+
+ $ cat run_in_A2.py
+ import socket
+ import netifaces
+
+ print(f"{netifaces.ifaddresses('e00000')[netifaces.AF_LINK][0]['addr']}",
+ flush=True)
+ s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
+ s.setsockopt(socket.SOL_SOCKET, socket.SO_BINDTODEVICE,
+ str('vrf_1' + '\0').encode('utf-8'))
+ s.connect(('3.3.3.3', 53))
+ print(f'{s. getsockname()[1]}', flush=True)
+ s.settimeout(5)
+
+ while True:
+ try:
+ # Periodically send in order to keep the conntrack entry alive.
+ s.send(b'a'*40)
+ resp = s.recvfrom(1024)
+ msg_name = resp[0].decode('utf-8').split('\0')[0]
+ print(f"Got {msg_name}", flush=True)
+ except Exception as e:
+ pass
+
+ $ cat repro.sh
+ ip netns del A1 2> /dev/null
+ ip netns del A2 2> /dev/null
+ ip netns add A1
+ ip netns add A2
+
+ ip -n A1 link add _v0 type veth peer name _v1 netns A2
+ ip -n A1 link set _v0 up
+
+ ip -n A2 link add e00000 type bond
+ ip -n A2 link add lo0 type dummy
+ ip -n A2 link add vrf_1 type vrf table 10001
+ ip -n A2 link set vrf_1 up
+ ip -n A2 link set e00000 master vrf_1
+
+ ip -n A2 addr add 1.1.1.1/24 dev e00000
+ ip -n A2 link set e00000 up
+ ip -n A2 link set _v1 master e00000
+ ip -n A2 link set _v1 up
+ ip -n A2 link set lo0 up
+ ip -n A2 addr add 2.2.2.2/32 dev lo0
+
+ ip -n A2 neigh add 1.1.1.10 lladdr 77:77:77:77:77:77 dev e00000
+ ip -n A2 route add 3.3.3.3/32 via 1.1.1.10 dev e00000 table 10001
+
+ ip netns exec A2 iptables -t nat -A POSTROUTING -p udp -m udp --dport 53 -j \
+ SNAT --to-source 2.2.2.2 -o vrf_1
+
+ sleep 5
+ ip netns exec A2 python3 run_in_A2.py > x &
+ XPID=$!
+ sleep 5
+
+ IFACE_MAC=`sed -n 1p x`
+ SOCKET_PORT=`sed -n 2p x`
+ V1_MAC=`ip -n A2 link show _v1 | sed -n 2p | awk '{print $2'}`
+ ip netns exec A1 python3 run_in_A1.py -iface_mac ${IFACE_MAC} -socket_port \
+ ${SOCKET_PORT} -v1_mac ${SOCKET_PORT}
+ sleep 5
+
+ kill -9 $XPID
+ wait $XPID 2> /dev/null
+ ip netns del A1
+ ip netns del A2
+ tail x -n 2
+ rm x
+ set +x
+
+Fixes: 73e20b761acf ("net: vrf: Add support for PREROUTING rules on vrf device")
+Signed-off-by: Lahav Schlesinger <lschlesinger@drivenets.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Link: https://lore.kernel.org/r/20210815120002.2787653-1-lschlesinger@drivenets.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/vrf.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
+index f08ed52d51f3..9b626c169554 100644
+--- a/drivers/net/vrf.c
++++ b/drivers/net/vrf.c
+@@ -1036,6 +1036,8 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
+ bool need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr);
+ bool is_ndisc = ipv6_ndisc_frame(skb);
+
++ nf_reset_ct(skb);
++
+ /* loopback, multicast & non-ND link-local traffic; do not push through
+ * packet taps again. Reset pkt_type for upper layers to process skb.
+ * For strict packets with a source LLA, determine the dst using the
+@@ -1092,6 +1094,8 @@ static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev,
+ skb->skb_iif = vrf_dev->ifindex;
+ IPCB(skb)->flags |= IPSKB_L3SLAVE;
+
++ nf_reset_ct(skb);
++
+ if (ipv4_is_multicast(ip_hdr(skb)->daddr))
+ goto out;
+
+--
+2.30.2
+