]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 5.4
authorSasha Levin <sashal@kernel.org>
Sun, 22 Aug 2021 02:39:19 +0000 (22:39 -0400)
committerSasha Levin <sashal@kernel.org>
Sun, 22 Aug 2021 02:39:19 +0000 (22:39 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
21 files changed:
queue-5.4/bnxt-disable-napi-before-canceling-dim.patch [new file with mode: 0644]
queue-5.4/bnxt-don-t-lock-the-tx-queue-from-napi-poll.patch [new file with mode: 0644]
queue-5.4/bnxt_en-add-missing-dma-memory-barriers.patch [new file with mode: 0644]
queue-5.4/bpf-clear-zext_dst-of-dead-insns.patch [new file with mode: 0644]
queue-5.4/cpufreq-armada-37xx-forbid-cpufreq-for-1.2-ghz-varia.patch [new file with mode: 0644]
queue-5.4/dccp-add-do-while-0-stubs-for-dccp_pr_debug-macros.patch [new file with mode: 0644]
queue-5.4/i40e-fix-atr-queue-selection.patch [new file with mode: 0644]
queue-5.4/iavf-fix-ping-is-lost-after-untrusted-vf-had-tried-t.patch [new file with mode: 0644]
queue-5.4/iommu-check-if-group-is-null-before-remove-device.patch [new file with mode: 0644]
queue-5.4/net-6pack-fix-slab-out-of-bounds-in-decode_data.patch [new file with mode: 0644]
queue-5.4/net-mdio-mux-don-t-ignore-memory-allocation-errors.patch [new file with mode: 0644]
queue-5.4/net-mdio-mux-handle-eprobe_defer-correctly.patch [new file with mode: 0644]
queue-5.4/net-qlcnic-add-missed-unlock-in-qlcnic_83xx_flash_re.patch [new file with mode: 0644]
queue-5.4/ovs-clear-skb-tstamp-in-forwarding-path.patch [new file with mode: 0644]
queue-5.4/ptp_pch-restore-dependency-on-pci.patch [new file with mode: 0644]
queue-5.4/series
queue-5.4/vhost-fix-the-calculation-in-vhost_overflow.patch [new file with mode: 0644]
queue-5.4/virtio-net-support-xdp-when-not-more-queues.patch [new file with mode: 0644]
queue-5.4/virtio-net-use-netif_f_gro_hw-instead-of-netif_f_lro.patch [new file with mode: 0644]
queue-5.4/virtio-protect-vqs-list-access.patch [new file with mode: 0644]
queue-5.4/vrf-reset-skb-conntrack-connection-on-vrf-rcv.patch [new file with mode: 0644]

diff --git a/queue-5.4/bnxt-disable-napi-before-canceling-dim.patch b/queue-5.4/bnxt-disable-napi-before-canceling-dim.patch
new file mode 100644 (file)
index 0000000..59c25b8
--- /dev/null
@@ -0,0 +1,43 @@
+From fb5dd59dcee96c6e83b29515d03c793d670f454e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 Aug 2021 14:42:40 -0700
+Subject: bnxt: disable napi before canceling DIM
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit 01cca6b9330ac7460de44eeeb3a0607f8aae69ff ]
+
+napi schedules DIM, napi has to be disabled first,
+then DIM canceled.
+
+Noticed while reading the code.
+
+Fixes: 0bc0b97fca73 ("bnxt_en: cleanup DIM work on device shutdown")
+Fixes: 6a8788f25625 ("bnxt_en: add support for software dynamic interrupt moderation")
+Reviewed-by: Michael Chan <michael.chan@broadcom.com>
+Reviewed-by: Edwin Peer <edwin.peer@broadcom.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+index 8111aefb2411..1b5839ad97b6 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -8346,10 +8346,9 @@ static void bnxt_disable_napi(struct bnxt *bp)
+       for (i = 0; i < bp->cp_nr_rings; i++) {
+               struct bnxt_cp_ring_info *cpr = &bp->bnapi[i]->cp_ring;
++              napi_disable(&bp->bnapi[i]->napi);
+               if (bp->bnapi[i]->rx_ring)
+                       cancel_work_sync(&cpr->dim.work);
+-
+-              napi_disable(&bp->bnapi[i]->napi);
+       }
+ }
+-- 
+2.30.2
+
diff --git a/queue-5.4/bnxt-don-t-lock-the-tx-queue-from-napi-poll.patch b/queue-5.4/bnxt-don-t-lock-the-tx-queue-from-napi-poll.patch
new file mode 100644 (file)
index 0000000..4496a3c
--- /dev/null
@@ -0,0 +1,141 @@
+From 0b25b021a2020f746ea39d86ba542c199bf63927 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 Aug 2021 14:42:39 -0700
+Subject: bnxt: don't lock the tx queue from napi poll
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit 3c603136c9f82833813af77185618de5af67676c ]
+
+We can't take the tx lock from the napi poll routine, because
+netpoll can poll napi at any moment, including with the tx lock
+already held.
+
+The tx lock is protecting against two paths - the disable
+path, and (as Michael points out) the NETDEV_TX_BUSY case
+which may occur if NAPI completions race with start_xmit
+and both decide to re-enable the queue.
+
+For the disable/ifdown path use synchronize_net() to make sure
+closing the device does not race we restarting the queues.
+Annotate accesses to dev_state against data races.
+
+For the NAPI cleanup vs start_xmit path - appropriate barriers
+are already in place in the main spot where Tx queue is stopped
+but we need to do the same careful dance in the TX_BUSY case.
+
+Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.")
+Reviewed-by: Michael Chan <michael.chan@broadcom.com>
+Reviewed-by: Edwin Peer <edwin.peer@broadcom.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 54 ++++++++++++++---------
+ 1 file changed, 32 insertions(+), 22 deletions(-)
+
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+index 287ea792922a..8111aefb2411 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -360,6 +360,26 @@ static u16 bnxt_xmit_get_cfa_action(struct sk_buff *skb)
+       return md_dst->u.port_info.port_id;
+ }
++static bool bnxt_txr_netif_try_stop_queue(struct bnxt *bp,
++                                        struct bnxt_tx_ring_info *txr,
++                                        struct netdev_queue *txq)
++{
++      netif_tx_stop_queue(txq);
++
++      /* netif_tx_stop_queue() must be done before checking
++       * tx index in bnxt_tx_avail() below, because in
++       * bnxt_tx_int(), we update tx index before checking for
++       * netif_tx_queue_stopped().
++       */
++      smp_mb();
++      if (bnxt_tx_avail(bp, txr) > bp->tx_wake_thresh) {
++              netif_tx_wake_queue(txq);
++              return false;
++      }
++
++      return true;
++}
++
+ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ {
+       struct bnxt *bp = netdev_priv(dev);
+@@ -387,8 +407,8 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
+       free_size = bnxt_tx_avail(bp, txr);
+       if (unlikely(free_size < skb_shinfo(skb)->nr_frags + 2)) {
+-              netif_tx_stop_queue(txq);
+-              return NETDEV_TX_BUSY;
++              if (bnxt_txr_netif_try_stop_queue(bp, txr, txq))
++                      return NETDEV_TX_BUSY;
+       }
+       length = skb->len;
+@@ -597,16 +617,7 @@ tx_done:
+               if (netdev_xmit_more() && !tx_buf->is_push)
+                       bnxt_db_write(bp, &txr->tx_db, prod);
+-              netif_tx_stop_queue(txq);
+-
+-              /* netif_tx_stop_queue() must be done before checking
+-               * tx index in bnxt_tx_avail() below, because in
+-               * bnxt_tx_int(), we update tx index before checking for
+-               * netif_tx_queue_stopped().
+-               */
+-              smp_mb();
+-              if (bnxt_tx_avail(bp, txr) > bp->tx_wake_thresh)
+-                      netif_tx_wake_queue(txq);
++              bnxt_txr_netif_try_stop_queue(bp, txr, txq);
+       }
+       return NETDEV_TX_OK;
+@@ -690,14 +701,9 @@ next_tx_int:
+       smp_mb();
+       if (unlikely(netif_tx_queue_stopped(txq)) &&
+-          (bnxt_tx_avail(bp, txr) > bp->tx_wake_thresh)) {
+-              __netif_tx_lock(txq, smp_processor_id());
+-              if (netif_tx_queue_stopped(txq) &&
+-                  bnxt_tx_avail(bp, txr) > bp->tx_wake_thresh &&
+-                  txr->dev_state != BNXT_DEV_STATE_CLOSING)
+-                      netif_tx_wake_queue(txq);
+-              __netif_tx_unlock(txq);
+-      }
++          bnxt_tx_avail(bp, txr) > bp->tx_wake_thresh &&
++          READ_ONCE(txr->dev_state) != BNXT_DEV_STATE_CLOSING)
++              netif_tx_wake_queue(txq);
+ }
+ static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping,
+@@ -8371,9 +8377,11 @@ void bnxt_tx_disable(struct bnxt *bp)
+       if (bp->tx_ring) {
+               for (i = 0; i < bp->tx_nr_rings; i++) {
+                       txr = &bp->tx_ring[i];
+-                      txr->dev_state = BNXT_DEV_STATE_CLOSING;
++                      WRITE_ONCE(txr->dev_state, BNXT_DEV_STATE_CLOSING);
+               }
+       }
++      /* Make sure napi polls see @dev_state change */
++      synchronize_net();
+       /* Drop carrier first to prevent TX timeout */
+       netif_carrier_off(bp->dev);
+       /* Stop all TX queues */
+@@ -8387,8 +8395,10 @@ void bnxt_tx_enable(struct bnxt *bp)
+       for (i = 0; i < bp->tx_nr_rings; i++) {
+               txr = &bp->tx_ring[i];
+-              txr->dev_state = 0;
++              WRITE_ONCE(txr->dev_state, 0);
+       }
++      /* Make sure napi polls see @dev_state change */
++      synchronize_net();
+       netif_tx_wake_all_queues(bp->dev);
+       if (bp->link_info.link_up)
+               netif_carrier_on(bp->dev);
+-- 
+2.30.2
+
diff --git a/queue-5.4/bnxt_en-add-missing-dma-memory-barriers.patch b/queue-5.4/bnxt_en-add-missing-dma-memory-barriers.patch
new file mode 100644 (file)
index 0000000..9b990de
--- /dev/null
@@ -0,0 +1,70 @@
+From a6ad1b5453c70185001ef64440f2953e322ff991 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 15 Aug 2021 16:15:37 -0400
+Subject: bnxt_en: Add missing DMA memory barriers
+
+From: Michael Chan <michael.chan@broadcom.com>
+
+[ Upstream commit 828affc27ed43441bd1efdaf4e07e96dd43a0362 ]
+
+Each completion ring entry has a valid bit to indicate that the entry
+contains a valid completion event.  The driver's main poll loop
+__bnxt_poll_work() has the proper dma_rmb() to make sure the valid
+bit of the next entry has been checked before proceeding further.
+But when we call bnxt_rx_pkt() to process the RX event, the RX
+completion event consists of two completion entries and only the
+first entry has been checked to be valid.  We need the same barrier
+after checking the next completion entry.  Add missing dma_rmb()
+barriers in bnxt_rx_pkt() and other similar locations.
+
+Fixes: 67a95e2022c7 ("bnxt_en: Need memory barrier when processing the completion ring.")
+Reported-by: Lance Richardson <lance.richardson@broadcom.com>
+Reviewed-by: Andy Gospodarek <gospo@broadcom.com>
+Reviewed-by: Lance Richardson <lance.richardson@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+index 1b5839ad97b6..e67f07faca78 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -1724,6 +1724,10 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
+       if (!RX_CMP_VALID(rxcmp1, tmp_raw_cons))
+               return -EBUSY;
++      /* The valid test of the entry must be done first before
++       * reading any further.
++       */
++      dma_rmb();
+       prod = rxr->rx_prod;
+       if (cmp_type == CMP_TYPE_RX_L2_TPA_START_CMP) {
+@@ -1918,6 +1922,10 @@ static int bnxt_force_rx_discard(struct bnxt *bp,
+       if (!RX_CMP_VALID(rxcmp1, tmp_raw_cons))
+               return -EBUSY;
++      /* The valid test of the entry must be done first before
++       * reading any further.
++       */
++      dma_rmb();
+       cmp_type = RX_CMP_TYPE(rxcmp);
+       if (cmp_type == CMP_TYPE_RX_L2_CMP) {
+               rxcmp1->rx_cmp_cfa_code_errors_v2 |=
+@@ -2314,6 +2322,10 @@ static int bnxt_poll_nitroa0(struct napi_struct *napi, int budget)
+               if (!TX_CMP_VALID(txcmp, raw_cons))
+                       break;
++              /* The valid test of the entry must be done first before
++               * reading any further.
++               */
++              dma_rmb();
+               if ((TX_CMP_TYPE(txcmp) & 0x30) == 0x10) {
+                       tmp_raw_cons = NEXT_RAW_CMP(raw_cons);
+                       cp_cons = RING_CMP(tmp_raw_cons);
+-- 
+2.30.2
+
diff --git a/queue-5.4/bpf-clear-zext_dst-of-dead-insns.patch b/queue-5.4/bpf-clear-zext_dst-of-dead-insns.patch
new file mode 100644 (file)
index 0000000..d099f95
--- /dev/null
@@ -0,0 +1,65 @@
+From c2c3d8d0a10e43195e67431bdf1a5431a5546ce9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 Aug 2021 17:18:10 +0200
+Subject: bpf: Clear zext_dst of dead insns
+
+From: Ilya Leoshkevich <iii@linux.ibm.com>
+
+[ Upstream commit 45c709f8c71b525b51988e782febe84ce933e7e0 ]
+
+"access skb fields ok" verifier test fails on s390 with the "verifier
+bug. zext_dst is set, but no reg is defined" message. The first insns
+of the test prog are ...
+
+   0:  61 01 00 00 00 00 00 00         ldxw %r0,[%r1+0]
+   8:  35 00 00 01 00 00 00 00         jge %r0,0,1
+  10:  61 01 00 08 00 00 00 00         ldxw %r0,[%r1+8]
+
+... and the 3rd one is dead (this does not look intentional to me, but
+this is a separate topic).
+
+sanitize_dead_code() converts dead insns into "ja -1", but keeps
+zext_dst. When opt_subreg_zext_lo32_rnd_hi32() tries to parse such
+an insn, it sees this discrepancy and bails. This problem can be seen
+only with JITs whose bpf_jit_needs_zext() returns true.
+
+Fix by clearning dead insns' zext_dst.
+
+The commits that contributed to this problem are:
+
+1. 5aa5bd14c5f8 ("bpf: add initial suite for selftests"), which
+   introduced the test with the dead code.
+2. 5327ed3d44b7 ("bpf: verifier: mark verified-insn with
+   sub-register zext flag"), which introduced the zext_dst flag.
+3. 83a2881903f3 ("bpf: Account for BPF_FETCH in
+   insn_has_def32()"), which introduced the sanity check.
+4. 9183671af6db ("bpf: Fix leakage under speculation on
+   mispredicted branches"), which bisect points to.
+
+It's best to fix this on stable branches that contain the second one,
+since that's the point where the inconsistency was introduced.
+
+Fixes: 5327ed3d44b7 ("bpf: verifier: mark verified-insn with sub-register zext flag")
+Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Link: https://lore.kernel.org/bpf/20210812151811.184086-2-iii@linux.ibm.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/verifier.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
+index 52c2b11a0b47..0b5a446ee59c 100644
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -8586,6 +8586,7 @@ static void sanitize_dead_code(struct bpf_verifier_env *env)
+               if (aux_data[i].seen)
+                       continue;
+               memcpy(insn + i, &trap, sizeof(trap));
++              aux_data[i].zext_dst = false;
+       }
+ }
+-- 
+2.30.2
+
diff --git a/queue-5.4/cpufreq-armada-37xx-forbid-cpufreq-for-1.2-ghz-varia.patch b/queue-5.4/cpufreq-armada-37xx-forbid-cpufreq-for-1.2-ghz-varia.patch
new file mode 100644 (file)
index 0000000..94a6604
--- /dev/null
@@ -0,0 +1,54 @@
+From c78c895f3773d925a51bbe89e33799e27293a6c0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 1 Jul 2021 00:56:01 +0200
+Subject: cpufreq: armada-37xx: forbid cpufreq for 1.2 GHz variant
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Marek BehĂșn <kabel@kernel.org>
+
+[ Upstream commit 484f2b7c61b9ae58cc00c5127bcbcd9177af8dfe ]
+
+The 1.2 GHz variant of the Armada 3720 SOC is unstable with DVFS: when
+the SOC boots, the WTMI firmware sets clocks and AVS values that work
+correctly with 1.2 GHz CPU frequency, but random crashes occur once
+cpufreq driver starts scaling.
+
+We do not know currently what is the reason:
+- it may be that the voltage value for L0 for 1.2 GHz variant provided
+  by the vendor in the OTP is simply incorrect when scaling is used,
+- it may be that some delay is needed somewhere,
+- it may be something else.
+
+The most sane solution now seems to be to simply forbid the cpufreq
+driver on 1.2 GHz variant.
+
+Signed-off-by: Marek BehĂșn <kabel@kernel.org>
+Fixes: 92ce45fb875d ("cpufreq: Add DVFS support for Armada 37xx")
+Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/cpufreq/armada-37xx-cpufreq.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/cpufreq/armada-37xx-cpufreq.c b/drivers/cpufreq/armada-37xx-cpufreq.c
+index e4782f562e7a..2de7fd18f66a 100644
+--- a/drivers/cpufreq/armada-37xx-cpufreq.c
++++ b/drivers/cpufreq/armada-37xx-cpufreq.c
+@@ -102,7 +102,11 @@ struct armada_37xx_dvfs {
+ };
+ static struct armada_37xx_dvfs armada_37xx_dvfs[] = {
+-      {.cpu_freq_max = 1200*1000*1000, .divider = {1, 2, 4, 6} },
++      /*
++       * The cpufreq scaling for 1.2 GHz variant of the SOC is currently
++       * unstable because we do not know how to configure it properly.
++       */
++      /* {.cpu_freq_max = 1200*1000*1000, .divider = {1, 2, 4, 6} }, */
+       {.cpu_freq_max = 1000*1000*1000, .divider = {1, 2, 4, 5} },
+       {.cpu_freq_max = 800*1000*1000,  .divider = {1, 2, 3, 4} },
+       {.cpu_freq_max = 600*1000*1000,  .divider = {2, 4, 5, 6} },
+-- 
+2.30.2
+
diff --git a/queue-5.4/dccp-add-do-while-0-stubs-for-dccp_pr_debug-macros.patch b/queue-5.4/dccp-add-do-while-0-stubs-for-dccp_pr_debug-macros.patch
new file mode 100644 (file)
index 0000000..36cb885
--- /dev/null
@@ -0,0 +1,54 @@
+From 2eada32d4174313977c057de3747d7f2ce665fa8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 8 Aug 2021 16:04:40 -0700
+Subject: dccp: add do-while-0 stubs for dccp_pr_debug macros
+
+From: Randy Dunlap <rdunlap@infradead.org>
+
+[ Upstream commit 86aab09a4870bb8346c9579864588c3d7f555299 ]
+
+GCC complains about empty macros in an 'if' statement, so convert
+them to 'do {} while (0)' macros.
+
+Fixes these build warnings:
+
+net/dccp/output.c: In function 'dccp_xmit_packet':
+../net/dccp/output.c:283:71: warning: suggest braces around empty body in an 'if' statement [-Wempty-body]
+  283 |                 dccp_pr_debug("transmit_skb() returned err=%d\n", err);
+net/dccp/ackvec.c: In function 'dccp_ackvec_update_old':
+../net/dccp/ackvec.c:163:80: warning: suggest braces around empty body in an 'else' statement [-Wempty-body]
+  163 |                                               (unsigned long long)seqno, state);
+
+Fixes: dc841e30eaea ("dccp: Extend CCID packet dequeueing interface")
+Fixes: 380240864451 ("dccp ccid-2: Update code for the Ack Vector input/registration routine")
+Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
+Cc: dccp@vger.kernel.org
+Cc: "David S. Miller" <davem@davemloft.net>
+Cc: Jakub Kicinski <kuba@kernel.org>
+Cc: Gerrit Renker <gerrit@erg.abdn.ac.uk>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/dccp/dccp.h | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
+index 9c3b27c257bb..cb818617699c 100644
+--- a/net/dccp/dccp.h
++++ b/net/dccp/dccp.h
+@@ -41,9 +41,9 @@ extern bool dccp_debug;
+ #define dccp_pr_debug_cat(format, a...)   DCCP_PRINTK(dccp_debug, format, ##a)
+ #define dccp_debug(fmt, a...)           dccp_pr_debug_cat(KERN_DEBUG fmt, ##a)
+ #else
+-#define dccp_pr_debug(format, a...)
+-#define dccp_pr_debug_cat(format, a...)
+-#define dccp_debug(format, a...)
++#define dccp_pr_debug(format, a...)     do {} while (0)
++#define dccp_pr_debug_cat(format, a...)         do {} while (0)
++#define dccp_debug(format, a...)        do {} while (0)
+ #endif
+ extern struct inet_hashinfo dccp_hashinfo;
+-- 
+2.30.2
+
diff --git a/queue-5.4/i40e-fix-atr-queue-selection.patch b/queue-5.4/i40e-fix-atr-queue-selection.patch
new file mode 100644 (file)
index 0000000..299cc5b
--- /dev/null
@@ -0,0 +1,59 @@
+From a4228012932c24b1bb7c2646320b4ea29d6c386d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 18 Aug 2021 10:42:16 -0700
+Subject: i40e: Fix ATR queue selection
+
+From: Arkadiusz Kubalewski <arkadiusz.kubalewski@intel.com>
+
+[ Upstream commit a222be597e316389f9f8c26033352c124ce93056 ]
+
+Without this patch, ATR does not work. Receive/transmit uses queue
+selection based on SW DCB hashing method.
+
+If traffic classes are not configured for PF, then use
+netdev_pick_tx function for selecting queue for packet transmission.
+Instead of calling i40e_swdcb_skb_tx_hash, call netdev_pick_tx,
+which ensures that packet is transmitted/received from CPU that is
+running the application.
+
+Reproduction steps:
+1. Load i40e driver
+2. Map each MSI interrupt of i40e port for each CPU
+3. Disable ntuple, enable ATR i.e.:
+ethtool -K $interface ntuple off
+ethtool --set-priv-flags $interface flow-director-atr
+4. Run application that is generating traffic and is bound to a
+single CPU, i.e.:
+taskset -c 9 netperf -H 1.1.1.1 -t TCP_RR -l 10
+5. Observe behavior:
+Application's traffic should be restricted to the CPU provided in
+taskset.
+
+Fixes: 89ec1f0886c1 ("i40e: Fix queue-to-TC mapping on Tx")
+Signed-off-by: Przemyslaw Patynowski <przemyslawx.patynowski@intel.com>
+Signed-off-by: Arkadiusz Kubalewski <arkadiusz.kubalewski@intel.com>
+Tested-by: Dave Switzer <david.switzer@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/i40e/i40e_txrx.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+index 8e38c547b53f..06987913837a 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+@@ -3553,8 +3553,7 @@ u16 i40e_lan_select_queue(struct net_device *netdev,
+       /* is DCB enabled at all? */
+       if (vsi->tc_config.numtc == 1)
+-              return i40e_swdcb_skb_tx_hash(netdev, skb,
+-                                            netdev->real_num_tx_queues);
++              return netdev_pick_tx(netdev, skb, sb_dev);
+       prio = skb->priority;
+       hw = &vsi->back->hw;
+-- 
+2.30.2
+
diff --git a/queue-5.4/iavf-fix-ping-is-lost-after-untrusted-vf-had-tried-t.patch b/queue-5.4/iavf-fix-ping-is-lost-after-untrusted-vf-had-tried-t.patch
new file mode 100644 (file)
index 0000000..9da8843
--- /dev/null
@@ -0,0 +1,133 @@
+From 8d57b41785021e0729054590d7009f7220b41e2e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 18 Aug 2021 10:42:17 -0700
+Subject: iavf: Fix ping is lost after untrusted VF had tried to change MAC
+
+From: Sylwester Dziedziuch <sylwesterx.dziedziuch@intel.com>
+
+[ Upstream commit 8da80c9d50220a8e4190a4eaa0dd6aeefcbbb5bf ]
+
+Make changes to MAC address dependent on the response of PF.
+Disallow changes to HW MAC address and MAC filter from untrusted
+VF, thanks to that ping is not lost if VF tries to change MAC.
+Add a new field in iavf_mac_filter, to indicate whether there
+was response from PF for given filter. Based on this field pass
+or discard the filter.
+If untrusted VF tried to change it's address, it's not changed.
+Still filter was changed, because of that ping couldn't go through.
+
+Fixes: c5c922b3e09b ("iavf: fix MAC address setting for VFs when filter is rejected")
+Signed-off-by: Przemyslaw Patynowski <przemyslawx.patynowski@intel.com>
+Signed-off-by: Sylwester Dziedziuch <sylwesterx.dziedziuch@intel.com>
+Signed-off-by: Mateusz Palczewski <mateusz.palczewski@intel.com>
+Tested-by: Gurucharan G <Gurucharanx.g@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/iavf/iavf.h        |  1 +
+ drivers/net/ethernet/intel/iavf/iavf_main.c   |  1 +
+ .../net/ethernet/intel/iavf/iavf_virtchnl.c   | 47 ++++++++++++++++++-
+ 3 files changed, 47 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h
+index 6b9117a350fa..81ca6472937d 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf.h
++++ b/drivers/net/ethernet/intel/iavf/iavf.h
+@@ -134,6 +134,7 @@ struct iavf_q_vector {
+ struct iavf_mac_filter {
+       struct list_head list;
+       u8 macaddr[ETH_ALEN];
++      bool is_new_mac;        /* filter is new, wait for PF decision */
+       bool remove;            /* filter needs to be removed */
+       bool add;               /* filter needs to be added */
+ };
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
+index dc902e371c2c..94a3f000e999 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
+@@ -761,6 +761,7 @@ struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter,
+               list_add_tail(&f->list, &adapter->mac_filter_list);
+               f->add = true;
++              f->is_new_mac = true;
+               adapter->aq_required |= IAVF_FLAG_AQ_ADD_MAC_FILTER;
+       } else {
+               f->remove = false;
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+index 9655318803b7..4d471a6f2946 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+@@ -564,6 +564,47 @@ void iavf_del_ether_addrs(struct iavf_adapter *adapter)
+       kfree(veal);
+ }
++/**
++ * iavf_mac_add_ok
++ * @adapter: adapter structure
++ *
++ * Submit list of filters based on PF response.
++ **/
++static void iavf_mac_add_ok(struct iavf_adapter *adapter)
++{
++      struct iavf_mac_filter *f, *ftmp;
++
++      spin_lock_bh(&adapter->mac_vlan_list_lock);
++      list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) {
++              f->is_new_mac = false;
++      }
++      spin_unlock_bh(&adapter->mac_vlan_list_lock);
++}
++
++/**
++ * iavf_mac_add_reject
++ * @adapter: adapter structure
++ *
++ * Remove filters from list based on PF response.
++ **/
++static void iavf_mac_add_reject(struct iavf_adapter *adapter)
++{
++      struct net_device *netdev = adapter->netdev;
++      struct iavf_mac_filter *f, *ftmp;
++
++      spin_lock_bh(&adapter->mac_vlan_list_lock);
++      list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) {
++              if (f->remove && ether_addr_equal(f->macaddr, netdev->dev_addr))
++                      f->remove = false;
++
++              if (f->is_new_mac) {
++                      list_del(&f->list);
++                      kfree(f);
++              }
++      }
++      spin_unlock_bh(&adapter->mac_vlan_list_lock);
++}
++
+ /**
+  * iavf_add_vlans
+  * @adapter: adapter structure
+@@ -1316,6 +1357,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
+               case VIRTCHNL_OP_ADD_ETH_ADDR:
+                       dev_err(&adapter->pdev->dev, "Failed to add MAC filter, error %s\n",
+                               iavf_stat_str(&adapter->hw, v_retval));
++                      iavf_mac_add_reject(adapter);
+                       /* restore administratively set MAC address */
+                       ether_addr_copy(adapter->hw.mac.addr, netdev->dev_addr);
+                       break;
+@@ -1385,10 +1427,11 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
+               }
+       }
+       switch (v_opcode) {
+-      case VIRTCHNL_OP_ADD_ETH_ADDR: {
++      case VIRTCHNL_OP_ADD_ETH_ADDR:
++              if (!v_retval)
++                      iavf_mac_add_ok(adapter);
+               if (!ether_addr_equal(netdev->dev_addr, adapter->hw.mac.addr))
+                       ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr);
+-              }
+               break;
+       case VIRTCHNL_OP_GET_STATS: {
+               struct iavf_eth_stats *stats =
+-- 
+2.30.2
+
diff --git a/queue-5.4/iommu-check-if-group-is-null-before-remove-device.patch b/queue-5.4/iommu-check-if-group-is-null-before-remove-device.patch
new file mode 100644 (file)
index 0000000..193cce5
--- /dev/null
@@ -0,0 +1,55 @@
+From 31572e7a3b58674bcd68893db81d12835435fdcd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 31 Jul 2021 09:47:37 +0200
+Subject: iommu: Check if group is NULL before remove device
+
+From: Frank Wunderlich <frank-w@public-files.de>
+
+[ Upstream commit 5aa95d8834e07907e64937d792c12ffef7fb271f ]
+
+If probe_device is failing, iommu_group is not initialized because
+iommu_group_add_device is not reached, so freeing it will result
+in NULL pointer access.
+
+iommu_bus_init
+  ->bus_iommu_probe
+      ->probe_iommu_group in for each:/* return -22 in fail case */
+          ->iommu_probe_device
+              ->__iommu_probe_device       /* return -22 here.*/
+                  -> ops->probe_device          /* return -22 here.*/
+                  -> iommu_group_get_for_dev
+                        -> ops->device_group
+                        -> iommu_group_add_device //good case
+  ->remove_iommu_group  //in fail case, it will remove group
+     ->iommu_release_device
+         ->iommu_group_remove_device // here we don't have group
+
+In my case ops->probe_device (mtk_iommu_probe_device from
+mtk_iommu_v1.c) is due to failing fwspec->ops mismatch.
+
+Fixes: d72e31c93746 ("iommu: IOMMU Groups")
+Signed-off-by: Frank Wunderlich <frank-w@public-files.de>
+Link: https://lore.kernel.org/r/20210731074737.4573-1-linux@fw-web.de
+Signed-off-by: Joerg Roedel <jroedel@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/iommu/iommu.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
+index 9d7232e26ecf..c5758fb696cc 100644
+--- a/drivers/iommu/iommu.c
++++ b/drivers/iommu/iommu.c
+@@ -775,6 +775,9 @@ void iommu_group_remove_device(struct device *dev)
+       struct iommu_group *group = dev->iommu_group;
+       struct group_device *tmp_device, *device = NULL;
++      if (!group)
++              return;
++
+       dev_info(dev, "Removing from iommu group %d\n", group->id);
+       /* Pre-notify listeners that a device is being removed. */
+-- 
+2.30.2
+
diff --git a/queue-5.4/net-6pack-fix-slab-out-of-bounds-in-decode_data.patch b/queue-5.4/net-6pack-fix-slab-out-of-bounds-in-decode_data.patch
new file mode 100644 (file)
index 0000000..f7b99f3
--- /dev/null
@@ -0,0 +1,67 @@
+From f56ebc8fee81e13d5378c9b1789dd4764c03b909 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 13 Aug 2021 18:14:33 +0300
+Subject: net: 6pack: fix slab-out-of-bounds in decode_data
+
+From: Pavel Skripkin <paskripkin@gmail.com>
+
+[ Upstream commit 19d1532a187669ce86d5a2696eb7275310070793 ]
+
+Syzbot reported slab-out-of bounds write in decode_data().
+The problem was in missing validation checks.
+
+Syzbot's reproducer generated malicious input, which caused
+decode_data() to be called a lot in sixpack_decode(). Since
+rx_count_cooked is only 400 bytes and noone reported before,
+that 400 bytes is not enough, let's just check if input is malicious
+and complain about buffer overrun.
+
+Fail log:
+==================================================================
+BUG: KASAN: slab-out-of-bounds in drivers/net/hamradio/6pack.c:843
+Write of size 1 at addr ffff888087c5544e by task kworker/u4:0/7
+
+CPU: 0 PID: 7 Comm: kworker/u4:0 Not tainted 5.6.0-rc3-syzkaller #0
+...
+Workqueue: events_unbound flush_to_ldisc
+Call Trace:
+ __dump_stack lib/dump_stack.c:77 [inline]
+ dump_stack+0x197/0x210 lib/dump_stack.c:118
+ print_address_description.constprop.0.cold+0xd4/0x30b mm/kasan/report.c:374
+ __kasan_report.cold+0x1b/0x32 mm/kasan/report.c:506
+ kasan_report+0x12/0x20 mm/kasan/common.c:641
+ __asan_report_store1_noabort+0x17/0x20 mm/kasan/generic_report.c:137
+ decode_data.part.0+0x23b/0x270 drivers/net/hamradio/6pack.c:843
+ decode_data drivers/net/hamradio/6pack.c:965 [inline]
+ sixpack_decode drivers/net/hamradio/6pack.c:968 [inline]
+
+Reported-and-tested-by: syzbot+fc8cd9a673d4577fb2e4@syzkaller.appspotmail.com
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
+Reviewed-by: Dan Carpenter <dan.carpenter@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/hamradio/6pack.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c
+index 71d6629e65c9..da13683d52d1 100644
+--- a/drivers/net/hamradio/6pack.c
++++ b/drivers/net/hamradio/6pack.c
+@@ -839,6 +839,12 @@ static void decode_data(struct sixpack *sp, unsigned char inbyte)
+               return;
+       }
++      if (sp->rx_count_cooked + 2 >= sizeof(sp->cooked_buf)) {
++              pr_err("6pack: cooked buffer overrun, data loss\n");
++              sp->rx_count = 0;
++              return;
++      }
++
+       buf = sp->raw_buf;
+       sp->cooked_buf[sp->rx_count_cooked++] =
+               buf[0] | ((buf[1] << 2) & 0xc0);
+-- 
+2.30.2
+
diff --git a/queue-5.4/net-mdio-mux-don-t-ignore-memory-allocation-errors.patch b/queue-5.4/net-mdio-mux-don-t-ignore-memory-allocation-errors.patch
new file mode 100644 (file)
index 0000000..2eeace2
--- /dev/null
@@ -0,0 +1,96 @@
+From ef19b1e55f0cd20877624b2fe850bbd725b1fc6c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 17 Aug 2021 20:38:02 -0700
+Subject: net: mdio-mux: Don't ignore memory allocation errors
+
+From: Saravana Kannan <saravanak@google.com>
+
+[ Upstream commit 99d81e942474cc7677d12f673f42a7ea699e2589 ]
+
+If we are seeing memory allocation errors, don't try to continue
+registering child mdiobus devices. It's unlikely they'll succeed.
+
+Fixes: 342fa1964439 ("mdio: mux: make child bus walking more permissive and errors more verbose")
+Signed-off-by: Saravana Kannan <saravanak@google.com>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Acked-by: Marc Zyngier <maz@kernel.org>
+Tested-by: Marc Zyngier <maz@kernel.org>
+Acked-by: Kevin Hilman <khilman@baylibre.com>
+Tested-by: Kevin Hilman <khilman@baylibre.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/phy/mdio-mux.c | 28 ++++++++++++++++++----------
+ 1 file changed, 18 insertions(+), 10 deletions(-)
+
+diff --git a/drivers/net/phy/mdio-mux.c b/drivers/net/phy/mdio-mux.c
+index 6a1d3540210b..c96ef3b3fa3a 100644
+--- a/drivers/net/phy/mdio-mux.c
++++ b/drivers/net/phy/mdio-mux.c
+@@ -82,6 +82,17 @@ out:
+ static int parent_count;
++static void mdio_mux_uninit_children(struct mdio_mux_parent_bus *pb)
++{
++      struct mdio_mux_child_bus *cb = pb->children;
++
++      while (cb) {
++              mdiobus_unregister(cb->mii_bus);
++              mdiobus_free(cb->mii_bus);
++              cb = cb->next;
++      }
++}
++
+ int mdio_mux_init(struct device *dev,
+                 struct device_node *mux_node,
+                 int (*switch_fn)(int cur, int desired, void *data),
+@@ -144,7 +155,7 @@ int mdio_mux_init(struct device *dev,
+               cb = devm_kzalloc(dev, sizeof(*cb), GFP_KERNEL);
+               if (!cb) {
+                       ret_val = -ENOMEM;
+-                      continue;
++                      goto err_loop;
+               }
+               cb->bus_number = v;
+               cb->parent = pb;
+@@ -152,8 +163,7 @@ int mdio_mux_init(struct device *dev,
+               cb->mii_bus = mdiobus_alloc();
+               if (!cb->mii_bus) {
+                       ret_val = -ENOMEM;
+-                      devm_kfree(dev, cb);
+-                      continue;
++                      goto err_loop;
+               }
+               cb->mii_bus->priv = cb;
+@@ -182,6 +192,10 @@ int mdio_mux_init(struct device *dev,
+       dev_err(dev, "Error: No acceptable child buses found\n");
+       devm_kfree(dev, pb);
++
++err_loop:
++      mdio_mux_uninit_children(pb);
++      of_node_put(child_bus_node);
+ err_pb_kz:
+       put_device(&parent_bus->dev);
+ err_parent_bus:
+@@ -193,14 +207,8 @@ EXPORT_SYMBOL_GPL(mdio_mux_init);
+ void mdio_mux_uninit(void *mux_handle)
+ {
+       struct mdio_mux_parent_bus *pb = mux_handle;
+-      struct mdio_mux_child_bus *cb = pb->children;
+-
+-      while (cb) {
+-              mdiobus_unregister(cb->mii_bus);
+-              mdiobus_free(cb->mii_bus);
+-              cb = cb->next;
+-      }
++      mdio_mux_uninit_children(pb);
+       put_device(&pb->mii_bus->dev);
+ }
+ EXPORT_SYMBOL_GPL(mdio_mux_uninit);
+-- 
+2.30.2
+
diff --git a/queue-5.4/net-mdio-mux-handle-eprobe_defer-correctly.patch b/queue-5.4/net-mdio-mux-handle-eprobe_defer-correctly.patch
new file mode 100644 (file)
index 0000000..046f5cc
--- /dev/null
@@ -0,0 +1,58 @@
+From 24174dfc032da5ba2073500b0c32584372585108 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 17 Aug 2021 20:38:03 -0700
+Subject: net: mdio-mux: Handle -EPROBE_DEFER correctly
+
+From: Saravana Kannan <saravanak@google.com>
+
+[ Upstream commit 7bd0cef5dac685f09ef8b0b2a7748ff42d284dc7 ]
+
+When registering mdiobus children, if we get an -EPROBE_DEFER, we shouldn't
+ignore it and continue registering the rest of the mdiobus children. This
+would permanently prevent the deferring child mdiobus from working instead
+of reattempting it in the future. So, if a child mdiobus needs to be
+reattempted in the future, defer the entire mdio-mux initialization.
+
+This fixes the issue where PHYs sitting under the mdio-mux aren't
+initialized correctly if the PHY's interrupt controller is not yet ready
+when the mdio-mux is being probed. Additional context in the link below.
+
+Fixes: 0ca2997d1452 ("netdev/of/phy: Add MDIO bus multiplexer support.")
+Link: https://lore.kernel.org/lkml/CAGETcx95kHrv8wA-O+-JtfH7H9biJEGJtijuPVN0V5dUKUAB3A@mail.gmail.com/#t
+Signed-off-by: Saravana Kannan <saravanak@google.com>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Acked-by: Marc Zyngier <maz@kernel.org>
+Tested-by: Marc Zyngier <maz@kernel.org>
+Acked-by: Kevin Hilman <khilman@baylibre.com>
+Tested-by: Kevin Hilman <khilman@baylibre.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/phy/mdio-mux.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/phy/mdio-mux.c b/drivers/net/phy/mdio-mux.c
+index c96ef3b3fa3a..ccb3ee704eb1 100644
+--- a/drivers/net/phy/mdio-mux.c
++++ b/drivers/net/phy/mdio-mux.c
+@@ -175,11 +175,15 @@ int mdio_mux_init(struct device *dev,
+               cb->mii_bus->write = mdio_mux_write;
+               r = of_mdiobus_register(cb->mii_bus, child_bus_node);
+               if (r) {
++                      mdiobus_free(cb->mii_bus);
++                      if (r == -EPROBE_DEFER) {
++                              ret_val = r;
++                              goto err_loop;
++                      }
++                      devm_kfree(dev, cb);
+                       dev_err(dev,
+                               "Error: Failed to register MDIO bus for child %pOF\n",
+                               child_bus_node);
+-                      mdiobus_free(cb->mii_bus);
+-                      devm_kfree(dev, cb);
+               } else {
+                       cb->next = pb->children;
+                       pb->children = cb;
+-- 
+2.30.2
+
diff --git a/queue-5.4/net-qlcnic-add-missed-unlock-in-qlcnic_83xx_flash_re.patch b/queue-5.4/net-qlcnic-add-missed-unlock-in-qlcnic_83xx_flash_re.patch
new file mode 100644 (file)
index 0000000..a5c8230
--- /dev/null
@@ -0,0 +1,42 @@
+From b78272abfa40ddede0063c48d4f62fe41e6c6ec5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 16 Aug 2021 21:14:04 +0800
+Subject: net: qlcnic: add missed unlock in qlcnic_83xx_flash_read32
+
+From: Dinghao Liu <dinghao.liu@zju.edu.cn>
+
+[ Upstream commit 0a298d133893c72c96e2156ed7cb0f0c4a306a3e ]
+
+qlcnic_83xx_unlock_flash() is called on all paths after we call
+qlcnic_83xx_lock_flash(), except for one error path on failure
+of QLCRD32(), which may cause a deadlock. This bug is suggested
+by a static analysis tool, please advise.
+
+Fixes: 81d0aeb0a4fff ("qlcnic: flash template based firmware reset recovery")
+Signed-off-by: Dinghao Liu <dinghao.liu@zju.edu.cn>
+Link: https://lore.kernel.org/r/20210816131405.24024-1-dinghao.liu@zju.edu.cn
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
+index 29b9c728a65e..f2014c10f7c9 100644
+--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
+@@ -3158,8 +3158,10 @@ int qlcnic_83xx_flash_read32(struct qlcnic_adapter *adapter, u32 flash_addr,
+               indirect_addr = QLC_83XX_FLASH_DIRECT_DATA(addr);
+               ret = QLCRD32(adapter, indirect_addr, &err);
+-              if (err == -EIO)
++              if (err == -EIO) {
++                      qlcnic_83xx_unlock_flash(adapter);
+                       return err;
++              }
+               word = ret;
+               *(u32 *)p_data  = word;
+-- 
+2.30.2
+
diff --git a/queue-5.4/ovs-clear-skb-tstamp-in-forwarding-path.patch b/queue-5.4/ovs-clear-skb-tstamp-in-forwarding-path.patch
new file mode 100644 (file)
index 0000000..27470e4
--- /dev/null
@@ -0,0 +1,39 @@
+From 452fdec58660a92af8b9b6545d80610b98f19438 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 18 Aug 2021 10:22:15 +0800
+Subject: ovs: clear skb->tstamp in forwarding path
+
+From: kaixi.fan <fankaixi.li@bytedance.com>
+
+[ Upstream commit 01634047bf0d5c2d9b7d8095bb4de1663dbeedeb ]
+
+fq qdisc requires tstamp to be cleared in the forwarding path. Now ovs
+doesn't clear skb->tstamp. We encountered a problem with linux
+version 5.4.56 and ovs version 2.14.1, and packets failed to
+dequeue from qdisc when fq qdisc was attached to ovs port.
+
+Fixes: fb420d5d91c1 ("tcp/fq: move back to CLOCK_MONOTONIC")
+Signed-off-by: kaixi.fan <fankaixi.li@bytedance.com>
+Signed-off-by: xiexiaohui <xiexiaohui.xxh@bytedance.com>
+Reviewed-by: Cong Wang <cong.wang@bytedance.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/openvswitch/vport.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
+index 3fc38d16c456..19af0efeb8dc 100644
+--- a/net/openvswitch/vport.c
++++ b/net/openvswitch/vport.c
+@@ -499,6 +499,7 @@ void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto)
+       }
+       skb->dev = vport->dev;
++      skb->tstamp = 0;
+       vport->ops->send(skb);
+       return;
+-- 
+2.30.2
+
diff --git a/queue-5.4/ptp_pch-restore-dependency-on-pci.patch b/queue-5.4/ptp_pch-restore-dependency-on-pci.patch
new file mode 100644 (file)
index 0000000..3e7e10c
--- /dev/null
@@ -0,0 +1,38 @@
+From 5fe32a02783b01bc75aa09b476bb89fa3fb11f7b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 13 Aug 2021 20:33:27 +0300
+Subject: ptp_pch: Restore dependency on PCI
+
+From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+
+[ Upstream commit 55c8fca1dae1fb0d11deaa21b65a647dedb1bc50 ]
+
+During the swap dependency on PCH_GBE to selection PTP_1588_CLOCK_PCH
+incidentally dropped the implicit dependency on the PCI. Restore it.
+
+Fixes: 18d359ceb044 ("pch_gbe, ptp_pch: Fix the dependency direction between these drivers")
+Reported-by: kernel test robot <lkp@intel.com>
+Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/ptp/Kconfig | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig
+index 0517272a268e..9fb6f7643ea9 100644
+--- a/drivers/ptp/Kconfig
++++ b/drivers/ptp/Kconfig
+@@ -92,7 +92,8 @@ config DP83640_PHY
+ config PTP_1588_CLOCK_PCH
+       tristate "Intel PCH EG20T as PTP clock"
+       depends on X86_32 || COMPILE_TEST
+-      depends on HAS_IOMEM && NET
++      depends on HAS_IOMEM && PCI
++      depends on NET
+       imply PTP_1588_CLOCK
+       help
+         This driver adds support for using the PCH EG20T as a PTP
+-- 
+2.30.2
+
index 7f9d8073fe6bec0fd98c9170a83baeb920e9ce93..c6f22e53478e6e93576464eb555df5b405487761 100644 (file)
@@ -22,3 +22,23 @@ arm-dts-nomadik-fix-up-interrupt-controller-node-nam.patch
 net-usb-lan78xx-don-t-modify-phy_device-state-concur.patch
 drm-amd-display-fix-dynamic-bpp-issue-with-8k30-with.patch
 bluetooth-hidp-use-correct-wait-queue-when-removing-.patch
+iommu-check-if-group-is-null-before-remove-device.patch
+cpufreq-armada-37xx-forbid-cpufreq-for-1.2-ghz-varia.patch
+dccp-add-do-while-0-stubs-for-dccp_pr_debug-macros.patch
+virtio-protect-vqs-list-access.patch
+vhost-fix-the-calculation-in-vhost_overflow.patch
+bpf-clear-zext_dst-of-dead-insns.patch
+bnxt-don-t-lock-the-tx-queue-from-napi-poll.patch
+bnxt-disable-napi-before-canceling-dim.patch
+net-6pack-fix-slab-out-of-bounds-in-decode_data.patch
+ptp_pch-restore-dependency-on-pci.patch
+bnxt_en-add-missing-dma-memory-barriers.patch
+vrf-reset-skb-conntrack-connection-on-vrf-rcv.patch
+virtio-net-support-xdp-when-not-more-queues.patch
+virtio-net-use-netif_f_gro_hw-instead-of-netif_f_lro.patch
+net-qlcnic-add-missed-unlock-in-qlcnic_83xx_flash_re.patch
+net-mdio-mux-don-t-ignore-memory-allocation-errors.patch
+net-mdio-mux-handle-eprobe_defer-correctly.patch
+ovs-clear-skb-tstamp-in-forwarding-path.patch
+i40e-fix-atr-queue-selection.patch
+iavf-fix-ping-is-lost-after-untrusted-vf-had-tried-t.patch
diff --git a/queue-5.4/vhost-fix-the-calculation-in-vhost_overflow.patch b/queue-5.4/vhost-fix-the-calculation-in-vhost_overflow.patch
new file mode 100644 (file)
index 0000000..37f4190
--- /dev/null
@@ -0,0 +1,49 @@
+From bcb9042bb3f5a18e6885131afb7ddbbffa90a04f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 28 Jul 2021 21:07:56 +0800
+Subject: vhost: Fix the calculation in vhost_overflow()
+
+From: Xie Yongji <xieyongji@bytedance.com>
+
+[ Upstream commit f7ad318ea0ad58ebe0e595e59aed270bb643b29b ]
+
+This fixes the incorrect calculation for integer overflow
+when the last address of iova range is 0xffffffff.
+
+Fixes: ec33d031a14b ("vhost: detect 32 bit integer wrap around")
+Reported-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Link: https://lore.kernel.org/r/20210728130756.97-2-xieyongji@bytedance.com
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/vhost/vhost.c | 10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
+index a279ecacbf60..97be299f0a8d 100644
+--- a/drivers/vhost/vhost.c
++++ b/drivers/vhost/vhost.c
+@@ -702,10 +702,16 @@ static bool log_access_ok(void __user *log_base, u64 addr, unsigned long sz)
+                        (sz + VHOST_PAGE_SIZE * 8 - 1) / VHOST_PAGE_SIZE / 8);
+ }
++/* Make sure 64 bit math will not overflow. */
+ static bool vhost_overflow(u64 uaddr, u64 size)
+ {
+-      /* Make sure 64 bit math will not overflow. */
+-      return uaddr > ULONG_MAX || size > ULONG_MAX || uaddr > ULONG_MAX - size;
++      if (uaddr > ULONG_MAX || size > ULONG_MAX)
++              return true;
++
++      if (!size)
++              return false;
++
++      return uaddr > ULONG_MAX - size + 1;
+ }
+ /* Caller should have vq mutex and device mutex. */
+-- 
+2.30.2
+
diff --git a/queue-5.4/virtio-net-support-xdp-when-not-more-queues.patch b/queue-5.4/virtio-net-support-xdp-when-not-more-queues.patch
new file mode 100644 (file)
index 0000000..fced239
--- /dev/null
@@ -0,0 +1,168 @@
+From 2f3e671182f11df3607fcda06a01f1586cc4df7c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 10 Mar 2021 10:24:45 +0800
+Subject: virtio-net: support XDP when not more queues
+
+From: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
+
+[ Upstream commit 97c2c69e1926260c78c7f1c0b2c987934f1dc7a1 ]
+
+The number of queues implemented by many virtio backends is limited,
+especially some machines have a large number of CPUs. In this case, it
+is often impossible to allocate a separate queue for
+XDP_TX/XDP_REDIRECT, then xdp cannot be loaded to work, even xdp does
+not use the XDP_TX/XDP_REDIRECT.
+
+This patch allows XDP_TX/XDP_REDIRECT to run by reuse the existing SQ
+with __netif_tx_lock() hold when there are not enough queues.
+
+Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
+Reviewed-by: Dust Li <dust.li@linux.alibaba.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/virtio_net.c | 62 +++++++++++++++++++++++++++++++---------
+ 1 file changed, 49 insertions(+), 13 deletions(-)
+
+diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
+index 15453d6fcc23..36f8aeb113a8 100644
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -195,6 +195,9 @@ struct virtnet_info {
+       /* # of XDP queue pairs currently used by the driver */
+       u16 xdp_queue_pairs;
++      /* xdp_queue_pairs may be 0, when xdp is already loaded. So add this. */
++      bool xdp_enabled;
++
+       /* I like... big packets and I cannot lie! */
+       bool big_packets;
+@@ -485,12 +488,41 @@ static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
+       return 0;
+ }
+-static struct send_queue *virtnet_xdp_sq(struct virtnet_info *vi)
+-{
+-      unsigned int qp;
+-
+-      qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
+-      return &vi->sq[qp];
++/* when vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for xdp tx on
++ * the current cpu, so it does not need to be locked.
++ *
++ * Here we use marco instead of inline functions because we have to deal with
++ * three issues at the same time: 1. the choice of sq. 2. judge and execute the
++ * lock/unlock of txq 3. make sparse happy. It is difficult for two inline
++ * functions to perfectly solve these three problems at the same time.
++ */
++#define virtnet_xdp_get_sq(vi) ({                                       \
++      struct netdev_queue *txq;                                       \
++      typeof(vi) v = (vi);                                            \
++      unsigned int qp;                                                \
++                                                                      \
++      if (v->curr_queue_pairs > nr_cpu_ids) {                         \
++              qp = v->curr_queue_pairs - v->xdp_queue_pairs;          \
++              qp += smp_processor_id();                               \
++              txq = netdev_get_tx_queue(v->dev, qp);                  \
++              __netif_tx_acquire(txq);                                \
++      } else {                                                        \
++              qp = smp_processor_id() % v->curr_queue_pairs;          \
++              txq = netdev_get_tx_queue(v->dev, qp);                  \
++              __netif_tx_lock(txq, raw_smp_processor_id());           \
++      }                                                               \
++      v->sq + qp;                                                     \
++})
++
++#define virtnet_xdp_put_sq(vi, q) {                                     \
++      struct netdev_queue *txq;                                       \
++      typeof(vi) v = (vi);                                            \
++                                                                      \
++      txq = netdev_get_tx_queue(v->dev, (q) - v->sq);                 \
++      if (v->curr_queue_pairs > nr_cpu_ids)                           \
++              __netif_tx_release(txq);                                \
++      else                                                            \
++              __netif_tx_unlock(txq);                                 \
+ }
+ static int virtnet_xdp_xmit(struct net_device *dev,
+@@ -516,7 +548,7 @@ static int virtnet_xdp_xmit(struct net_device *dev,
+       if (!xdp_prog)
+               return -ENXIO;
+-      sq = virtnet_xdp_sq(vi);
++      sq = virtnet_xdp_get_sq(vi);
+       if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
+               ret = -EINVAL;
+@@ -564,12 +596,13 @@ out:
+       sq->stats.kicks += kicks;
+       u64_stats_update_end(&sq->stats.syncp);
++      virtnet_xdp_put_sq(vi, sq);
+       return ret;
+ }
+ static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
+ {
+-      return vi->xdp_queue_pairs ? VIRTIO_XDP_HEADROOM : 0;
++      return vi->xdp_enabled ? VIRTIO_XDP_HEADROOM : 0;
+ }
+ /* We copy the packet for XDP in the following cases:
+@@ -1458,12 +1491,13 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
+               xdp_do_flush_map();
+       if (xdp_xmit & VIRTIO_XDP_TX) {
+-              sq = virtnet_xdp_sq(vi);
++              sq = virtnet_xdp_get_sq(vi);
+               if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
+                       u64_stats_update_begin(&sq->stats.syncp);
+                       sq->stats.kicks++;
+                       u64_stats_update_end(&sq->stats.syncp);
+               }
++              virtnet_xdp_put_sq(vi, sq);
+       }
+       return received;
+@@ -2480,10 +2514,9 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
+       /* XDP requires extra queues for XDP_TX */
+       if (curr_qp + xdp_qp > vi->max_queue_pairs) {
+-              NL_SET_ERR_MSG_MOD(extack, "Too few free TX rings available");
+-              netdev_warn(dev, "request %i queues but max is %i\n",
++              netdev_warn(dev, "XDP request %i queues but max is %i. XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n",
+                           curr_qp + xdp_qp, vi->max_queue_pairs);
+-              return -ENOMEM;
++              xdp_qp = 0;
+       }
+       old_prog = rtnl_dereference(vi->rq[0].xdp_prog);
+@@ -2520,11 +2553,14 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
+       vi->xdp_queue_pairs = xdp_qp;
+       if (prog) {
++              vi->xdp_enabled = true;
+               for (i = 0; i < vi->max_queue_pairs; i++) {
+                       rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
+                       if (i == 0 && !old_prog)
+                               virtnet_clear_guest_offloads(vi);
+               }
++      } else {
++              vi->xdp_enabled = false;
+       }
+       for (i = 0; i < vi->max_queue_pairs; i++) {
+@@ -2609,7 +2645,7 @@ static int virtnet_set_features(struct net_device *dev,
+       int err;
+       if ((dev->features ^ features) & NETIF_F_LRO) {
+-              if (vi->xdp_queue_pairs)
++              if (vi->xdp_enabled)
+                       return -EBUSY;
+               if (features & NETIF_F_LRO)
+-- 
+2.30.2
+
diff --git a/queue-5.4/virtio-net-use-netif_f_gro_hw-instead-of-netif_f_lro.patch b/queue-5.4/virtio-net-use-netif_f_gro_hw-instead-of-netif_f_lro.patch
new file mode 100644 (file)
index 0000000..98b3ddf
--- /dev/null
@@ -0,0 +1,101 @@
+From 382337353db50d281b5994145b84946fb5a93dac Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 17 Aug 2021 16:06:59 +0800
+Subject: virtio-net: use NETIF_F_GRO_HW instead of NETIF_F_LRO
+
+From: Jason Wang <jasowang@redhat.com>
+
+[ Upstream commit dbcf24d153884439dad30484a0e3f02350692e4c ]
+
+Commit a02e8964eaf92 ("virtio-net: ethtool configurable LRO")
+maps LRO to virtio guest offloading features and allows the
+administrator to enable and disable those features via ethtool.
+
+This leads to several issues:
+
+- For a device that doesn't support control guest offloads, the "LRO"
+  can't be disabled triggering WARN in dev_disable_lro() when turning
+  off LRO or when enabling forwarding bridging etc.
+
+- For a device that supports control guest offloads, the guest
+  offloads are disabled in cases of bridging, forwarding etc slowing
+  down the traffic.
+
+Fix this by using NETIF_F_GRO_HW instead. Though the spec does not
+guarantee packets to be re-segmented as the original ones,
+we can add that to the spec, possibly with a flag for devices to
+differentiate between GRO and LRO.
+
+Further, we never advertised LRO historically before a02e8964eaf92
+("virtio-net: ethtool configurable LRO") and so bridged/forwarded
+configs effectively always relied on virtio receive offloads behaving
+like GRO - thus even if this breaks any configs it is at least not
+a regression.
+
+Fixes: a02e8964eaf92 ("virtio-net: ethtool configurable LRO")
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Reported-by: Ivan <ivan@prestigetransportation.com>
+Tested-by: Ivan <ivan@prestigetransportation.com>
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/virtio_net.c | 14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
+index 36f8aeb113a8..37c2cecd1e50 100644
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -63,7 +63,7 @@ static const unsigned long guest_offloads[] = {
+       VIRTIO_NET_F_GUEST_CSUM
+ };
+-#define GUEST_OFFLOAD_LRO_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
++#define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
+                               (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \
+                               (1ULL << VIRTIO_NET_F_GUEST_ECN)  | \
+                               (1ULL << VIRTIO_NET_F_GUEST_UFO))
+@@ -2493,7 +2493,7 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
+               virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
+               virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) ||
+               virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))) {
+-              NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO/CSUM, disable LRO/CSUM first");
++              NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing GRO_HW/CSUM, disable GRO_HW/CSUM first");
+               return -EOPNOTSUPP;
+       }
+@@ -2644,15 +2644,15 @@ static int virtnet_set_features(struct net_device *dev,
+       u64 offloads;
+       int err;
+-      if ((dev->features ^ features) & NETIF_F_LRO) {
++      if ((dev->features ^ features) & NETIF_F_GRO_HW) {
+               if (vi->xdp_enabled)
+                       return -EBUSY;
+-              if (features & NETIF_F_LRO)
++              if (features & NETIF_F_GRO_HW)
+                       offloads = vi->guest_offloads_capable;
+               else
+                       offloads = vi->guest_offloads_capable &
+-                                 ~GUEST_OFFLOAD_LRO_MASK;
++                                 ~GUEST_OFFLOAD_GRO_HW_MASK;
+               err = virtnet_set_guest_offloads(vi, offloads);
+               if (err)
+@@ -3128,9 +3128,9 @@ static int virtnet_probe(struct virtio_device *vdev)
+               dev->features |= NETIF_F_RXCSUM;
+       if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
+           virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6))
+-              dev->features |= NETIF_F_LRO;
++              dev->features |= NETIF_F_GRO_HW;
+       if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS))
+-              dev->hw_features |= NETIF_F_LRO;
++              dev->hw_features |= NETIF_F_GRO_HW;
+       dev->vlan_features = dev->features;
+-- 
+2.30.2
+
diff --git a/queue-5.4/virtio-protect-vqs-list-access.patch b/queue-5.4/virtio-protect-vqs-list-access.patch
new file mode 100644 (file)
index 0000000..7519ad1
--- /dev/null
@@ -0,0 +1,97 @@
+From ec7c95e32fa39841854ad14149368a7a80ee74a7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 21 Jul 2021 17:26:47 +0300
+Subject: virtio: Protect vqs list access
+
+From: Parav Pandit <parav@nvidia.com>
+
+[ Upstream commit 0e566c8f0f2e8325e35f6f97e13cde5356b41814 ]
+
+VQs may be accessed to mark the device broken while they are
+created/destroyed. Hence protect the access to the vqs list.
+
+Fixes: e2dcdfe95c0b ("virtio: virtio_break_device() to mark all virtqueues broken.")
+Signed-off-by: Parav Pandit <parav@nvidia.com>
+Link: https://lore.kernel.org/r/20210721142648.1525924-4-parav@nvidia.com
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/virtio/virtio.c      | 1 +
+ drivers/virtio/virtio_ring.c | 8 ++++++++
+ include/linux/virtio.h       | 1 +
+ 3 files changed, 10 insertions(+)
+
+diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
+index a977e32a88f2..59a05f1b8105 100644
+--- a/drivers/virtio/virtio.c
++++ b/drivers/virtio/virtio.c
+@@ -342,6 +342,7 @@ int register_virtio_device(struct virtio_device *dev)
+       virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
+       INIT_LIST_HEAD(&dev->vqs);
++      spin_lock_init(&dev->vqs_list_lock);
+       /*
+        * device_add() causes the bus infrastructure to look for a matching
+diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
+index 97e8a195e18f..f6011c9ed32f 100644
+--- a/drivers/virtio/virtio_ring.c
++++ b/drivers/virtio/virtio_ring.c
+@@ -1668,7 +1668,9 @@ static struct virtqueue *vring_create_virtqueue_packed(
+                       cpu_to_le16(vq->packed.event_flags_shadow);
+       }
++      spin_lock(&vdev->vqs_list_lock);
+       list_add_tail(&vq->vq.list, &vdev->vqs);
++      spin_unlock(&vdev->vqs_list_lock);
+       return &vq->vq;
+ err_desc_extra:
+@@ -2126,7 +2128,9 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
+       memset(vq->split.desc_state, 0, vring.num *
+                       sizeof(struct vring_desc_state_split));
++      spin_lock(&vdev->vqs_list_lock);
+       list_add_tail(&vq->vq.list, &vdev->vqs);
++      spin_unlock(&vdev->vqs_list_lock);
+       return &vq->vq;
+ }
+ EXPORT_SYMBOL_GPL(__vring_new_virtqueue);
+@@ -2210,7 +2214,9 @@ void vring_del_virtqueue(struct virtqueue *_vq)
+       }
+       if (!vq->packed_ring)
+               kfree(vq->split.desc_state);
++      spin_lock(&vq->vq.vdev->vqs_list_lock);
+       list_del(&_vq->list);
++      spin_unlock(&vq->vq.vdev->vqs_list_lock);
+       kfree(vq);
+ }
+ EXPORT_SYMBOL_GPL(vring_del_virtqueue);
+@@ -2274,10 +2280,12 @@ void virtio_break_device(struct virtio_device *dev)
+ {
+       struct virtqueue *_vq;
++      spin_lock(&dev->vqs_list_lock);
+       list_for_each_entry(_vq, &dev->vqs, list) {
+               struct vring_virtqueue *vq = to_vvq(_vq);
+               vq->broken = true;
+       }
++      spin_unlock(&dev->vqs_list_lock);
+ }
+ EXPORT_SYMBOL_GPL(virtio_break_device);
+diff --git a/include/linux/virtio.h b/include/linux/virtio.h
+index 15f906e4a748..7c075463c7f2 100644
+--- a/include/linux/virtio.h
++++ b/include/linux/virtio.h
+@@ -111,6 +111,7 @@ struct virtio_device {
+       bool config_enabled;
+       bool config_change_pending;
+       spinlock_t config_lock;
++      spinlock_t vqs_list_lock; /* Protects VQs list access */
+       struct device dev;
+       struct virtio_device_id id;
+       const struct virtio_config_ops *config;
+-- 
+2.30.2
+
diff --git a/queue-5.4/vrf-reset-skb-conntrack-connection-on-vrf-rcv.patch b/queue-5.4/vrf-reset-skb-conntrack-connection-on-vrf-rcv.patch
new file mode 100644 (file)
index 0000000..c8adb86
--- /dev/null
@@ -0,0 +1,209 @@
+From 45aad27907403d19cf8e78488efd99f1e02b1506 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 15 Aug 2021 12:00:02 +0000
+Subject: vrf: Reset skb conntrack connection on VRF rcv
+
+From: Lahav Schlesinger <lschlesinger@drivenets.com>
+
+[ Upstream commit 09e856d54bda5f288ef8437a90ab2b9b3eab83d1 ]
+
+To fix the "reverse-NAT" for replies.
+
+When a packet is sent over a VRF, the POST_ROUTING hooks are called
+twice: Once from the VRF interface, and once from the "actual"
+interface the packet will be sent from:
+1) First SNAT: l3mdev_l3_out() -> vrf_l3_out() -> .. -> vrf_output_direct()
+     This causes the POST_ROUTING hooks to run.
+2) Second SNAT: 'ip_output()' calls POST_ROUTING hooks again.
+
+Similarly for replies, first ip_rcv() calls PRE_ROUTING hooks, and
+second vrf_l3_rcv() calls them again.
+
+As an example, consider the following SNAT rule:
+> iptables -t nat -A POSTROUTING -p udp -m udp --dport 53 -j SNAT --to-source 2.2.2.2 -o vrf_1
+
+In this case sending over a VRF will create 2 conntrack entries.
+The first is from the VRF interface, which performs the IP SNAT.
+The second will run the SNAT, but since the "expected reply" will remain
+the same, conntrack randomizes the source port of the packet:
+e..g With a socket bound to 1.1.1.1:10000, sending to 3.3.3.3:53, the conntrack
+rules are:
+udp      17 29 src=2.2.2.2 dst=3.3.3.3 sport=10000 dport=53 packets=1 bytes=68 [UNREPLIED] src=3.3.3.3 dst=2.2.2.2 sport=53 dport=61033 packets=0 bytes=0 mark=0 use=1
+udp      17 29 src=1.1.1.1 dst=3.3.3.3 sport=10000 dport=53 packets=1 bytes=68 [UNREPLIED] src=3.3.3.3 dst=2.2.2.2 sport=53 dport=10000 packets=0 bytes=0 mark=0 use=1
+
+i.e. First SNAT IP from 1.1.1.1 --> 2.2.2.2, and second the src port is
+SNAT-ed from 10000 --> 61033.
+
+But when a reply is sent (3.3.3.3:53 -> 2.2.2.2:61033) only the later
+conntrack entry is matched:
+udp      17 29 src=2.2.2.2 dst=3.3.3.3 sport=10000 dport=53 packets=1 bytes=68 src=3.3.3.3 dst=2.2.2.2 sport=53 dport=61033 packets=1 bytes=49 mark=0 use=1
+udp      17 28 src=1.1.1.1 dst=3.3.3.3 sport=10000 dport=53 packets=1 bytes=68 [UNREPLIED] src=3.3.3.3 dst=2.2.2.2 sport=53 dport=10000 packets=0 bytes=0 mark=0 use=1
+
+And a "port 61033 unreachable" ICMP packet is sent back.
+
+The issue is that when PRE_ROUTING hooks are called from vrf_l3_rcv(),
+the skb already has a conntrack flow attached to it, which means
+nf_conntrack_in() will not resolve the flow again.
+
+This means only the dest port is "reverse-NATed" (61033 -> 10000) but
+the dest IP remains 2.2.2.2, and since the socket is bound to 1.1.1.1 it's
+not received.
+This can be verified by logging the 4-tuple of the packet in '__udp4_lib_rcv()'.
+
+The fix is then to reset the flow when skb is received on a VRF, to let
+conntrack resolve the flow again (which now will hit the earlier flow).
+
+To reproduce: (Without the fix "Got pkt_to_nat_port" will not be printed by
+  running 'bash ./repro'):
+  $ cat run_in_A1.py
+  import logging
+  logging.getLogger("scapy.runtime").setLevel(logging.ERROR)
+  from scapy.all import *
+  import argparse
+
+  def get_packet_to_send(udp_dst_port, msg_name):
+      return Ether(src='11:22:33:44:55:66', dst=iface_mac)/ \
+          IP(src='3.3.3.3', dst='2.2.2.2')/ \
+          UDP(sport=53, dport=udp_dst_port)/ \
+          Raw(f'{msg_name}\x0012345678901234567890')
+
+  parser = argparse.ArgumentParser()
+  parser.add_argument('-iface_mac', dest="iface_mac", type=str, required=True,
+                      help="From run_in_A3.py")
+  parser.add_argument('-socket_port', dest="socket_port", type=str,
+                      required=True, help="From run_in_A3.py")
+  parser.add_argument('-v1_mac', dest="v1_mac", type=str, required=True,
+                      help="From script")
+
+  args, _ = parser.parse_known_args()
+  iface_mac = args.iface_mac
+  socket_port = int(args.socket_port)
+  v1_mac = args.v1_mac
+
+  print(f'Source port before NAT: {socket_port}')
+
+  while True:
+      pkts = sniff(iface='_v0', store=True, count=1, timeout=10)
+      if 0 == len(pkts):
+          print('Something failed, rerun the script :(', flush=True)
+          break
+      pkt = pkts[0]
+      if not pkt.haslayer('UDP'):
+          continue
+
+      pkt_sport = pkt.getlayer('UDP').sport
+      print(f'Source port after NAT: {pkt_sport}', flush=True)
+
+      pkt_to_send = get_packet_to_send(pkt_sport, 'pkt_to_nat_port')
+      sendp(pkt_to_send, '_v0', verbose=False) # Will not be received
+
+      pkt_to_send = get_packet_to_send(socket_port, 'pkt_to_socket_port')
+      sendp(pkt_to_send, '_v0', verbose=False)
+      break
+
+  $ cat run_in_A2.py
+  import socket
+  import netifaces
+
+  print(f"{netifaces.ifaddresses('e00000')[netifaces.AF_LINK][0]['addr']}",
+        flush=True)
+  s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
+  s.setsockopt(socket.SOL_SOCKET, socket.SO_BINDTODEVICE,
+               str('vrf_1' + '\0').encode('utf-8'))
+  s.connect(('3.3.3.3', 53))
+  print(f'{s.getsockname()[1]}', flush=True)
+  s.settimeout(5)
+
+  while True:
+      try:
+          # Periodically send in order to keep the conntrack entry alive.
+          s.send(b'a'*40)
+          resp = s.recvfrom(1024)
+          msg_name = resp[0].decode('utf-8').split('\0')[0]
+          print(f"Got {msg_name}", flush=True)
+      except Exception as e:
+          pass
+
+  $ cat repro.sh
+  ip netns del A1 2> /dev/null
+  ip netns del A2 2> /dev/null
+  ip netns add A1
+  ip netns add A2
+
+  ip -n A1 link add _v0 type veth peer name _v1 netns A2
+  ip -n A1 link set _v0 up
+
+  ip -n A2 link add e00000 type bond
+  ip -n A2 link add lo0 type dummy
+  ip -n A2 link add vrf_1 type vrf table 10001
+  ip -n A2 link set vrf_1 up
+  ip -n A2 link set e00000 master vrf_1
+
+  ip -n A2 addr add 1.1.1.1/24 dev e00000
+  ip -n A2 link set e00000 up
+  ip -n A2 link set _v1 master e00000
+  ip -n A2 link set _v1 up
+  ip -n A2 link set lo0 up
+  ip -n A2 addr add 2.2.2.2/32 dev lo0
+
+  ip -n A2 neigh add 1.1.1.10 lladdr 77:77:77:77:77:77 dev e00000
+  ip -n A2 route add 3.3.3.3/32 via 1.1.1.10 dev e00000 table 10001
+
+  ip netns exec A2 iptables -t nat -A POSTROUTING -p udp -m udp --dport 53 -j \
+       SNAT --to-source 2.2.2.2 -o vrf_1
+
+  sleep 5
+  ip netns exec A2 python3 run_in_A2.py > x &
+  XPID=$!
+  sleep 5
+
+  IFACE_MAC=`sed -n 1p x`
+  SOCKET_PORT=`sed -n 2p x`
+  V1_MAC=`ip -n A2 link show _v1 | sed -n 2p | awk '{print $2'}`
+  ip netns exec A1 python3 run_in_A1.py -iface_mac ${IFACE_MAC} -socket_port \
+          ${SOCKET_PORT} -v1_mac ${V1_MAC}
+  sleep 5
+
+  kill -9 $XPID
+  wait $XPID 2> /dev/null
+  ip netns del A1
+  ip netns del A2
+  tail x -n 2
+  rm x
+  set +x
+
+Fixes: 73e20b761acf ("net: vrf: Add support for PREROUTING rules on vrf device")
+Signed-off-by: Lahav Schlesinger <lschlesinger@drivenets.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Link: https://lore.kernel.org/r/20210815120002.2787653-1-lschlesinger@drivenets.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/vrf.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
+index f08ed52d51f3..9b626c169554 100644
+--- a/drivers/net/vrf.c
++++ b/drivers/net/vrf.c
+@@ -1036,6 +1036,8 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
+       bool need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr);
+       bool is_ndisc = ipv6_ndisc_frame(skb);
++      nf_reset_ct(skb);
++
+       /* loopback, multicast & non-ND link-local traffic; do not push through
+        * packet taps again. Reset pkt_type for upper layers to process skb.
+        * For strict packets with a source LLA, determine the dst using the
+@@ -1092,6 +1094,8 @@ static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev,
+       skb->skb_iif = vrf_dev->ifindex;
+       IPCB(skb)->flags |= IPSKB_L3SLAVE;
++      nf_reset_ct(skb);
++
+       if (ipv4_is_multicast(ip_hdr(skb)->daddr))
+               goto out;
+-- 
+2.30.2
+