--- /dev/null
+From d17352d49e46ad66c502b7926e7f0b509eee7f87 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 11 Jun 2021 08:13:39 +0200
+Subject: alx: Fix an error handling path in 'alx_probe()'
+
+From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+
+[ Upstream commit 33e381448cf7a05d76ac0b47d4a6531ecd0e5c53 ]
+
+If an error occurs after a 'pci_enable_pcie_error_reporting()' call, it
+must be undone by a corresponding 'pci_disable_pcie_error_reporting()'
+call, as already done in the remove function.
+
+Fixes: ab69bde6b2e9 ("alx: add a simple AR816x/AR817x device driver")
+Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/atheros/alx/main.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
+index 9e02f8864593..5e90df42b201 100644
+--- a/drivers/net/ethernet/atheros/alx/main.c
++++ b/drivers/net/ethernet/atheros/alx/main.c
+@@ -1849,6 +1849,7 @@ out_free_netdev:
+ free_netdev(netdev);
+ out_pci_release:
+ pci_release_mem_regions(pdev);
++ pci_disable_pcie_error_reporting(pdev);
+ out_pci_disable:
+ pci_disable_device(pdev);
+ return err;
+--
+2.30.2
+
--- /dev/null
+From 3edb6579f5a77dd64672179a579e6441df456105 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 18 May 2021 21:00:27 +0200
+Subject: batman-adv: Avoid WARN_ON timing related checks
+
+From: Sven Eckelmann <sven@narfation.org>
+
+[ Upstream commit 9f460ae31c4435fd022c443a6029352217a16ac1 ]
+
+The soft/batadv interface for a queued OGM can be changed between the time
+the OGM was queued for transmission and the time the OGM is actually
+transmitted by the worker.
+
+But WARN_ON must be used to denote kernel bugs and not to print simple
+warnings. A warning can simply be printed using pr_warn.
+
+Reported-by: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
+Reported-by: syzbot+c0b807de416427ff3dd1@syzkaller.appspotmail.com
+Fixes: ef0a937f7a14 ("batman-adv: consider outgoing interface in OGM sending")
+Signed-off-by: Sven Eckelmann <sven@narfation.org>
+Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/batman-adv/bat_iv_ogm.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
+index a5e313cd6f44..b9dd150f6f01 100644
+--- a/net/batman-adv/bat_iv_ogm.c
++++ b/net/batman-adv/bat_iv_ogm.c
+@@ -409,8 +409,10 @@ static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet)
+ if (WARN_ON(!forw_packet->if_outgoing))
+ return;
+
+- if (WARN_ON(forw_packet->if_outgoing->soft_iface != soft_iface))
++ if (forw_packet->if_outgoing->soft_iface != soft_iface) {
++ pr_warn("%s: soft interface switch for queued OGM\n", __func__);
+ return;
++ }
+
+ if (forw_packet->if_incoming->if_status != BATADV_IF_ACTIVE)
+ return;
+--
+2.30.2
+
--- /dev/null
+From 9678ee300e0cc1278aa5f01698abf974dcecbbe6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Jun 2021 20:43:37 +0200
+Subject: be2net: Fix an error handling path in 'be_probe()'
+
+From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+
+[ Upstream commit c19c8c0e666f9259e2fc4d2fa4b9ff8e3b40ee5d ]
+
+If an error occurs after a 'pci_enable_pcie_error_reporting()' call, it
+must be undone by a corresponding 'pci_disable_pcie_error_reporting()'
+call, as already done in the remove function.
+
+Fixes: d6b6d9877878 ("be2net: use PCIe AER capability")
+Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+Acked-by: Somnath Kotur <somnath.kotur@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/emulex/benet/be_main.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
+index b6eba29d8e99..7968568bbe21 100644
+--- a/drivers/net/ethernet/emulex/benet/be_main.c
++++ b/drivers/net/ethernet/emulex/benet/be_main.c
+@@ -5897,6 +5897,7 @@ drv_cleanup:
+ unmap_bars:
+ be_unmap_pci_bars(adapter);
+ free_netdev:
++ pci_disable_pcie_error_reporting(pdev);
+ free_netdev(netdev);
+ rel_reg:
+ pci_release_regions(pdev);
+--
+2.30.2
+
--- /dev/null
+From ad57dca139f7b019a7514e7c48e30830ca8b1f21 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 18 Jun 2021 02:07:27 -0400
+Subject: bnxt_en: Call bnxt_ethtool_free() in bnxt_init_one() error path
+
+From: Somnath Kotur <somnath.kotur@broadcom.com>
+
+[ Upstream commit 03400aaa69f916a376e11526cf591901a96a3a5c ]
+
+bnxt_ethtool_init() may have allocated some memory and we need to
+call bnxt_ethtool_free() to properly unwind if bnxt_init_one()
+fails.
+
+Fixes: 7c3809181468 ("bnxt_en: Refactor bnxt_init_one() and turn on TPA support on 57500 chips.")
+Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+index 48c19602a0f3..c118de27bc5c 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -12981,6 +12981,7 @@ init_err_pci_clean:
+ bnxt_hwrm_func_drv_unrgtr(bp);
+ bnxt_free_hwrm_short_cmd_req(bp);
+ bnxt_free_hwrm_resources(bp);
++ bnxt_ethtool_free(bp);
+ kfree(bp->fw_health);
+ bp->fw_health = NULL;
+ bnxt_cleanup_pci(bp);
+--
+2.30.2
+
--- /dev/null
+From 0c4f5481c0c1590b05eec515e2df208bf3eda50d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 18 Jun 2021 02:07:26 -0400
+Subject: bnxt_en: Fix TQM fastpath ring backing store computation
+
+From: Rukhsana Ansari <rukhsana.ansari@broadcom.com>
+
+[ Upstream commit c12e1643d2738bcd4e26252ce531878841dd3f38 ]
+
+TQM fastpath ring needs to be sized to store both the requester
+and responder side of RoCE QPs in TQM for supporting bi-directional
+tests. Fix bnxt_alloc_ctx_mem() to multiply the RoCE QPs by a factor of
+2 when computing the number of entries for TQM fastpath ring. This
+fixes an RX pipeline stall issue when running bi-directional max
+RoCE QP tests.
+
+Fixes: c7dd7ab4b204 ("bnxt_en: Improve TQM ring context memory sizing formulas.")
+Signed-off-by: Rukhsana Ansari <rukhsana.ansari@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+index e9ac20c1c389..48c19602a0f3 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -7295,7 +7295,7 @@ skip_rdma:
+ entries_sp = ctx->vnic_max_vnic_entries + ctx->qp_max_l2_entries +
+ 2 * (extra_qps + ctx->qp_min_qp1_entries) + min;
+ entries_sp = roundup(entries_sp, ctx->tqm_entries_multiple);
+- entries = ctx->qp_max_l2_entries + extra_qps + ctx->qp_min_qp1_entries;
++ entries = ctx->qp_max_l2_entries + 2 * (extra_qps + ctx->qp_min_qp1_entries);
+ entries = roundup(entries, ctx->tqm_entries_multiple);
+ entries = clamp_t(u32, entries, min, ctx->tqm_max_entries_per_ring);
+ for (i = 0; i < ctx->tqm_fp_rings_count + 1; i++) {
+--
+2.30.2
+
--- /dev/null
+From 23ad4c4a9adb2b5d384a3f985b26f3e058a66e4b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 18 Jun 2021 02:07:25 -0400
+Subject: bnxt_en: Rediscover PHY capabilities after firmware reset
+
+From: Michael Chan <michael.chan@broadcom.com>
+
+[ Upstream commit 0afd6a4e8028cc487c240b6cfe04094e45a306e4 ]
+
+There is a missing bnxt_probe_phy() call in bnxt_fw_init_one() to
+rediscover the PHY capabilities after a firmware reset. This can cause
+some PHY related functionalities to fail after a firmware reset. For
+example, in multi-host, the ability for any host to configure the PHY
+settings may be lost after a firmware reset.
+
+Fixes: ec5d31e3c15d ("bnxt_en: Handle firmware reset status during IF_UP.")
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+index 027997c711ab..e9ac20c1c389 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -11573,6 +11573,8 @@ static void bnxt_fw_init_one_p3(struct bnxt *bp)
+ bnxt_hwrm_coal_params_qcaps(bp);
+ }
+
++static int bnxt_probe_phy(struct bnxt *bp, bool fw_dflt);
++
+ static int bnxt_fw_init_one(struct bnxt *bp)
+ {
+ int rc;
+@@ -11587,6 +11589,9 @@ static int bnxt_fw_init_one(struct bnxt *bp)
+ netdev_err(bp->dev, "Firmware init phase 2 failed\n");
+ return rc;
+ }
++ rc = bnxt_probe_phy(bp, false);
++ if (rc)
++ return rc;
+ rc = bnxt_approve_mac(bp, bp->dev->dev_addr, false);
+ if (rc)
+ return rc;
+--
+2.30.2
+
--- /dev/null
+From efe2ba8d2a2c8826b0c9a1e472b882239c6816ca Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 May 2021 15:47:32 +0000
+Subject: bpf: Fix leakage under speculation on mispredicted branches
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+[ Upstream commit 9183671af6dbf60a1219371d4ed73e23f43b49db ]
+
+The verifier only enumerates valid control-flow paths and skips paths that
+are unreachable in the non-speculative domain. And so it can miss issues
+under speculative execution on mispredicted branches.
+
+For example, a type confusion has been demonstrated with the following
+crafted program:
+
+ // r0 = pointer to a map array entry
+ // r6 = pointer to readable stack slot
+ // r9 = scalar controlled by attacker
+ 1: r0 = *(u64 *)(r0) // cache miss
+ 2: if r0 != 0x0 goto line 4
+ 3: r6 = r9
+ 4: if r0 != 0x1 goto line 6
+ 5: r9 = *(u8 *)(r6)
+ 6: // leak r9
+
+Since line 3 runs iff r0 == 0 and line 5 runs iff r0 == 1, the verifier
+concludes that the pointer dereference on line 5 is safe. But: if the
+attacker trains both the branches to fall-through, such that the following
+is speculatively executed ...
+
+ r6 = r9
+ r9 = *(u8 *)(r6)
+ // leak r9
+
+... then the program will dereference an attacker-controlled value and could
+leak its content under speculative execution via side-channel. This requires
+mistraining the branch predictor, which can be rather tricky, because the
+branches are mutually exclusive. However such training can be done at
+congruent addresses in user space using different branches that are not
+mutually exclusive. That is, by training branches in user space ...
+
+ A: if r0 != 0x0 goto line C
+ B: ...
+ C: if r0 != 0x0 goto line D
+ D: ...
+
+... such that addresses A and C collide to the same CPU branch prediction
+entries in the PHT (pattern history table) as those of the BPF program's
+lines 2 and 4, respectively. A non-privileged attacker could simply brute
+force such collisions in the PHT until observing the attack succeeding.
+
+Alternative methods to mistrain the branch predictor are also possible that
+avoid brute forcing the collisions in the PHT. A reliable attack has been
+demonstrated, for example, using the following crafted program:
+
+ // r0 = pointer to a [control] map array entry
+ // r7 = *(u64 *)(r0 + 0), training/attack phase
+ // r8 = *(u64 *)(r0 + 8), oob address
+ // [...]
+ // r0 = pointer to a [data] map array entry
+ 1: if r7 == 0x3 goto line 3
+ 2: r8 = r0
+ // crafted sequence of conditional jumps to separate the conditional
+ // branch in line 193 from the current execution flow
+ 3: if r0 != 0x0 goto line 5
+ 4: if r0 == 0x0 goto exit
+ 5: if r0 != 0x0 goto line 7
+ 6: if r0 == 0x0 goto exit
+ [...]
+ 187: if r0 != 0x0 goto line 189
+ 188: if r0 == 0x0 goto exit
+ // load any slowly-loaded value (due to cache miss in phase 3) ...
+ 189: r3 = *(u64 *)(r0 + 0x1200)
+ // ... and turn it into known zero for verifier, while preserving slowly-
+ // loaded dependency when executing:
+ 190: r3 &= 1
+ 191: r3 &= 2
+ // speculatively bypassed phase dependency
+ 192: r7 += r3
+ 193: if r7 == 0x3 goto exit
+ 194: r4 = *(u8 *)(r8 + 0)
+ // leak r4
+
+As can be seen, in training phase (phase != 0x3), the condition in line 1
+evaluates to false and therefore r8 with the oob address is overridden with
+the valid map value address, which in line 194 we can read out without
+issues. However, in attack phase, line 2 is skipped, and due to the cache
+miss in line 189 where the map value is (zeroed and later) added to the
+phase register, the condition in line 193 takes the fall-through path due
+to prior branch predictor training, where under speculation, it'll load the
+byte at oob address r8 (unknown scalar type at that point) which could then
+be leaked via side-channel.
+
+One way to mitigate these is to 'branch off' an unreachable path, meaning,
+the current verification path keeps following the is_branch_taken() path
+and we push the other branch to the verification stack. Given this is
+unreachable from the non-speculative domain, this branch's vstate is
+explicitly marked as speculative. This is needed for two reasons: i) if
+this path is solely seen from speculative execution, then we later on still
+want the dead code elimination to kick in in order to sanitize these
+instructions with jmp-1s, and ii) to ensure that paths walked in the
+non-speculative domain are not pruned from earlier walks of paths walked in
+the speculative domain. Additionally, for robustness, we mark the registers
+which have been part of the conditional as unknown in the speculative path
+given there should be no assumptions made on their content.
+
+The fix in here mitigates type confusion attacks described earlier due to
+i) all code paths in the BPF program being explored and ii) existing
+verifier logic already ensuring that a given memory access instruction
+references one specific data structure.
+
+An alternative to this fix that has also been looked at in this scope was to
+mark aux->alu_state at the jump instruction with a BPF_JMP_TAKEN state as
+well as direction encoding (always-goto, always-fallthrough, unknown), such
+that mixing of different always-* directions themselves as well as mixing of
+always-* with unknown directions would cause a program rejection by the
+verifier, e.g. programs with constructs like 'if ([...]) { x = 0; } else
+{ x = 1; }' with subsequent 'if (x == 1) { [...] }'. For unprivileged, this
+would result in only single direction always-* taken paths, and unknown taken
+paths being allowed, such that the former could be patched from a conditional
+jump to an unconditional jump (ja). Compared to this approach here, it would
+have two downsides: i) valid programs that otherwise are not performing any
+pointer arithmetic, etc, would potentially be rejected/broken, and ii) we are
+required to turn off path pruning for unprivileged, where both can be avoided
+in this work through pushing the invalid branch to the verification stack.
+
+The issue was originally discovered by Adam and Ofek, and later independently
+discovered and reported as a result of Benedict and Piotr's research work.
+
+Fixes: b2157399cc98 ("bpf: prevent out-of-bounds speculation")
+Reported-by: Adam Morrison <mad@cs.tau.ac.il>
+Reported-by: Ofek Kirzner <ofekkir@gmail.com>
+Reported-by: Benedict Schlueter <benedict.schlueter@rub.de>
+Reported-by: Piotr Krysiuk <piotras@gmail.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Reviewed-by: John Fastabend <john.fastabend@gmail.com>
+Reviewed-by: Benedict Schlueter <benedict.schlueter@rub.de>
+Reviewed-by: Piotr Krysiuk <piotras@gmail.com>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/verifier.c | 44 +++++++++++++++++++++++++++++++++++++++----
+ 1 file changed, 40 insertions(+), 4 deletions(-)
+
+diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
+index 9e600767803b..bdf4be10c8cc 100644
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -5937,6 +5937,27 @@ struct bpf_sanitize_info {
+ bool mask_to_left;
+ };
+
++static struct bpf_verifier_state *
++sanitize_speculative_path(struct bpf_verifier_env *env,
++ const struct bpf_insn *insn,
++ u32 next_idx, u32 curr_idx)
++{
++ struct bpf_verifier_state *branch;
++ struct bpf_reg_state *regs;
++
++ branch = push_stack(env, next_idx, curr_idx, true);
++ if (branch && insn) {
++ regs = branch->frame[branch->curframe]->regs;
++ if (BPF_SRC(insn->code) == BPF_K) {
++ mark_reg_unknown(env, regs, insn->dst_reg);
++ } else if (BPF_SRC(insn->code) == BPF_X) {
++ mark_reg_unknown(env, regs, insn->dst_reg);
++ mark_reg_unknown(env, regs, insn->src_reg);
++ }
++ }
++ return branch;
++}
++
+ static int sanitize_ptr_alu(struct bpf_verifier_env *env,
+ struct bpf_insn *insn,
+ const struct bpf_reg_state *ptr_reg,
+@@ -6020,7 +6041,8 @@ do_sim:
+ tmp = *dst_reg;
+ *dst_reg = *ptr_reg;
+ }
+- ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true);
++ ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1,
++ env->insn_idx);
+ if (!ptr_is_dst_reg && ret)
+ *dst_reg = tmp;
+ return !ret ? REASON_STACK : 0;
+@@ -8204,14 +8226,28 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
+ if (err)
+ return err;
+ }
++
+ if (pred == 1) {
+- /* only follow the goto, ignore fall-through */
++ /* Only follow the goto, ignore fall-through. If needed, push
++ * the fall-through branch for simulation under speculative
++ * execution.
++ */
++ if (!env->bypass_spec_v1 &&
++ !sanitize_speculative_path(env, insn, *insn_idx + 1,
++ *insn_idx))
++ return -EFAULT;
+ *insn_idx += insn->off;
+ return 0;
+ } else if (pred == 0) {
+- /* only follow fall-through branch, since
+- * that's where the program will go
++ /* Only follow the fall-through branch, since that's where the
++ * program will go. If needed, push the goto branch for
++ * simulation under speculative execution.
+ */
++ if (!env->bypass_spec_v1 &&
++ !sanitize_speculative_path(env, insn,
++ *insn_idx + insn->off + 1,
++ *insn_idx))
++ return -EFAULT;
+ return 0;
+ }
+
+--
+2.30.2
+
--- /dev/null
+From 46fc4070a270d6f68c7af51ce002c0d4a10b9760 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 11 Jun 2021 12:17:45 +0530
+Subject: cxgb4: fix endianness when flashing boot image
+
+From: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
+
+[ Upstream commit 42a2039753a7f758ba5c85cb199fcf10dc2111eb ]
+
+Boot images are copied to memory and updated with current underlying
+device ID before flashing them to adapter. Ensure the updated images
+are always flashed in Big Endian to allow the firmware to read the
+new images during boot properly.
+
+Fixes: 550883558f17 ("cxgb4: add support to flash boot image")
+Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 44 +++++++++++++---------
+ 1 file changed, 27 insertions(+), 17 deletions(-)
+
+diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+index 80882cfc370f..029f0c83d785 100644
+--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
++++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+@@ -3060,16 +3060,19 @@ int t4_read_flash(struct adapter *adapter, unsigned int addr,
+ * @addr: the start address to write
+ * @n: length of data to write in bytes
+ * @data: the data to write
++ * @byte_oriented: whether to store data as bytes or as words
+ *
+ * Writes up to a page of data (256 bytes) to the serial flash starting
+ * at the given address. All the data must be written to the same page.
++ * If @byte_oriented is set the write data is stored as byte stream
++ * (i.e. matches what on disk), otherwise in big-endian.
+ */
+ static int t4_write_flash(struct adapter *adapter, unsigned int addr,
+- unsigned int n, const u8 *data)
++ unsigned int n, const u8 *data, bool byte_oriented)
+ {
+- int ret;
+- u32 buf[64];
+ unsigned int i, c, left, val, offset = addr & 0xff;
++ u32 buf[64];
++ int ret;
+
+ if (addr >= adapter->params.sf_size || offset + n > SF_PAGE_SIZE)
+ return -EINVAL;
+@@ -3080,10 +3083,14 @@ static int t4_write_flash(struct adapter *adapter, unsigned int addr,
+ (ret = sf1_write(adapter, 4, 1, 1, val)) != 0)
+ goto unlock;
+
+- for (left = n; left; left -= c) {
++ for (left = n; left; left -= c, data += c) {
+ c = min(left, 4U);
+- for (val = 0, i = 0; i < c; ++i)
+- val = (val << 8) + *data++;
++ for (val = 0, i = 0; i < c; ++i) {
++ if (byte_oriented)
++ val = (val << 8) + data[i];
++ else
++ val = (val << 8) + data[c - i - 1];
++ }
+
+ ret = sf1_write(adapter, c, c != left, 1, val);
+ if (ret)
+@@ -3096,7 +3103,8 @@ static int t4_write_flash(struct adapter *adapter, unsigned int addr,
+ t4_write_reg(adapter, SF_OP_A, 0); /* unlock SF */
+
+ /* Read the page to verify the write succeeded */
+- ret = t4_read_flash(adapter, addr & ~0xff, ARRAY_SIZE(buf), buf, 1);
++ ret = t4_read_flash(adapter, addr & ~0xff, ARRAY_SIZE(buf), buf,
++ byte_oriented);
+ if (ret)
+ return ret;
+
+@@ -3692,7 +3700,7 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size)
+ */
+ memcpy(first_page, fw_data, SF_PAGE_SIZE);
+ ((struct fw_hdr *)first_page)->fw_ver = cpu_to_be32(0xffffffff);
+- ret = t4_write_flash(adap, fw_start, SF_PAGE_SIZE, first_page);
++ ret = t4_write_flash(adap, fw_start, SF_PAGE_SIZE, first_page, true);
+ if (ret)
+ goto out;
+
+@@ -3700,14 +3708,14 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size)
+ for (size -= SF_PAGE_SIZE; size; size -= SF_PAGE_SIZE) {
+ addr += SF_PAGE_SIZE;
+ fw_data += SF_PAGE_SIZE;
+- ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, fw_data);
++ ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, fw_data, true);
+ if (ret)
+ goto out;
+ }
+
+- ret = t4_write_flash(adap,
+- fw_start + offsetof(struct fw_hdr, fw_ver),
+- sizeof(hdr->fw_ver), (const u8 *)&hdr->fw_ver);
++ ret = t4_write_flash(adap, fw_start + offsetof(struct fw_hdr, fw_ver),
++ sizeof(hdr->fw_ver), (const u8 *)&hdr->fw_ver,
++ true);
+ out:
+ if (ret)
+ dev_err(adap->pdev_dev, "firmware download failed, error %d\n",
+@@ -10208,7 +10216,7 @@ int t4_load_cfg(struct adapter *adap, const u8 *cfg_data, unsigned int size)
+ n = size - i;
+ else
+ n = SF_PAGE_SIZE;
+- ret = t4_write_flash(adap, addr, n, cfg_data);
++ ret = t4_write_flash(adap, addr, n, cfg_data, true);
+ if (ret)
+ goto out;
+
+@@ -10677,13 +10685,14 @@ int t4_load_boot(struct adapter *adap, u8 *boot_data,
+ for (size -= SF_PAGE_SIZE; size; size -= SF_PAGE_SIZE) {
+ addr += SF_PAGE_SIZE;
+ boot_data += SF_PAGE_SIZE;
+- ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, boot_data);
++ ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, boot_data,
++ false);
+ if (ret)
+ goto out;
+ }
+
+ ret = t4_write_flash(adap, boot_sector, SF_PAGE_SIZE,
+- (const u8 *)header);
++ (const u8 *)header, false);
+
+ out:
+ if (ret)
+@@ -10758,7 +10767,7 @@ int t4_load_bootcfg(struct adapter *adap, const u8 *cfg_data, unsigned int size)
+ for (i = 0; i < size; i += SF_PAGE_SIZE) {
+ n = min_t(u32, size - i, SF_PAGE_SIZE);
+
+- ret = t4_write_flash(adap, addr, n, cfg_data);
++ ret = t4_write_flash(adap, addr, n, cfg_data, false);
+ if (ret)
+ goto out;
+
+@@ -10770,7 +10779,8 @@ int t4_load_bootcfg(struct adapter *adap, const u8 *cfg_data, unsigned int size)
+ for (i = 0; i < npad; i++) {
+ u8 data = 0;
+
+- ret = t4_write_flash(adap, cfg_addr + size + i, 1, &data);
++ ret = t4_write_flash(adap, cfg_addr + size + i, 1, &data,
++ false);
+ if (ret)
+ goto out;
+ }
+--
+2.30.2
+
--- /dev/null
+From 02c1b4978bc0d4b9ea05c0a576aef7c535a8b868 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 11 Jun 2021 12:17:46 +0530
+Subject: cxgb4: fix sleep in atomic when flashing PHY firmware
+
+From: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
+
+[ Upstream commit f046bd0ae15d8a0bbe57d4647da182420f720c3d ]
+
+Before writing new PHY firmware to on-chip memory, driver queries
+firmware for current running PHY firmware version, which can result
+in sleep waiting for reply. So, move spinlock closer to the actual
+on-chip memory write operation, instead of taking it at the callers.
+
+Fixes: 5fff701c838e ("cxgb4: always sync access when flashing PHY firmware")
+Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c | 2 --
+ drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 2 --
+ drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 2 ++
+ 3 files changed, 2 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+index 61ea3ec5c3fc..bc2de01d0539 100644
+--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
++++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+@@ -1337,9 +1337,7 @@ static int cxgb4_ethtool_flash_phy(struct net_device *netdev,
+ return ret;
+ }
+
+- spin_lock_bh(&adap->win0_lock);
+ ret = t4_load_phy_fw(adap, MEMWIN_NIC, NULL, data, size);
+- spin_unlock_bh(&adap->win0_lock);
+ if (ret)
+ dev_err(adap->pdev_dev, "Failed to load PHY FW\n");
+
+diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+index 1f601de02e70..762113a04dde 100644
+--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
++++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+@@ -4424,10 +4424,8 @@ static int adap_init0_phy(struct adapter *adap)
+
+ /* Load PHY Firmware onto adapter.
+ */
+- spin_lock_bh(&adap->win0_lock);
+ ret = t4_load_phy_fw(adap, MEMWIN_NIC, phy_info->phy_fw_version,
+ (u8 *)phyf->data, phyf->size);
+- spin_unlock_bh(&adap->win0_lock);
+ if (ret < 0)
+ dev_err(adap->pdev_dev, "PHY Firmware transfer error %d\n",
+ -ret);
+diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+index 029f0c83d785..601853bb34c9 100644
+--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
++++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+@@ -3820,9 +3820,11 @@ int t4_load_phy_fw(struct adapter *adap, int win,
+ /* Copy the supplied PHY Firmware image to the adapter memory location
+ * allocated by the adapter firmware.
+ */
++ spin_lock_bh(&adap->win0_lock);
+ ret = t4_memory_rw(adap, win, mtype, maddr,
+ phy_fw_size, (__be32 *)phy_fw_data,
+ T4_MEMORY_WRITE);
++ spin_unlock_bh(&adap->win0_lock);
+ if (ret)
+ return ret;
+
+--
+2.30.2
+
--- /dev/null
+From 0e203293194837d0583ef07a7ba6e942b08f163c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 12 Jun 2021 19:20:44 +0530
+Subject: cxgb4: fix wrong ethtool n-tuple rule lookup
+
+From: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
+
+[ Upstream commit 09427c1915f754ebe7d3d8e54e79bbee48afe916 ]
+
+The TID returned during successful filter creation is relative to
+the region in which the filter is created. Using it directly always
+returns Hi Prio/Normal filter region's entry for the first couple of
+entries, even though the rule is actually inserted in Hash region.
+Fix by analyzing in which region the filter has been inserted and
+save the absolute TID to be used for lookup later.
+
+Fixes: db43b30cd89c ("cxgb4: add ethtool n-tuple filter deletion")
+Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../ethernet/chelsio/cxgb4/cxgb4_ethtool.c | 24 ++++++++++++-------
+ 1 file changed, 16 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+index df20485b5744..83ed10ac8660 100644
+--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
++++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+@@ -1624,16 +1624,14 @@ static struct filter_entry *cxgb4_get_filter_entry(struct adapter *adap,
+ u32 ftid)
+ {
+ struct tid_info *t = &adap->tids;
+- struct filter_entry *f;
+
+- if (ftid < t->nhpftids)
+- f = &adap->tids.hpftid_tab[ftid];
+- else if (ftid < t->nftids)
+- f = &adap->tids.ftid_tab[ftid - t->nhpftids];
+- else
+- f = lookup_tid(&adap->tids, ftid);
++ if (ftid >= t->hpftid_base && ftid < t->hpftid_base + t->nhpftids)
++ return &t->hpftid_tab[ftid - t->hpftid_base];
++
++ if (ftid >= t->ftid_base && ftid < t->ftid_base + t->nftids)
++ return &t->ftid_tab[ftid - t->ftid_base];
+
+- return f;
++ return lookup_tid(t, ftid);
+ }
+
+ static void cxgb4_fill_filter_rule(struct ethtool_rx_flow_spec *fs,
+@@ -1840,6 +1838,11 @@ static int cxgb4_ntuple_del_filter(struct net_device *dev,
+ filter_id = filter_info->loc_array[cmd->fs.location];
+ f = cxgb4_get_filter_entry(adapter, filter_id);
+
++ if (f->fs.prio)
++ filter_id -= adapter->tids.hpftid_base;
++ else if (!f->fs.hash)
++ filter_id -= (adapter->tids.ftid_base - adapter->tids.nhpftids);
++
+ ret = cxgb4_flow_rule_destroy(dev, f->fs.tc_prio, &f->fs, filter_id);
+ if (ret)
+ goto err;
+@@ -1899,6 +1902,11 @@ static int cxgb4_ntuple_set_filter(struct net_device *netdev,
+
+ filter_info = &adapter->ethtool_filters->port[pi->port_id];
+
++ if (fs.prio)
++ tid += adapter->tids.hpftid_base;
++ else if (!fs.hash)
++ tid += (adapter->tids.ftid_base - adapter->tids.nhpftids);
++
+ filter_info->loc_array[cmd->fs.location] = tid;
+ set_bit(cmd->fs.location, filter_info->bmap);
+ filter_info->in_use++;
+--
+2.30.2
+
--- /dev/null
+From f2be84d93f7968155528be67b1b659636559ed1f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 18 Jun 2021 11:29:48 +0200
+Subject: cxgb4: fix wrong shift.
+
+From: Pavel Machek <pavel@denx.de>
+
+[ Upstream commit 39eb028183bc7378bb6187067e20bf6d8c836407 ]
+
+While fixing a Coverity warning, commit dd2c79677375 introduced a typo in
+shift value. Fix that.
+
+Signed-off-by: Pavel Machek (CIP) <pavel@denx.de>
+Fixes: dd2c79677375 ("cxgb4: Fix unintentional sign extension issues")
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
+index e664e05b9f02..5fbc087268db 100644
+--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
++++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
+@@ -198,7 +198,7 @@ static void set_nat_params(struct adapter *adap, struct filter_entry *f,
+ WORD_MASK, f->fs.nat_lip[3] |
+ f->fs.nat_lip[2] << 8 |
+ f->fs.nat_lip[1] << 16 |
+- (u64)f->fs.nat_lip[0] << 25, 1);
++ (u64)f->fs.nat_lip[0] << 24, 1);
+ }
+ }
+
+--
+2.30.2
+
--- /dev/null
+From e614a1f374eae5b2a3d6a77f8258ca046e802f01 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 11 Jun 2021 12:17:47 +0530
+Subject: cxgb4: halt chip before flashing PHY firmware image
+
+From: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
+
+[ Upstream commit 6d297540f75d759489054e8b07932208fc4db2cb ]
+
+When using firmware-assisted PHY firmware image write to flash,
+halt the chip before beginning the flash write operation to allow
+the running firmware to store the image persistently. Otherwise,
+the running firmware will only store the PHY image in local on-chip
+RAM, which will be lost after next reset.
+
+Fixes: 4ee339e1e92a ("cxgb4: add support to flash PHY image")
+Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../ethernet/chelsio/cxgb4/cxgb4_ethtool.c | 22 ++++++++++++++++---
+ 1 file changed, 19 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+index bc2de01d0539..df20485b5744 100644
+--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
++++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+@@ -1337,11 +1337,27 @@ static int cxgb4_ethtool_flash_phy(struct net_device *netdev,
+ return ret;
+ }
+
++ /* We have to RESET the chip/firmware because we need the
++ * chip in uninitialized state for loading new PHY image.
++ * Otherwise, the running firmware will only store the PHY
++ * image in local RAM which will be lost after next reset.
++ */
++ ret = t4_fw_reset(adap, adap->mbox, PIORSTMODE_F | PIORST_F);
++ if (ret < 0) {
++ dev_err(adap->pdev_dev,
++ "Set FW to RESET for flashing PHY FW failed. ret: %d\n",
++ ret);
++ return ret;
++ }
++
+ ret = t4_load_phy_fw(adap, MEMWIN_NIC, NULL, data, size);
+- if (ret)
+- dev_err(adap->pdev_dev, "Failed to load PHY FW\n");
++ if (ret < 0) {
++ dev_err(adap->pdev_dev, "Failed to load PHY FW. ret: %d\n",
++ ret);
++ return ret;
++ }
+
+- return ret;
++ return 0;
+ }
+
+ static int cxgb4_ethtool_flash_fw(struct net_device *netdev,
+--
+2.30.2
+
--- /dev/null
+From ec84e0e4f5cfdfb8270a1671db1f4d8d30ed56c1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 11 Jun 2021 18:49:48 -0700
+Subject: ethtool: strset: fix message length calculation
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit e175aef902697826d344ce3a12189329848fe898 ]
+
+Outer nest for ETHTOOL_A_STRSET_STRINGSETS is not accounted for.
+This may result in ETHTOOL_MSG_STRSET_GET producing a warning like:
+
+ calculated message payload length (684) not sufficient
+ WARNING: CPU: 0 PID: 30967 at net/ethtool/netlink.c:369 ethnl_default_doit+0x87a/0xa20
+
+and a splat.
+
+As usual with such warnings, three conditions must be met for the warning
+to trigger:
+ - there must be no skb size rounding up (e.g. reply_size of 684);
+ - string set must be per-device (so that the header gets populated);
+ - the device name must be at least 12 characters long.
+
+All in all, with current user space it looks like reading priv flags
+is the only place this could potentially happen. Or with syzbot :)
+
+Reported-by: syzbot+59aa77b92d06cd5a54f2@syzkaller.appspotmail.com
+Fixes: 71921690f974 ("ethtool: provide string sets with STRSET_GET request")
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ethtool/strset.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c
+index c3a5489964cd..9908b922cce8 100644
+--- a/net/ethtool/strset.c
++++ b/net/ethtool/strset.c
+@@ -328,6 +328,8 @@ static int strset_reply_size(const struct ethnl_req_info *req_base,
+ int len = 0;
+ int ret;
+
++ len += nla_total_size(0); /* ETHTOOL_A_STRSET_STRINGSETS */
++
+ for (i = 0; i < ETH_SS_COUNT; i++) {
+ const struct strset_info *set_info = &data->sets[i];
+
+--
+2.30.2
+
--- /dev/null
+From c95d4fc3d6bcc9bb0e065ea091f4767db8a9ea53 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 May 2021 08:34:59 +0200
+Subject: ice: add ndo_bpf callback for safe mode netdev ops
+
+From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+
+[ Upstream commit ebc5399ea1dfcddac31974091086a3379141899b ]
+
+The ice driver requires a programmable pipeline firmware package in order
+to have support for advanced features. Otherwise, the driver falls back to
+so-called 'safe mode'. For that mode, the ndo_bpf callback is not exposed and
+when a user tries to load an XDP program, the following happens:
+
+$ sudo ./xdp1 enp179s0f1
+libbpf: Kernel error message: Underlying driver does not support XDP in native mode
+link set xdp fd failed
+
+which is sort of confusing, as there is native XDP support, but not in
+the current mode. Improve the user experience by providing the specific
+ndo_bpf callback dedicated for safe mode which will make use of extack
+to explicitly let the user know that the DDP package is missing and
+that's the reason that the XDP can't be loaded onto interface currently.
+
+Cc: Jamal Hadi Salim <jhs@mojatatu.com>
+Fixes: efc2214b6047 ("ice: Add support for XDP")
+Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+Tested-by: Kiran Bhandare <kiranx.bhandare@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/ice_main.c | 15 +++++++++++++++
+ 1 file changed, 15 insertions(+)
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
+index d821c687f239..b61cd84be97f 100644
+--- a/drivers/net/ethernet/intel/ice/ice_main.c
++++ b/drivers/net/ethernet/intel/ice/ice_main.c
+@@ -2554,6 +2554,20 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
+ return (ret || xdp_ring_err) ? -ENOMEM : 0;
+ }
+
++/**
++ * ice_xdp_safe_mode - XDP handler for safe mode
++ * @dev: netdevice
++ * @xdp: XDP command
++ */
++static int ice_xdp_safe_mode(struct net_device __always_unused *dev,
++ struct netdev_bpf *xdp)
++{
++ NL_SET_ERR_MSG_MOD(xdp->extack,
++ "Please provide working DDP firmware package in order to use XDP\n"
++ "Refer to Documentation/networking/device_drivers/ethernet/intel/ice.rst");
++ return -EOPNOTSUPP;
++}
++
+ /**
+ * ice_xdp - implements XDP handler
+ * @dev: netdevice
+@@ -6805,6 +6819,7 @@ static const struct net_device_ops ice_netdev_safe_mode_ops = {
+ .ndo_change_mtu = ice_change_mtu,
+ .ndo_get_stats64 = ice_get_stats64,
+ .ndo_tx_timeout = ice_tx_timeout,
++ .ndo_bpf = ice_xdp_safe_mode,
+ };
+
+ static const struct net_device_ops ice_netdev_ops = {
+--
+2.30.2
+
--- /dev/null
+From ddc3d0f739329e4ce0f127baeb337a421b6aeff5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 May 2021 08:35:00 +0200
+Subject: ice: parameterize functions responsible for Tx ring management
+
+From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+
+[ Upstream commit 2e84f6b3773f43263124c76499c0c4ec3f40aa9b ]
+
+Commit ae15e0ba1b33 ("ice: Change number of XDP Tx queues to match
+number of Rx queues") tried to address the incorrect setting of XDP
+queue count that was based on the Tx queue count, whereas in theory we
+should provide the XDP queue per Rx queue. However, the routines that
+setup and destroy the set of Tx resources are still based on the
+vsi->num_txq.
+
+Ice supports the asynchronous Tx/Rx queue count, so for a setup where
+vsi->num_txq > vsi->num_rxq, ice_vsi_stop_tx_rings and ice_vsi_cfg_txqs
+will access vsi->xdp_rings out of bounds.
+
+Parameterize two mentioned functions so they get the size of Tx resources
+array as the input.
+
+Fixes: ae15e0ba1b33 ("ice: Change number of XDP Tx queues to match number of Rx queues")
+Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+Tested-by: Kiran Bhandare <kiranx.bhandare@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/ice_lib.c | 18 ++++++++++--------
+ 1 file changed, 10 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
+index 27e439853c3b..55432ea360ad 100644
+--- a/drivers/net/ethernet/intel/ice/ice_lib.c
++++ b/drivers/net/ethernet/intel/ice/ice_lib.c
+@@ -1715,12 +1715,13 @@ setup_rings:
+ * ice_vsi_cfg_txqs - Configure the VSI for Tx
+ * @vsi: the VSI being configured
+ * @rings: Tx ring array to be configured
++ * @count: number of Tx ring array elements
+ *
+ * Return 0 on success and a negative value on error
+ * Configure the Tx VSI for operation.
+ */
+ static int
+-ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings)
++ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings, u16 count)
+ {
+ struct ice_aqc_add_tx_qgrp *qg_buf;
+ u16 q_idx = 0;
+@@ -1732,7 +1733,7 @@ ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings)
+
+ qg_buf->num_txqs = 1;
+
+- for (q_idx = 0; q_idx < vsi->num_txq; q_idx++) {
++ for (q_idx = 0; q_idx < count; q_idx++) {
+ err = ice_vsi_cfg_txq(vsi, rings[q_idx], qg_buf);
+ if (err)
+ goto err_cfg_txqs;
+@@ -1752,7 +1753,7 @@ err_cfg_txqs:
+ */
+ int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi)
+ {
+- return ice_vsi_cfg_txqs(vsi, vsi->tx_rings);
++ return ice_vsi_cfg_txqs(vsi, vsi->tx_rings, vsi->num_txq);
+ }
+
+ /**
+@@ -1767,7 +1768,7 @@ int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi)
+ int ret;
+ int i;
+
+- ret = ice_vsi_cfg_txqs(vsi, vsi->xdp_rings);
++ ret = ice_vsi_cfg_txqs(vsi, vsi->xdp_rings, vsi->num_xdp_txq);
+ if (ret)
+ return ret;
+
+@@ -1965,17 +1966,18 @@ int ice_vsi_stop_all_rx_rings(struct ice_vsi *vsi)
+ * @rst_src: reset source
+ * @rel_vmvf_num: Relative ID of VF/VM
+ * @rings: Tx ring array to be stopped
++ * @count: number of Tx ring array elements
+ */
+ static int
+ ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
+- u16 rel_vmvf_num, struct ice_ring **rings)
++ u16 rel_vmvf_num, struct ice_ring **rings, u16 count)
+ {
+ u16 q_idx;
+
+ if (vsi->num_txq > ICE_LAN_TXQ_MAX_QDIS)
+ return -EINVAL;
+
+- for (q_idx = 0; q_idx < vsi->num_txq; q_idx++) {
++ for (q_idx = 0; q_idx < count; q_idx++) {
+ struct ice_txq_meta txq_meta = { };
+ int status;
+
+@@ -2003,7 +2005,7 @@ int
+ ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
+ u16 rel_vmvf_num)
+ {
+- return ice_vsi_stop_tx_rings(vsi, rst_src, rel_vmvf_num, vsi->tx_rings);
++ return ice_vsi_stop_tx_rings(vsi, rst_src, rel_vmvf_num, vsi->tx_rings, vsi->num_txq);
+ }
+
+ /**
+@@ -2012,7 +2014,7 @@ ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
+ */
+ int ice_vsi_stop_xdp_tx_rings(struct ice_vsi *vsi)
+ {
+- return ice_vsi_stop_tx_rings(vsi, ICE_NO_RESET, 0, vsi->xdp_rings);
++ return ice_vsi_stop_tx_rings(vsi, ICE_NO_RESET, 0, vsi->xdp_rings, vsi->num_xdp_txq);
+ }
+
+ /**
+--
+2.30.2
+
--- /dev/null
+From bdcc05527dc8a6c299615322fd78f63d6f210d9e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 18 Jun 2021 13:04:35 +0200
+Subject: icmp: don't send out ICMP messages with a source address of 0.0.0.0
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Toke Høiland-Jørgensen <toke@redhat.com>
+
+[ Upstream commit 321827477360934dc040e9d3c626bf1de6c3ab3c ]
+
+When constructing ICMP response messages, the kernel will try to pick a
+suitable source address for the outgoing packet. However, if no IPv4
+addresses are configured on the system at all, this will fail and we end up
+producing an ICMP message with a source address of 0.0.0.0. This can happen
+on a box routing IPv4 traffic via v6 nexthops, for instance.
+
+Since 0.0.0.0 is not generally routable on the internet, there's a good
+chance that such ICMP messages will never make it back to the sender of the
+original packet that the ICMP message was sent in response to. This, in
+turn, can create connectivity and PMTUd problems for senders. Fortunately,
+RFC7600 reserves a dummy address to be used as a source for ICMP
+messages (192.0.0.8/32), so let's teach the kernel to substitute that
+address as a last resort if the regular source address selection procedure
+fails.
+
+Below is a quick example reproducing this issue with network namespaces:
+
+ip netns add ns0
+ip l add type veth peer netns ns0
+ip l set dev veth0 up
+ip a add 10.0.0.1/24 dev veth0
+ip a add fc00:dead:cafe:42::1/64 dev veth0
+ip r add 10.1.0.0/24 via inet6 fc00:dead:cafe:42::2
+ip -n ns0 l set dev veth0 up
+ip -n ns0 a add fc00:dead:cafe:42::2/64 dev veth0
+ip -n ns0 r add 10.0.0.0/24 via inet6 fc00:dead:cafe:42::1
+ip netns exec ns0 sysctl -w net.ipv4.icmp_ratelimit=0
+ip netns exec ns0 sysctl -w net.ipv4.ip_forward=1
+tcpdump -tpni veth0 -c 2 icmp &
+ping -w 1 10.1.0.1 > /dev/null
+tcpdump: verbose output suppressed, use -v[v]... for full protocol decode
+listening on veth0, link-type EN10MB (Ethernet), snapshot length 262144 bytes
+IP 10.0.0.1 > 10.1.0.1: ICMP echo request, id 29, seq 1, length 64
+IP 0.0.0.0 > 10.0.0.1: ICMP net 10.1.0.1 unreachable, length 92
+2 packets captured
+2 packets received by filter
+0 packets dropped by kernel
+
+With this patch the above capture changes to:
+IP 10.0.0.1 > 10.1.0.1: ICMP echo request, id 31127, seq 1, length 64
+IP 192.0.0.8 > 10.0.0.1: ICMP net 10.1.0.1 unreachable, length 92
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Reported-by: Juliusz Chroboczek <jch@irif.fr>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/uapi/linux/in.h | 3 +++
+ net/ipv4/icmp.c | 7 +++++++
+ 2 files changed, 10 insertions(+)
+
+diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
+index 7d6687618d80..d1b327036ae4 100644
+--- a/include/uapi/linux/in.h
++++ b/include/uapi/linux/in.h
+@@ -289,6 +289,9 @@ struct sockaddr_in {
+ /* Address indicating an error return. */
+ #define INADDR_NONE ((unsigned long int) 0xffffffff)
+
++/* Dummy address for src of ICMP replies if no real address is set (RFC7600). */
++#define INADDR_DUMMY ((unsigned long int) 0xc0000008)
++
+ /* Network number for local host loopback. */
+ #define IN_LOOPBACKNET 127
+
+diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
+index 616e2dc1c8fa..cd65d3146c30 100644
+--- a/net/ipv4/icmp.c
++++ b/net/ipv4/icmp.c
+@@ -759,6 +759,13 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
+ icmp_param.data_len = room;
+ icmp_param.head_len = sizeof(struct icmphdr);
+
++ /* if we don't have a source address at this point, fall back to the
++ * dummy address instead of sending out a packet with a source address
++ * of 0.0.0.0
++ */
++ if (!fl4.saddr)
++ fl4.saddr = htonl(INADDR_DUMMY);
++
+ icmp_push_reply(&icmp_param, &fl4, &ipc, &rt);
+ ende:
+ ip_rt_put(rt);
+--
+2.30.2
+
--- /dev/null
+From 99b09b5a5dd93929aa3676e37fabe591630fb1cb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 12 Jun 2021 18:24:59 -0600
+Subject: ipv4: Fix device used for dst_alloc with local routes
+
+From: David Ahern <dsahern@kernel.org>
+
+[ Upstream commit b87b04f5019e821c8c6c7761f258402e43500a1f ]
+
+Oliver reported a use case where deleting a VRF device can hang
+waiting for the refcnt to drop to 0. The root cause is that the dst
+is allocated against the VRF device but cached on the loopback
+device.
+
+The use case (added to the selftests) has an implicit VRF crossing
+due to the ordering of the FIB rules (lookup local is before the
+l3mdev rule, but the problem occurs even if the FIB rules are
+re-ordered with local after l3mdev because the VRF table does not
+have a default route to terminate the lookup). The end result is
+that the FIB lookup returns the loopback device as the nexthop,
+but the ingress device is in a VRF. The mismatch causes the dst to be
+allocated against the VRF device but then cached on the loopback.
+
+The fix is to bring the trick used for IPv6 (see ip6_rt_get_dev_rcu):
+pick the dst alloc device based on the fib lookup result but with checks
+that the result has a nexthop device (e.g., not an unreachable or
+prohibit entry).
+
+Fixes: f5a0aab84b74 ("net: ipv4: dst for local input routes should use l3mdev if relevant")
+Reported-by: Oliver Herms <oliver.peter.herms@gmail.com>
+Signed-off-by: David Ahern <dsahern@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/route.c | 15 +++++++++++++-
+ tools/testing/selftests/net/fib_tests.sh | 25 ++++++++++++++++++++++++
+ 2 files changed, 39 insertions(+), 1 deletion(-)
+
+diff --git a/net/ipv4/route.c b/net/ipv4/route.c
+index d635b4f32d34..09506203156d 100644
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -2081,6 +2081,19 @@ martian_source:
+ return err;
+ }
+
++/* get device for dst_alloc with local routes */
++static struct net_device *ip_rt_get_dev(struct net *net,
++ const struct fib_result *res)
++{
++ struct fib_nh_common *nhc = res->fi ? res->nhc : NULL;
++ struct net_device *dev = NULL;
++
++ if (nhc)
++ dev = l3mdev_master_dev_rcu(nhc->nhc_dev);
++
++ return dev ? : net->loopback_dev;
++}
++
+ /*
+ * NOTE. We drop all the packets that has local source
+ * addresses, because every properly looped back packet
+@@ -2237,7 +2250,7 @@ local_input:
+ }
+ }
+
+- rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev,
++ rth = rt_dst_alloc(ip_rt_get_dev(net, res),
+ flags | RTCF_LOCAL, res->type,
+ IN_DEV_ORCONF(in_dev, NOPOLICY), false);
+ if (!rth)
+diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
+index 2b5707738609..6fad54c7ecb4 100755
+--- a/tools/testing/selftests/net/fib_tests.sh
++++ b/tools/testing/selftests/net/fib_tests.sh
+@@ -1384,12 +1384,37 @@ ipv4_rt_replace()
+ ipv4_rt_replace_mpath
+ }
+
++# checks that cached input route on VRF port is deleted
++# when VRF is deleted
++ipv4_local_rt_cache()
++{
++ run_cmd "ip addr add 10.0.0.1/32 dev lo"
++ run_cmd "ip netns add test-ns"
++ run_cmd "ip link add veth-outside type veth peer name veth-inside"
++ run_cmd "ip link add vrf-100 type vrf table 1100"
++ run_cmd "ip link set veth-outside master vrf-100"
++ run_cmd "ip link set veth-inside netns test-ns"
++ run_cmd "ip link set veth-outside up"
++ run_cmd "ip link set vrf-100 up"
++ run_cmd "ip route add 10.1.1.1/32 dev veth-outside table 1100"
++ run_cmd "ip netns exec test-ns ip link set veth-inside up"
++ run_cmd "ip netns exec test-ns ip addr add 10.1.1.1/32 dev veth-inside"
++ run_cmd "ip netns exec test-ns ip route add 10.0.0.1/32 dev veth-inside"
++ run_cmd "ip netns exec test-ns ip route add default via 10.0.0.1"
++ run_cmd "ip netns exec test-ns ping 10.0.0.1 -c 1 -i 1"
++ run_cmd "ip link delete vrf-100"
++
++ # if we do not hang test is a success
++ log_test $? 0 "Cached route removed from VRF port device"
++}
++
+ ipv4_route_test()
+ {
+ route_setup
+
+ ipv4_rt_add
+ ipv4_rt_replace
++ ipv4_local_rt_cache
+
+ route_cleanup
+ }
+--
+2.30.2
+
--- /dev/null
+From 2f725f00968081401b77d178b83e6ed4f50e0dda Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 15 Jun 2021 22:42:57 +0200
+Subject: lantiq: net: fix duplicated skb in rx descriptor ring
+
+From: Aleksander Jan Bajkowski <olek2@wp.pl>
+
+[ Upstream commit 7ea6cd16f1599c1eac6018751eadbc5fc736b99a ]
+
+The previous commit didn't fix the bug properly. By mistake, it replaces
+the pointer of the next skb in the descriptor ring instead of the current
+one. As a result, the two descriptors are assigned the same SKB. The error
+is seen during the iperf test when skb_put tries to insert a second packet
+and exceeds the available buffer.
+
+Fixes: c7718ee96dbc ("net: lantiq: fix memory corruption in RX ring ")
+Signed-off-by: Aleksander Jan Bajkowski <olek2@wp.pl>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/lantiq_xrx200.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c
+index 3da494df72f3..072075bc60ee 100644
+--- a/drivers/net/ethernet/lantiq_xrx200.c
++++ b/drivers/net/ethernet/lantiq_xrx200.c
+@@ -154,6 +154,7 @@ static int xrx200_close(struct net_device *net_dev)
+
+ static int xrx200_alloc_skb(struct xrx200_chan *ch)
+ {
++ struct sk_buff *skb = ch->skb[ch->dma.desc];
+ dma_addr_t mapping;
+ int ret = 0;
+
+@@ -168,6 +169,7 @@ static int xrx200_alloc_skb(struct xrx200_chan *ch)
+ XRX200_DMA_DATA_LEN, DMA_FROM_DEVICE);
+ if (unlikely(dma_mapping_error(ch->priv->dev, mapping))) {
+ dev_kfree_skb_any(ch->skb[ch->dma.desc]);
++ ch->skb[ch->dma.desc] = skb;
+ ret = -ENOMEM;
+ goto skip;
+ }
+@@ -198,7 +200,6 @@ static int xrx200_hw_receive(struct xrx200_chan *ch)
+ ch->dma.desc %= LTQ_DESC_NUM;
+
+ if (ret) {
+- ch->skb[ch->dma.desc] = skb;
+ net_dev->stats.rx_dropped++;
+ netdev_err(net_dev, "failed to allocate new rx buffer\n");
+ return ret;
+--
+2.30.2
+
--- /dev/null
+From b4b0a25d7992e13e0786ad8602b42f7ca654bc67 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 7 Jun 2021 14:08:35 +0100
+Subject: libbpf: Fixes incorrect rx_ring_setup_done
+
+From: Kev Jackson <foamdino@gmail.com>
+
+[ Upstream commit 11fc79fc9f2e395aa39fa5baccae62767c5d8280 ]
+
+When calling xsk_socket__create_shared(), the logic at line 1097 marks a
+boolean flag true within the xsk_umem structure to track setup progress
+in order to support multiple calls to the function. However, instead of
+marking umem->tx_ring_setup_done, the code incorrectly sets
+umem->rx_ring_setup_done. This leads to improper behaviour when
+creating and destroying xsk and umem structures.
+
+Multiple calls to this function are documented as supported.
+
+Fixes: ca7a83e2487a ("libbpf: Only create rx and tx XDP rings when necessary")
+Signed-off-by: Kev Jackson <foamdino@gmail.com>
+Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
+Acked-by: Yonghong Song <yhs@fb.com>
+Link: https://lore.kernel.org/bpf/YL4aU4f3Aaik7CN0@linux-dev
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/lib/bpf/xsk.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
+index 007fe5d59438..fe2bec500bf6 100644
+--- a/tools/lib/bpf/xsk.c
++++ b/tools/lib/bpf/xsk.c
+@@ -928,7 +928,7 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
+ goto out_put_ctx;
+ }
+ if (xsk->fd == umem->fd)
+- umem->rx_ring_setup_done = true;
++ umem->tx_ring_setup_done = true;
+ }
+
+ err = xsk_get_mmap_offsets(xsk->fd, &off);
+--
+2.30.2
+
--- /dev/null
+From 967dadfa726c529e2a84fa73398a9864400fc27b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 10 May 2021 12:16:49 +0800
+Subject: mac80211: fix skb length check in ieee80211_scan_rx()
+
+From: Du Cheng <ducheng2@gmail.com>
+
+[ Upstream commit e298aa358f0ca658406d524b6639fe389cb6e11e ]
+
+Replace hard-coded compile-time constants for header length check
+with dynamic determination based on the frame type. Otherwise, we
+hit a validation WARN_ON in cfg80211 later.
+
+Fixes: cd418ba63f0c ("mac80211: convert S1G beacon to scan results")
+Reported-by: syzbot+405843667e93b9790fc1@syzkaller.appspotmail.com
+Signed-off-by: Du Cheng <ducheng2@gmail.com>
+Link: https://lore.kernel.org/r/20210510041649.589754-1-ducheng2@gmail.com
+[style fixes, reword commit message]
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mac80211/scan.c | 21 ++++++++++++++++-----
+ 1 file changed, 16 insertions(+), 5 deletions(-)
+
+diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
+index d4cc9ac2d703..6b50cb5e0e3c 100644
+--- a/net/mac80211/scan.c
++++ b/net/mac80211/scan.c
+@@ -251,13 +251,24 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
+ struct ieee80211_mgmt *mgmt = (void *)skb->data;
+ struct ieee80211_bss *bss;
+ struct ieee80211_channel *channel;
++ size_t min_hdr_len = offsetof(struct ieee80211_mgmt,
++ u.probe_resp.variable);
++
++ if (!ieee80211_is_probe_resp(mgmt->frame_control) &&
++ !ieee80211_is_beacon(mgmt->frame_control) &&
++ !ieee80211_is_s1g_beacon(mgmt->frame_control))
++ return;
+
+ if (ieee80211_is_s1g_beacon(mgmt->frame_control)) {
+- if (skb->len < 15)
+- return;
+- } else if (skb->len < 24 ||
+- (!ieee80211_is_probe_resp(mgmt->frame_control) &&
+- !ieee80211_is_beacon(mgmt->frame_control)))
++ if (ieee80211_is_s1g_short_beacon(mgmt->frame_control))
++ min_hdr_len = offsetof(struct ieee80211_ext,
++ u.s1g_short_beacon.variable);
++ else
++ min_hdr_len = offsetof(struct ieee80211_ext,
++ u.s1g_beacon);
++ }
++
++ if (skb->len < min_hdr_len)
+ return;
+
+ sdata1 = rcu_dereference(local->scan_sdata);
+--
+2.30.2
+
--- /dev/null
+From e93617ddfd3ca6507c5b20118982cac8f4325bc2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 6 Jun 2021 11:24:32 +0300
+Subject: mlxsw: core: Set thermal zone polling delay argument to real value at
+ init
+
+From: Mykola Kostenok <c_mykolak@nvidia.com>
+
+[ Upstream commit 2fd8d84ce3095e8a7b5fe96532c91b1b9e07339c ]
+
+The thermal polling delay argument for module and gearbox thermal zones
+used to be initialized to zero, while the actual delay used to be set by
+mlxsw_thermal_set_mode() via the thermal operation callback
+set_mode(). Since the set_mode()/get_mode() operations were removed by the
+cited commits, module and gearbox thermal zones always have their polling
+time set to zero and do not perform temperature monitoring.
+
+Set non-zero "polling_delay" in thermal_zone_device_register() routine,
+thus, the relevant thermal zones will perform thermal monitoring.
+
+Cc: Andrzej Pietrasiewicz <andrzej.p@collabora.com>
+Fixes: 5d7bd8aa7c35 ("thermal: Simplify or eliminate unnecessary set_mode() methods")
+Fixes: 1ee14820fd8e ("thermal: remove get_mode() operation of drivers")
+Signed-off-by: Mykola Kostenok <c_mykolak@nvidia.com>
+Acked-by: Vadim Pasternak <vadimp@nvidia.com>
+Reviewed-by: Jiri Pirko <jiri@nvidia.com>
+Signed-off-by: Ido Schimmel <idosch@nvidia.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlxsw/core_thermal.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
+index bf85ce9835d7..42e4437ac3c1 100644
+--- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
++++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
+@@ -708,7 +708,8 @@ mlxsw_thermal_module_tz_init(struct mlxsw_thermal_module *module_tz)
+ MLXSW_THERMAL_TRIP_MASK,
+ module_tz,
+ &mlxsw_thermal_module_ops,
+- NULL, 0, 0);
++ NULL, 0,
++ module_tz->parent->polling_delay);
+ if (IS_ERR(module_tz->tzdev)) {
+ err = PTR_ERR(module_tz->tzdev);
+ return err;
+@@ -830,7 +831,8 @@ mlxsw_thermal_gearbox_tz_init(struct mlxsw_thermal_module *gearbox_tz)
+ MLXSW_THERMAL_TRIP_MASK,
+ gearbox_tz,
+ &mlxsw_thermal_gearbox_ops,
+- NULL, 0, 0);
++ NULL, 0,
++ gearbox_tz->parent->polling_delay);
+ if (IS_ERR(gearbox_tz->tzdev))
+ return PTR_ERR(gearbox_tz->tzdev);
+
+--
+2.30.2
+
--- /dev/null
+From 18cd184b19485619b40af475182474ec5e3a42f6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 6 Jun 2021 11:24:30 +0300
+Subject: mlxsw: reg: Spectrum-3: Enforce lowest max-shaper burst size of 11
+
+From: Petr Machata <petrm@nvidia.com>
+
+[ Upstream commit 306b9228c097b4101c150ccd262372ded8348644 ]
+
+A max-shaper is the HW component responsible for delaying egress traffic
+above a configured transmission rate. Burst size is the amount of traffic
+that is allowed to pass without accounting. The burst size value needs to
+be such that it can be expressed as 2^BS * 512 bits, where BS lies in a
+certain ASIC-dependent range. mlxsw enforces that this holds before
+attempting to configure the shaper.
+
+The assumption for Spectrum-3 was that the lower limit of BS would be 5,
+like for Spectrum-1. But as of now, the limit is still 11. Therefore fix
+the driver accordingly, so that incorrect values are rejected early with a
+proper message.
+
+Fixes: 23effa2479ba ("mlxsw: reg: Add max_shaper_bs to QoS ETS Element Configuration")
+Reported-by: Maksym Yaremchuk <maksymy@nvidia.com>
+Signed-off-by: Petr Machata <petrm@nvidia.com>
+Signed-off-by: Ido Schimmel <idosch@nvidia.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlxsw/reg.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
+index c4adc7f740d3..769386971ac3 100644
+--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
++++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
+@@ -3863,7 +3863,7 @@ MLXSW_ITEM32(reg, qeec, max_shaper_bs, 0x1C, 0, 6);
+ #define MLXSW_REG_QEEC_HIGHEST_SHAPER_BS 25
+ #define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP1 5
+ #define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP2 11
+-#define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP3 5
++#define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP3 11
+
+ static inline void mlxsw_reg_qeec_pack(char *payload, u8 local_port,
+ enum mlxsw_reg_qeec_hr hr, u8 index,
+--
+2.30.2
+
--- /dev/null
+From 429f59e778f66516b574411ca0eff7c2757aae67 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 10 Jun 2021 15:59:42 -0700
+Subject: mptcp: do not warn on bad input from the network
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit 61e710227e97172355d5f150d5c78c64175d9fb2 ]
+
+warn_bad_map() produces a kernel WARN on bad input coming
+from the network. Use pr_debug() to avoid spamming the system
+log.
+
+Additionally, when the right bound check fails, warn_bad_map() reports
+the wrong ssn value, let's fix it.
+
+Fixes: 648ef4b88673 ("mptcp: Implement MPTCP receive path")
+Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/107
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mptcp/subflow.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
+index 1ee4d106ce1c..98a5a68ec15d 100644
+--- a/net/mptcp/subflow.c
++++ b/net/mptcp/subflow.c
+@@ -754,10 +754,10 @@ static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq)
+ return seq | ((old_seq + old_data_len + 1) & GENMASK_ULL(63, 32));
+ }
+
+-static void warn_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
++static void dbg_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
+ {
+- WARN_ONCE(1, "Bad mapping: ssn=%d map_seq=%d map_data_len=%d",
+- ssn, subflow->map_subflow_seq, subflow->map_data_len);
++ pr_debug("Bad mapping: ssn=%d map_seq=%d map_data_len=%d",
++ ssn, subflow->map_subflow_seq, subflow->map_data_len);
+ }
+
+ static bool skb_is_fully_mapped(struct sock *ssk, struct sk_buff *skb)
+@@ -782,13 +782,13 @@ static bool validate_mapping(struct sock *ssk, struct sk_buff *skb)
+ /* Mapping covers data later in the subflow stream,
+ * currently unsupported.
+ */
+- warn_bad_map(subflow, ssn);
++ dbg_bad_map(subflow, ssn);
+ return false;
+ }
+ if (unlikely(!before(ssn, subflow->map_subflow_seq +
+ subflow->map_data_len))) {
+ /* Mapping does covers past subflow data, invalid */
+- warn_bad_map(subflow, ssn + skb->len);
++ dbg_bad_map(subflow, ssn);
+ return false;
+ }
+ return true;
+--
+2.30.2
+
--- /dev/null
+From be2b06a38721f9a75fcf5f39788e3deef7b04693 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 10 Jun 2021 19:40:30 +0300
+Subject: mptcp: Fix out of bounds when parsing TCP options
+
+From: Maxim Mikityanskiy <maximmi@nvidia.com>
+
+[ Upstream commit 07718be265680dcf496347d475ce1a5442f55ad7 ]
+
+The TCP option parser in mptcp (mptcp_get_options) could read one byte
+out of bounds. When the length is 1, the execution flow gets into the
+loop, reads one byte of the opcode, and if the opcode is neither
+TCPOPT_EOL nor TCPOPT_NOP, it reads one more byte, which exceeds the
+length of 1.
+
+This fix is inspired by commit 9609dad263f8 ("ipv4: tcp_input: fix stack
+out of bounds when parsing TCP options.").
+
+Cc: Young Xiao <92siuyang@gmail.com>
+Fixes: cec37a6e41aa ("mptcp: Handle MP_CAPABLE options for outgoing connections")
+Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
+Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mptcp/options.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/net/mptcp/options.c b/net/mptcp/options.c
+index 8848a9e2a95b..47d90cf31f12 100644
+--- a/net/mptcp/options.c
++++ b/net/mptcp/options.c
+@@ -337,6 +337,8 @@ void mptcp_get_options(const struct sk_buff *skb,
+ length--;
+ continue;
+ default:
++ if (length < 2)
++ return;
+ opsize = *ptr++;
+ if (opsize < 2) /* "silly options" */
+ return;
+--
+2.30.2
+
--- /dev/null
+From 1f7da5e67391141e8c8fea8ea920cbd4d41be9fb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 10 Jun 2021 15:59:44 -0700
+Subject: mptcp: fix soft lookup in subflow_error_report()
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit 499ada5073361c631f2a3c4a8aed44d53b6f82ec ]
+
+Maxim reported a soft lockup in subflow_error_report():
+
+ watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [swapper/0:0]
+ RIP: 0010:native_queued_spin_lock_slowpath
+ RSP: 0018:ffffa859c0003bc0 EFLAGS: 00000202
+ RAX: 0000000000000101 RBX: 0000000000000001 RCX: 0000000000000000
+ RDX: ffff9195c2772d88 RSI: 0000000000000000 RDI: ffff9195c2772d88
+ RBP: ffff9195c2772d00 R08: 00000000000067b0 R09: c6e31da9eb1e44f4
+ R10: ffff9195ef379700 R11: ffff9195edb50710 R12: ffff9195c2772d88
+ R13: ffff9195f500e3d0 R14: ffff9195ef379700 R15: ffff9195ef379700
+ FS: 0000000000000000(0000) GS:ffff91961f400000(0000) knlGS:0000000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 000000c000407000 CR3: 0000000002988000 CR4: 00000000000006f0
+ Call Trace:
+ <IRQ>
+ _raw_spin_lock_bh
+ subflow_error_report
+ mptcp_subflow_data_available
+ __mptcp_move_skbs_from_subflow
+ mptcp_data_ready
+ tcp_data_queue
+ tcp_rcv_established
+ tcp_v4_do_rcv
+ tcp_v4_rcv
+ ip_protocol_deliver_rcu
+ ip_local_deliver_finish
+ __netif_receive_skb_one_core
+ netif_receive_skb
+ rtl8139_poll 8139too
+ __napi_poll
+ net_rx_action
+ __do_softirq
+ __irq_exit_rcu
+ common_interrupt
+ </IRQ>
+
+The calling function - mptcp_subflow_data_available() - can be invoked
+from different contexts:
+- plain ssk socket lock
+- ssk socket lock + mptcp_data_lock
+- ssk socket lock + mptcp_data_lock + msk socket lock.
+
+Since subflow_error_report() tries to acquire the mptcp_data_lock, the
+latter two call chains will cause a soft lockup.
+
+This change addresses the issue by moving the error reporting call to
+outer functions, where the list of held locks is known and we can
+acquire only the needed one.
+
+Reported-by: Maxim Galaganov <max@internet.ru>
+Fixes: 15cc10453398 ("mptcp: deliver ssk errors to msk")
+Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/199
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mptcp/protocol.c | 9 ++++++
+ net/mptcp/subflow.c | 75 +++++++++++++++++++++++---------------------
+ 2 files changed, 48 insertions(+), 36 deletions(-)
+
+diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
+index 78152b0820ce..d8187ac06539 100644
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -699,6 +699,12 @@ static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
+
+ __mptcp_move_skbs_from_subflow(msk, ssk, &moved);
+ __mptcp_ofo_queue(msk);
++ if (unlikely(ssk->sk_err)) {
++ if (!sock_owned_by_user(sk))
++ __mptcp_error_report(sk);
++ else
++ set_bit(MPTCP_ERROR_REPORT, &msk->flags);
++ }
+
+ /* If the moves have caught up with the DATA_FIN sequence number
+ * it's time to ack the DATA_FIN and change socket state, but
+@@ -1932,6 +1938,9 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk)
+ done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved);
+ mptcp_data_unlock(sk);
+ tcp_cleanup_rbuf(ssk, moved);
++
++ if (unlikely(ssk->sk_err))
++ __mptcp_error_report(sk);
+ unlock_sock_fast(ssk, slowpath);
+ } while (!done);
+
+diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
+index 98a5a68ec15d..d6d8ad4f918e 100644
+--- a/net/mptcp/subflow.c
++++ b/net/mptcp/subflow.c
+@@ -1033,7 +1033,6 @@ fallback:
+ * subflow_error_report() will introduce the appropriate barriers
+ */
+ ssk->sk_err = EBADMSG;
+- ssk->sk_error_report(ssk);
+ tcp_set_state(ssk, TCP_CLOSE);
+ tcp_send_active_reset(ssk, GFP_ATOMIC);
+ WRITE_ONCE(subflow->data_avail, 0);
+@@ -1086,41 +1085,6 @@ void mptcp_space(const struct sock *ssk, int *space, int *full_space)
+ *full_space = tcp_full_space(sk);
+ }
+
+-static void subflow_data_ready(struct sock *sk)
+-{
+- struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+- u16 state = 1 << inet_sk_state_load(sk);
+- struct sock *parent = subflow->conn;
+- struct mptcp_sock *msk;
+-
+- msk = mptcp_sk(parent);
+- if (state & TCPF_LISTEN) {
+- /* MPJ subflow are removed from accept queue before reaching here,
+- * avoid stray wakeups
+- */
+- if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue))
+- return;
+-
+- set_bit(MPTCP_DATA_READY, &msk->flags);
+- parent->sk_data_ready(parent);
+- return;
+- }
+-
+- WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable &&
+- !subflow->mp_join && !(state & TCPF_CLOSE));
+-
+- if (mptcp_subflow_data_available(sk))
+- mptcp_data_ready(parent, sk);
+-}
+-
+-static void subflow_write_space(struct sock *ssk)
+-{
+- struct sock *sk = mptcp_subflow_ctx(ssk)->conn;
+-
+- mptcp_propagate_sndbuf(sk, ssk);
+- mptcp_write_space(sk);
+-}
+-
+ void __mptcp_error_report(struct sock *sk)
+ {
+ struct mptcp_subflow_context *subflow;
+@@ -1161,6 +1125,43 @@ static void subflow_error_report(struct sock *ssk)
+ mptcp_data_unlock(sk);
+ }
+
++static void subflow_data_ready(struct sock *sk)
++{
++ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
++ u16 state = 1 << inet_sk_state_load(sk);
++ struct sock *parent = subflow->conn;
++ struct mptcp_sock *msk;
++
++ msk = mptcp_sk(parent);
++ if (state & TCPF_LISTEN) {
++ /* MPJ subflow are removed from accept queue before reaching here,
++ * avoid stray wakeups
++ */
++ if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue))
++ return;
++
++ set_bit(MPTCP_DATA_READY, &msk->flags);
++ parent->sk_data_ready(parent);
++ return;
++ }
++
++ WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable &&
++ !subflow->mp_join && !(state & TCPF_CLOSE));
++
++ if (mptcp_subflow_data_available(sk))
++ mptcp_data_ready(parent, sk);
++ else if (unlikely(sk->sk_err))
++ subflow_error_report(sk);
++}
++
++static void subflow_write_space(struct sock *ssk)
++{
++ struct sock *sk = mptcp_subflow_ctx(ssk)->conn;
++
++ mptcp_propagate_sndbuf(sk, ssk);
++ mptcp_write_space(sk);
++}
++
+ static struct inet_connection_sock_af_ops *
+ subflow_default_af_ops(struct sock *sk)
+ {
+@@ -1469,6 +1470,8 @@ static void subflow_state_change(struct sock *sk)
+ */
+ if (mptcp_subflow_data_available(sk))
+ mptcp_data_ready(parent, sk);
++ else if (unlikely(sk->sk_err))
++ subflow_error_report(sk);
+
+ subflow_sched_work_if_closed(mptcp_sk(parent), sk);
+
+--
+2.30.2
+
--- /dev/null
+From 718e9386f0ebbb4ea31224c97db0076eeee4bede Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 10 Jun 2021 15:59:40 -0700
+Subject: mptcp: try harder to borrow memory from subflow under pressure
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit 72f961320d5d15bfcb26dbe3edaa3f7d25fd2c8a ]
+
+If the host is under severe memory pressure, and RX forward
+memory allocation for the msk fails, we try to borrow the
+required memory from the ingress subflow.
+
+The current attempt is a bit flaky: if skb->truesize is less
+than SK_MEM_QUANTUM, the ssk will not release any memory, and
+the next schedule will fail again.
+
+Instead, directly move the required amount of pages from the
+ssk to the msk, if available.
+
+Fixes: 9c3f94e1681b ("mptcp: add missing memory scheduling in the rx path")
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mptcp/protocol.c | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
+index 225b98821517..1d981babbcfe 100644
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -287,11 +287,13 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
+
+ /* try to fetch required memory from subflow */
+ if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
+- if (ssk->sk_forward_alloc < skb->truesize)
+- goto drop;
+- __sk_mem_reclaim(ssk, skb->truesize);
+- if (!sk_rmem_schedule(sk, skb, skb->truesize))
++ int amount = sk_mem_pages(skb->truesize) << SK_MEM_QUANTUM_SHIFT;
++
++ if (ssk->sk_forward_alloc < amount)
+ goto drop;
++
++ ssk->sk_forward_alloc -= amount;
++ sk->sk_forward_alloc += amount;
+ }
+
+ /* the skb map_seq accounts for the skb offset:
+--
+2.30.2
+
--- /dev/null
+From 54fea02c17b8ff0e624c79dbeb56b54362b4f452 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 10 Jun 2021 15:59:41 -0700
+Subject: mptcp: wake-up readers only for in sequence data
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit 99d1055ce2469dca3dd14be0991ff8133e25e3d0 ]
+
+Currently we rely on the subflow->data_avail field, which is subject to
+races:
+
+ ssk1
+ skb len = 500 DSS(seq=1, len=1000, off=0)
+ # data_avail == MPTCP_SUBFLOW_DATA_AVAIL
+
+ ssk2
+ skb len = 500 DSS(seq = 501, len=1000)
+ # data_avail == MPTCP_SUBFLOW_DATA_AVAIL
+
+ ssk1
+ skb len = 500 DSS(seq = 1, len=1000, off =500)
+ # still data_avail == MPTCP_SUBFLOW_DATA_AVAIL,
+ # as the skb is covered by a pre-existing map,
+ # which was in-sequence at reception time.
+
+Instead we can explicitly check if some data has been received in-sequence,
+propagating the info from __mptcp_move_skbs_from_subflow().
+
+Additionally add the 'ONCE' annotation to the 'data_avail' memory
+access, as msk will read it outside the subflow socket lock.
+
+Fixes: 648ef4b88673 ("mptcp: Implement MPTCP receive path")
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mptcp/protocol.c | 33 ++++++++++++---------------------
+ net/mptcp/protocol.h | 1 -
+ net/mptcp/subflow.c | 23 +++++++++--------------
+ 3 files changed, 21 insertions(+), 36 deletions(-)
+
+diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
+index 1d981babbcfe..78152b0820ce 100644
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -689,15 +689,13 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
+ /* In most cases we will be able to lock the mptcp socket. If its already
+ * owned, we need to defer to the work queue to avoid ABBA deadlock.
+ */
+-static void move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
++static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
+ {
+ struct sock *sk = (struct sock *)msk;
+ unsigned int moved = 0;
+
+ if (inet_sk_state_load(sk) == TCP_CLOSE)
+- return;
+-
+- mptcp_data_lock(sk);
++ return false;
+
+ __mptcp_move_skbs_from_subflow(msk, ssk, &moved);
+ __mptcp_ofo_queue(msk);
+@@ -709,7 +707,7 @@ static void move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
+ */
+ if (mptcp_pending_data_fin(sk, NULL))
+ mptcp_schedule_work(sk);
+- mptcp_data_unlock(sk);
++ return moved > 0;
+ }
+
+ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
+@@ -717,7 +715,6 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ struct mptcp_sock *msk = mptcp_sk(sk);
+ int sk_rbuf, ssk_rbuf;
+- bool wake;
+
+ /* The peer can send data while we are shutting down this
+ * subflow at msk destruction time, but we must avoid enqueuing
+@@ -726,28 +723,22 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
+ if (unlikely(subflow->disposable))
+ return;
+
+- /* move_skbs_to_msk below can legitly clear the data_avail flag,
+- * but we will need later to properly woke the reader, cache its
+- * value
+- */
+- wake = subflow->data_avail == MPTCP_SUBFLOW_DATA_AVAIL;
+- if (wake)
+- set_bit(MPTCP_DATA_READY, &msk->flags);
+-
+ ssk_rbuf = READ_ONCE(ssk->sk_rcvbuf);
+ sk_rbuf = READ_ONCE(sk->sk_rcvbuf);
+ if (unlikely(ssk_rbuf > sk_rbuf))
+ sk_rbuf = ssk_rbuf;
+
+- /* over limit? can't append more skbs to msk */
++ /* over limit? can't append more skbs to msk, Also, no need to wake-up*/
+ if (atomic_read(&sk->sk_rmem_alloc) > sk_rbuf)
+- goto wake;
+-
+- move_skbs_to_msk(msk, ssk);
++ return;
+
+-wake:
+- if (wake)
++ /* Wake-up the reader only for in-sequence data */
++ mptcp_data_lock(sk);
++ if (move_skbs_to_msk(msk, ssk)) {
++ set_bit(MPTCP_DATA_READY, &msk->flags);
+ sk->sk_data_ready(sk);
++ }
++ mptcp_data_unlock(sk);
+ }
+
+ void __mptcp_flush_join_list(struct mptcp_sock *msk)
+@@ -850,7 +841,7 @@ static struct sock *mptcp_subflow_recv_lookup(const struct mptcp_sock *msk)
+ sock_owned_by_me(sk);
+
+ mptcp_for_each_subflow(msk, subflow) {
+- if (subflow->data_avail)
++ if (READ_ONCE(subflow->data_avail))
+ return mptcp_subflow_tcp_sock(subflow);
+ }
+
+diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
+index e21a5bc36cf0..14e89e4bd4a8 100644
+--- a/net/mptcp/protocol.h
++++ b/net/mptcp/protocol.h
+@@ -372,7 +372,6 @@ mptcp_subflow_rsk(const struct request_sock *rsk)
+ enum mptcp_data_avail {
+ MPTCP_SUBFLOW_NODATA,
+ MPTCP_SUBFLOW_DATA_AVAIL,
+- MPTCP_SUBFLOW_OOO_DATA
+ };
+
+ struct mptcp_delegated_action {
+diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
+index 8425cd393bf3..1ee4d106ce1c 100644
+--- a/net/mptcp/subflow.c
++++ b/net/mptcp/subflow.c
+@@ -974,7 +974,7 @@ static bool subflow_check_data_avail(struct sock *ssk)
+ pr_debug("msk=%p ssk=%p data_avail=%d skb=%p", subflow->conn, ssk,
+ subflow->data_avail, skb_peek(&ssk->sk_receive_queue));
+ if (!skb_peek(&ssk->sk_receive_queue))
+- subflow->data_avail = 0;
++ WRITE_ONCE(subflow->data_avail, 0);
+ if (subflow->data_avail)
+ return true;
+
+@@ -1012,18 +1012,13 @@ static bool subflow_check_data_avail(struct sock *ssk)
+ ack_seq = mptcp_subflow_get_mapped_dsn(subflow);
+ pr_debug("msk ack_seq=%llx subflow ack_seq=%llx", old_ack,
+ ack_seq);
+- if (ack_seq == old_ack) {
+- subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
+- break;
+- } else if (after64(ack_seq, old_ack)) {
+- subflow->data_avail = MPTCP_SUBFLOW_OOO_DATA;
+- break;
++ if (unlikely(before64(ack_seq, old_ack))) {
++ mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq);
++ continue;
+ }
+
+- /* only accept in-sequence mapping. Old values are spurious
+- * retransmission
+- */
+- mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq);
++ WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL);
++ break;
+ }
+ return true;
+
+@@ -1041,7 +1036,7 @@ fallback:
+ ssk->sk_error_report(ssk);
+ tcp_set_state(ssk, TCP_CLOSE);
+ tcp_send_active_reset(ssk, GFP_ATOMIC);
+- subflow->data_avail = 0;
++ WRITE_ONCE(subflow->data_avail, 0);
+ return false;
+ }
+
+@@ -1051,7 +1046,7 @@ fallback:
+ subflow->map_seq = READ_ONCE(msk->ack_seq);
+ subflow->map_data_len = skb->len;
+ subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - subflow->ssn_offset;
+- subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
++ WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL);
+ return true;
+ }
+
+@@ -1063,7 +1058,7 @@ bool mptcp_subflow_data_available(struct sock *sk)
+ if (subflow->map_valid &&
+ mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) {
+ subflow->map_valid = 0;
+- subflow->data_avail = 0;
++ WRITE_ONCE(subflow->data_avail, 0);
+
+ pr_debug("Done with mapping: seq=%u data_len=%u",
+ subflow->map_subflow_seq,
+--
+2.30.2
+
--- /dev/null
+From b7c00b482b46f791ce5ccc61fa6a12a82da8c6a0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Jun 2021 07:47:15 -0700
+Subject: net/af_unix: fix a data-race in unix_dgram_sendmsg /
+ unix_release_sock
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit a494bd642d9120648b06bb7d28ce6d05f55a7819 ]
+
+While unix_may_send(sk, osk) is called while osk is locked, it appears
+unix_release_sock() can overwrite unix_peer() after this lock has been
+released, making KCSAN unhappy.
+
+Changing unix_release_sock() to access/change unix_peer()
+before lock is released should fix this issue.
+
+BUG: KCSAN: data-race in unix_dgram_sendmsg / unix_release_sock
+
+write to 0xffff88810465a338 of 8 bytes by task 20852 on cpu 1:
+ unix_release_sock+0x4ed/0x6e0 net/unix/af_unix.c:558
+ unix_release+0x2f/0x50 net/unix/af_unix.c:859
+ __sock_release net/socket.c:599 [inline]
+ sock_close+0x6c/0x150 net/socket.c:1258
+ __fput+0x25b/0x4e0 fs/file_table.c:280
+ ____fput+0x11/0x20 fs/file_table.c:313
+ task_work_run+0xae/0x130 kernel/task_work.c:164
+ tracehook_notify_resume include/linux/tracehook.h:189 [inline]
+ exit_to_user_mode_loop kernel/entry/common.c:175 [inline]
+ exit_to_user_mode_prepare+0x156/0x190 kernel/entry/common.c:209
+ __syscall_exit_to_user_mode_work kernel/entry/common.c:291 [inline]
+ syscall_exit_to_user_mode+0x20/0x40 kernel/entry/common.c:302
+ do_syscall_64+0x56/0x90 arch/x86/entry/common.c:57
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+read to 0xffff88810465a338 of 8 bytes by task 20888 on cpu 0:
+ unix_may_send net/unix/af_unix.c:189 [inline]
+ unix_dgram_sendmsg+0x923/0x1610 net/unix/af_unix.c:1712
+ sock_sendmsg_nosec net/socket.c:654 [inline]
+ sock_sendmsg net/socket.c:674 [inline]
+ ____sys_sendmsg+0x360/0x4d0 net/socket.c:2350
+ ___sys_sendmsg net/socket.c:2404 [inline]
+ __sys_sendmmsg+0x315/0x4b0 net/socket.c:2490
+ __do_sys_sendmmsg net/socket.c:2519 [inline]
+ __se_sys_sendmmsg net/socket.c:2516 [inline]
+ __x64_sys_sendmmsg+0x53/0x60 net/socket.c:2516
+ do_syscall_64+0x4a/0x90 arch/x86/entry/common.c:47
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+value changed: 0xffff888167905400 -> 0x0000000000000000
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 0 PID: 20888 Comm: syz-executor.0 Not tainted 5.13.0-rc5-syzkaller #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/unix/af_unix.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
+index 5a31307ceb76..5d1192ceb139 100644
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -535,12 +535,14 @@ static void unix_release_sock(struct sock *sk, int embrion)
+ u->path.mnt = NULL;
+ state = sk->sk_state;
+ sk->sk_state = TCP_CLOSE;
++
++ skpair = unix_peer(sk);
++ unix_peer(sk) = NULL;
++
+ unix_state_unlock(sk);
+
+ wake_up_interruptible_all(&u->peer_wait);
+
+- skpair = unix_peer(sk);
+-
+ if (skpair != NULL) {
+ if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
+ unix_state_lock(skpair);
+@@ -555,7 +557,6 @@ static void unix_release_sock(struct sock *sk, int embrion)
+
+ unix_dgram_peer_wake_disconnect(sk, skpair);
+ sock_put(skpair); /* It may now die */
+- unix_peer(sk) = NULL;
+ }
+
+ /* Try to flush out this socket. Throw out buffers at least */
+--
+2.30.2
+
--- /dev/null
+From 903b702ae55cf85e6aea0fc6ac5b94319a4904e9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 17 Jun 2021 07:32:32 +0800
+Subject: net: cdc_eem: fix tx fixup skb leak
+
+From: Linyu Yuan <linyyuan@codeaurora.org>
+
+[ Upstream commit c3b26fdf1b32f91c7a3bc743384b4a298ab53ad7 ]
+
+When usbnet transmits an skb, eem fixes it up in eem_tx_fixup().
+If skb_copy_expand() fails, it returns NULL, and
+usbnet_start_xmit() has no chance to free the original skb.
+
+Fix it by freeing the original skb in eem_tx_fixup() first,
+then checking the skb copy status; if it failed, return NULL to usbnet.
+
+Fixes: 9f722c0978b0 ("usbnet: CDC EEM support (v5)")
+Signed-off-by: Linyu Yuan <linyyuan@codeaurora.org>
+Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/usb/cdc_eem.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/usb/cdc_eem.c b/drivers/net/usb/cdc_eem.c
+index 0eeec80bec31..e4a570366646 100644
+--- a/drivers/net/usb/cdc_eem.c
++++ b/drivers/net/usb/cdc_eem.c
+@@ -123,10 +123,10 @@ static struct sk_buff *eem_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
+ }
+
+ skb2 = skb_copy_expand(skb, EEM_HEAD, ETH_FCS_LEN + padlen, flags);
++ dev_kfree_skb_any(skb);
+ if (!skb2)
+ return NULL;
+
+- dev_kfree_skb_any(skb);
+ skb = skb2;
+
+ done:
+--
+2.30.2
+
--- /dev/null
+From 14b26416c1ccba70332dc4494568247712b21af7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 15 Jun 2021 01:05:49 -0700
+Subject: net: cdc_ncm: switch to eth%d interface naming
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Maciej Żenczykowski <maze@google.com>
+
+[ Upstream commit c1a3d4067309451e68c33dbd356032549cc0bd8e ]
+
+This is meant to make the host side cdc_ncm interface consistently
+named just like the older CDC protocols: cdc_ether & cdc_ecm
+(and even rndis_host), which all use 'FLAG_ETHER | FLAG_POINTTOPOINT'.
+
+include/linux/usb/usbnet.h:
+ #define FLAG_ETHER 0x0020 /* maybe use "eth%d" names */
+ #define FLAG_WLAN 0x0080 /* use "wlan%d" names */
+ #define FLAG_WWAN 0x0400 /* use "wwan%d" names */
+ #define FLAG_POINTTOPOINT 0x1000 /* possibly use "usb%d" names */
+
+drivers/net/usb/usbnet.c @ line 1711:
+ strcpy (net->name, "usb%d");
+ ...
+ // heuristic: "usb%d" for links we know are two-host,
+ // else "eth%d" when there's reasonable doubt. userspace
+ // can rename the link if it knows better.
+ if ((dev->driver_info->flags & FLAG_ETHER) != 0 &&
+ ((dev->driver_info->flags & FLAG_POINTTOPOINT) == 0 ||
+ (net->dev_addr [0] & 0x02) == 0))
+ strcpy (net->name, "eth%d");
+ /* WLAN devices should always be named "wlan%d" */
+ if ((dev->driver_info->flags & FLAG_WLAN) != 0)
+ strcpy(net->name, "wlan%d");
+ /* WWAN devices should always be named "wwan%d" */
+ if ((dev->driver_info->flags & FLAG_WWAN) != 0)
+ strcpy(net->name, "wwan%d");
+
+So by using ETHER | POINTTOPOINT the interface naming is
+either usb%d or eth%d based on the global uniqueness of the
+mac address of the device.
+
+Without this 2.5gbps ethernet dongles which all seem to use the cdc_ncm
+driver end up being called usb%d instead of eth%d even though they're
+definitely not two-host. (All 1gbps & 5gbps ethernet usb dongles I've
+tested don't hit this problem due to use of different drivers, primarily
+r8152 and aqc111)
+
+Fixes tag is based purely on git blame, and is really just here to make
+sure this hits LTS branches newer than v4.5.
+
+Cc: Lorenzo Colitti <lorenzo@google.com>
+Fixes: 4d06dd537f95 ("cdc_ncm: do not call usbnet_link_change from cdc_ncm_bind")
+Signed-off-by: Maciej Żenczykowski <maze@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/usb/cdc_ncm.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
+index 8acf30115428..dc3d84b43e4e 100644
+--- a/drivers/net/usb/cdc_ncm.c
++++ b/drivers/net/usb/cdc_ncm.c
+@@ -1902,7 +1902,7 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb)
+ static const struct driver_info cdc_ncm_info = {
+ .description = "CDC NCM",
+ .flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET
+- | FLAG_LINK_INTR,
++ | FLAG_LINK_INTR | FLAG_ETHER,
+ .bind = cdc_ncm_bind,
+ .unbind = cdc_ncm_unbind,
+ .manage_power = usbnet_manage_power,
+--
+2.30.2
+
--- /dev/null
+From 9c7484f893504483e836e7157825dc4b94943831 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Jun 2021 14:15:35 +0300
+Subject: net: dsa: felix: re-enable TX flow control in ocelot_port_flush()
+
+From: Vladimir Oltean <vladimir.oltean@nxp.com>
+
+[ Upstream commit 1650bdb1c516c248fb06f6d076559ff6437a5853 ]
+
+Because flow control is set up statically in ocelot_init_port(), and not
+in phylink_mac_link_up(), what happens is that after the blamed commit,
+the flow control remains disabled after the port flushing procedure.
+
+Fixes: eb4733d7cffc ("net: dsa: felix: implement port flushing on .phylink_mac_link_down")
+Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mscc/ocelot.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
+index 46e5c9136bac..0c4c976548c8 100644
+--- a/drivers/net/ethernet/mscc/ocelot.c
++++ b/drivers/net/ethernet/mscc/ocelot.c
+@@ -378,6 +378,7 @@ static u32 ocelot_read_eq_avail(struct ocelot *ocelot, int port)
+
+ int ocelot_port_flush(struct ocelot *ocelot, int port)
+ {
++ unsigned int pause_ena;
+ int err, val;
+
+ /* Disable dequeuing from the egress queues */
+@@ -386,6 +387,7 @@ int ocelot_port_flush(struct ocelot *ocelot, int port)
+ QSYS_PORT_MODE, port);
+
+ /* Disable flow control */
++ ocelot_fields_read(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, &pause_ena);
+ ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, 0);
+
+ /* Disable priority flow control */
+@@ -421,6 +423,9 @@ int ocelot_port_flush(struct ocelot *ocelot, int port)
+ /* Clear flushing again. */
+ ocelot_rmw_gix(ocelot, 0, REW_PORT_CFG_FLUSH_ENA, REW_PORT_CFG, port);
+
++ /* Re-enable flow control */
++ ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, pause_ena);
++
+ return err;
+ }
+ EXPORT_SYMBOL(ocelot_port_flush);
+--
+2.30.2
+
--- /dev/null
+From 3c85b12e59de653bc5da0d73b850e4017b67312e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Jun 2021 19:42:54 +0300
+Subject: net: ena: fix DMA mapping function issues in XDP
+
+From: Shay Agroskin <shayagr@amazon.com>
+
+[ Upstream commit 504fd6a5390c30b1b7670768e314dd5d473da06a ]
+
+This patch fixes several bugs found when (DMA/LLQ) mapping a packet for
+transmission. The mapping procedure makes the transmitted packet
+accessible by the device.
+When using LLQ, this requires copying the packet's header to push header
+(which would be passed to LLQ) and creating DMA mapping for the payload
+(if the packet doesn't fit the maximum push length).
+When not using LLQ, we map the whole packet with DMA.
+
+The following bugs are fixed in the code:
+ 1. Add support for non-LLQ machines:
+ The ena_xdp_tx_map_frame() function assumed that LLQ is
+ supported, and never mapped the whole packet using DMA. On some
+ instances, which don't support LLQ, this causes loss of traffic.
+
+ 2. Wrong DMA buffer length passed to device:
+ When using LLQ, the first 'tx_max_header_size' bytes of the
+ packet would be copied to push header. The rest of the packet
+ would be copied to a DMA'd buffer.
+
+ 3. Freeing the XDP buffer twice in case of a mapping error:
+ In case a buffer DMA mapping fails, the function uses
+ xdp_return_frame_rx_napi() to free the RX buffer and returns from
+ the function with an error. XDP frames that fail to xmit get
+ freed by the kernel and so there is no need for this call.
+
+Fixes: 548c4940b9f1 ("net: ena: Implement XDP_TX action")
+Signed-off-by: Shay Agroskin <shayagr@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/amazon/ena/ena_netdev.c | 54 ++++++++++----------
+ 1 file changed, 28 insertions(+), 26 deletions(-)
+
+diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
+index 102f2c91fdb8..20f8012bbe04 100644
+--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
+@@ -236,36 +236,48 @@ static int ena_xdp_io_poll(struct napi_struct *napi, int budget)
+ static int ena_xdp_tx_map_frame(struct ena_ring *xdp_ring,
+ struct ena_tx_buffer *tx_info,
+ struct xdp_frame *xdpf,
+- void **push_hdr,
+- u32 *push_len)
++ struct ena_com_tx_ctx *ena_tx_ctx)
+ {
+ struct ena_adapter *adapter = xdp_ring->adapter;
+ struct ena_com_buf *ena_buf;
+- dma_addr_t dma = 0;
++ int push_len = 0;
++ dma_addr_t dma;
++ void *data;
+ u32 size;
+
+ tx_info->xdpf = xdpf;
++ data = tx_info->xdpf->data;
+ size = tx_info->xdpf->len;
+- ena_buf = tx_info->bufs;
+
+- /* llq push buffer */
+- *push_len = min_t(u32, size, xdp_ring->tx_max_header_size);
+- *push_hdr = tx_info->xdpf->data;
++ if (xdp_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
++ /* Designate part of the packet for LLQ */
++ push_len = min_t(u32, size, xdp_ring->tx_max_header_size);
++
++ ena_tx_ctx->push_header = data;
++
++ size -= push_len;
++ data += push_len;
++ }
++
++ ena_tx_ctx->header_len = push_len;
+
+- if (size - *push_len > 0) {
++ if (size > 0) {
+ dma = dma_map_single(xdp_ring->dev,
+- *push_hdr + *push_len,
+- size - *push_len,
++ data,
++ size,
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(xdp_ring->dev, dma)))
+ goto error_report_dma_error;
+
+- tx_info->map_linear_data = 1;
+- tx_info->num_of_bufs = 1;
+- }
++ tx_info->map_linear_data = 0;
+
+- ena_buf->paddr = dma;
+- ena_buf->len = size;
++ ena_buf = tx_info->bufs;
++ ena_buf->paddr = dma;
++ ena_buf->len = size;
++
++ ena_tx_ctx->ena_bufs = ena_buf;
++ ena_tx_ctx->num_bufs = tx_info->num_of_bufs = 1;
++ }
+
+ return 0;
+
+@@ -274,10 +286,6 @@ error_report_dma_error:
+ &xdp_ring->syncp);
+ netif_warn(adapter, tx_queued, adapter->netdev, "Failed to map xdp buff\n");
+
+- xdp_return_frame_rx_napi(tx_info->xdpf);
+- tx_info->xdpf = NULL;
+- tx_info->num_of_bufs = 0;
+-
+ return -EINVAL;
+ }
+
+@@ -289,8 +297,6 @@ static int ena_xdp_xmit_frame(struct ena_ring *xdp_ring,
+ struct ena_com_tx_ctx ena_tx_ctx = {};
+ struct ena_tx_buffer *tx_info;
+ u16 next_to_use, req_id;
+- void *push_hdr;
+- u32 push_len;
+ int rc;
+
+ next_to_use = xdp_ring->next_to_use;
+@@ -298,15 +304,11 @@ static int ena_xdp_xmit_frame(struct ena_ring *xdp_ring,
+ tx_info = &xdp_ring->tx_buffer_info[req_id];
+ tx_info->num_of_bufs = 0;
+
+- rc = ena_xdp_tx_map_frame(xdp_ring, tx_info, xdpf, &push_hdr, &push_len);
++ rc = ena_xdp_tx_map_frame(xdp_ring, tx_info, xdpf, &ena_tx_ctx);
+ if (unlikely(rc))
+ goto error_drop_packet;
+
+- ena_tx_ctx.ena_bufs = tx_info->bufs;
+- ena_tx_ctx.push_header = push_hdr;
+- ena_tx_ctx.num_bufs = tx_info->num_of_bufs;
+ ena_tx_ctx.req_id = req_id;
+- ena_tx_ctx.header_len = push_len;
+
+ rc = ena_xmit_common(dev,
+ xdp_ring,
+--
+2.30.2
+
--- /dev/null
+From 194a62ff94f47f93dd7b572b8298d4634be2e5e0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 18 Jun 2021 16:49:02 +0300
+Subject: net: ethernet: fix potential use-after-free in ec_bhf_remove
+
+From: Pavel Skripkin <paskripkin@gmail.com>
+
+[ Upstream commit 9cca0c2d70149160407bda9a9446ce0c29b6e6c6 ]
+
+static void ec_bhf_remove(struct pci_dev *dev)
+{
+...
+ struct ec_bhf_priv *priv = netdev_priv(net_dev);
+
+ unregister_netdev(net_dev);
+ free_netdev(net_dev);
+
+ pci_iounmap(dev, priv->dma_io);
+ pci_iounmap(dev, priv->io);
+...
+}
+
+priv is netdev private data, but it is used
+after free_netdev(). This can cause a use-after-free when accessing the
+priv pointer. So, fix it by moving free_netdev() after the pci_iounmap()
+calls.
+
+Fixes: 6af55ff52b02 ("Driver for Beckhoff CX5020 EtherCAT master module.")
+Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/ec_bhf.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/ec_bhf.c b/drivers/net/ethernet/ec_bhf.c
+index 46b0dbab8aad..7c992172933b 100644
+--- a/drivers/net/ethernet/ec_bhf.c
++++ b/drivers/net/ethernet/ec_bhf.c
+@@ -576,10 +576,12 @@ static void ec_bhf_remove(struct pci_dev *dev)
+ struct ec_bhf_priv *priv = netdev_priv(net_dev);
+
+ unregister_netdev(net_dev);
+- free_netdev(net_dev);
+
+ pci_iounmap(dev, priv->dma_io);
+ pci_iounmap(dev, priv->io);
++
++ free_netdev(net_dev);
++
+ pci_release_regions(dev);
+ pci_clear_master(dev);
+ pci_disable_device(dev);
+--
+2.30.2
+
--- /dev/null
+From f70ccd9f48488e65ef6cdf910d0176e3513ff135 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Jun 2021 17:14:26 +0800
+Subject: net: fec_ptp: fix issue caused by refactor the fec_devtype
+
+From: Joakim Zhang <qiangqing.zhang@nxp.com>
+
+[ Upstream commit d23765646e71b43ed2b809930411ba5c0aadee7b ]
+
+Commit da722186f654 ("net: fec: set GPR bit on suspend by DT configuration.")
+refactored the fec_devtype, so the ptp driver needs to be adjusted accordingly.
+
+Fixes: da722186f654 ("net: fec: set GPR bit on suspend by DT configuration.")
+Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/freescale/fec_ptp.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c
+index 1753807cbf97..ce8e5555f3e0 100644
+--- a/drivers/net/ethernet/freescale/fec_ptp.c
++++ b/drivers/net/ethernet/freescale/fec_ptp.c
+@@ -215,15 +215,13 @@ static u64 fec_ptp_read(const struct cyclecounter *cc)
+ {
+ struct fec_enet_private *fep =
+ container_of(cc, struct fec_enet_private, cc);
+- const struct platform_device_id *id_entry =
+- platform_get_device_id(fep->pdev);
+ u32 tempval;
+
+ tempval = readl(fep->hwp + FEC_ATIME_CTRL);
+ tempval |= FEC_T_CTRL_CAPTURE;
+ writel(tempval, fep->hwp + FEC_ATIME_CTRL);
+
+- if (id_entry->driver_data & FEC_QUIRK_BUG_CAPTURE)
++ if (fep->quirks & FEC_QUIRK_BUG_CAPTURE)
+ udelay(1);
+
+ return readl(fep->hwp + FEC_ATIME);
+--
+2.30.2
+
--- /dev/null
+From 210c99dd0893e373b60d4ad69bdaae475eb48685 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Jun 2021 22:09:06 +0300
+Subject: net: hamradio: fix memory leak in mkiss_close
+
+From: Pavel Skripkin <paskripkin@gmail.com>
+
+[ Upstream commit 7edcc682301492380fbdd604b4516af5ae667a13 ]
+
+My local syzbot instance hit a memory leak in
+mkiss_open()[1]. The problem was a missing
+free_netdev() call in mkiss_close().
+
+In mkiss_open() the netdevice is allocated and then
+registered, but in mkiss_close() the netdevice was
+only unregistered, not freed.
+
+Fail log:
+
+BUG: memory leak
+unreferenced object 0xffff8880281ba000 (size 4096):
+ comm "syz-executor.1", pid 11443, jiffies 4295046091 (age 17.660s)
+ hex dump (first 32 bytes):
+ 61 78 30 00 00 00 00 00 00 00 00 00 00 00 00 00 ax0.............
+ 00 27 fa 2a 80 88 ff ff 00 00 00 00 00 00 00 00 .'.*............
+ backtrace:
+ [<ffffffff81a27201>] kvmalloc_node+0x61/0xf0
+ [<ffffffff8706e7e8>] alloc_netdev_mqs+0x98/0xe80
+ [<ffffffff84e64192>] mkiss_open+0xb2/0x6f0 [1]
+ [<ffffffff842355db>] tty_ldisc_open+0x9b/0x110
+ [<ffffffff84236488>] tty_set_ldisc+0x2e8/0x670
+ [<ffffffff8421f7f3>] tty_ioctl+0xda3/0x1440
+ [<ffffffff81c9f273>] __x64_sys_ioctl+0x193/0x200
+ [<ffffffff8911263a>] do_syscall_64+0x3a/0xb0
+ [<ffffffff89200068>] entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+BUG: memory leak
+unreferenced object 0xffff8880141a9a00 (size 96):
+ comm "syz-executor.1", pid 11443, jiffies 4295046091 (age 17.660s)
+ hex dump (first 32 bytes):
+ e8 a2 1b 28 80 88 ff ff e8 a2 1b 28 80 88 ff ff ...(.......(....
+ 98 92 9c aa b0 40 02 00 00 00 00 00 00 00 00 00 .....@..........
+ backtrace:
+ [<ffffffff8709f68b>] __hw_addr_create_ex+0x5b/0x310
+ [<ffffffff8709fb38>] __hw_addr_add_ex+0x1f8/0x2b0
+ [<ffffffff870a0c7b>] dev_addr_init+0x10b/0x1f0
+ [<ffffffff8706e88b>] alloc_netdev_mqs+0x13b/0xe80
+ [<ffffffff84e64192>] mkiss_open+0xb2/0x6f0 [1]
+ [<ffffffff842355db>] tty_ldisc_open+0x9b/0x110
+ [<ffffffff84236488>] tty_set_ldisc+0x2e8/0x670
+ [<ffffffff8421f7f3>] tty_ioctl+0xda3/0x1440
+ [<ffffffff81c9f273>] __x64_sys_ioctl+0x193/0x200
+ [<ffffffff8911263a>] do_syscall_64+0x3a/0xb0
+ [<ffffffff89200068>] entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+BUG: memory leak
+unreferenced object 0xffff8880219bfc00 (size 512):
+ comm "syz-executor.1", pid 11443, jiffies 4295046091 (age 17.660s)
+ hex dump (first 32 bytes):
+ 00 a0 1b 28 80 88 ff ff 80 8f b1 8d ff ff ff ff ...(............
+ 80 8f b1 8d ff ff ff ff 00 00 00 00 00 00 00 00 ................
+ backtrace:
+ [<ffffffff81a27201>] kvmalloc_node+0x61/0xf0
+ [<ffffffff8706eec7>] alloc_netdev_mqs+0x777/0xe80
+ [<ffffffff84e64192>] mkiss_open+0xb2/0x6f0 [1]
+ [<ffffffff842355db>] tty_ldisc_open+0x9b/0x110
+ [<ffffffff84236488>] tty_set_ldisc+0x2e8/0x670
+ [<ffffffff8421f7f3>] tty_ioctl+0xda3/0x1440
+ [<ffffffff81c9f273>] __x64_sys_ioctl+0x193/0x200
+ [<ffffffff8911263a>] do_syscall_64+0x3a/0xb0
+ [<ffffffff89200068>] entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+BUG: memory leak
+unreferenced object 0xffff888029b2b200 (size 256):
+ comm "syz-executor.1", pid 11443, jiffies 4295046091 (age 17.660s)
+ hex dump (first 32 bytes):
+ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
+ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
+ backtrace:
+ [<ffffffff81a27201>] kvmalloc_node+0x61/0xf0
+ [<ffffffff8706f062>] alloc_netdev_mqs+0x912/0xe80
+ [<ffffffff84e64192>] mkiss_open+0xb2/0x6f0 [1]
+ [<ffffffff842355db>] tty_ldisc_open+0x9b/0x110
+ [<ffffffff84236488>] tty_set_ldisc+0x2e8/0x670
+ [<ffffffff8421f7f3>] tty_ioctl+0xda3/0x1440
+ [<ffffffff81c9f273>] __x64_sys_ioctl+0x193/0x200
+ [<ffffffff8911263a>] do_syscall_64+0x3a/0xb0
+ [<ffffffff89200068>] entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+Fixes: 815f62bf7427 ("[PATCH] SMP rewrite of mkiss")
+Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/hamradio/mkiss.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c
+index 17be2bb2985c..920e9f888cc3 100644
+--- a/drivers/net/hamradio/mkiss.c
++++ b/drivers/net/hamradio/mkiss.c
+@@ -799,6 +799,7 @@ static void mkiss_close(struct tty_struct *tty)
+ ax->tty = NULL;
+
+ unregister_netdev(ax->dev);
++ free_netdev(ax->dev);
+ }
+
+ /* Perform I/O control on an active ax25 channel. */
+--
+2.30.2
+
--- /dev/null
+From de862b0cf70b3c24107b4dec5973963e62338dd6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Jun 2021 17:59:25 +0800
+Subject: net: ipv4: fix memory leak in ip_mc_add1_src
+
+From: Chengyang Fan <cy.fan@huawei.com>
+
+[ Upstream commit d8e2973029b8b2ce477b564824431f3385c77083 ]
+
+BUG: memory leak
+unreferenced object 0xffff888101bc4c00 (size 32):
+ comm "syz-executor527", pid 360, jiffies 4294807421 (age 19.329s)
+ hex dump (first 32 bytes):
+ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
+ 01 00 00 00 00 00 00 00 ac 14 14 bb 00 00 02 00 ................
+ backtrace:
+ [<00000000f17c5244>] kmalloc include/linux/slab.h:558 [inline]
+ [<00000000f17c5244>] kzalloc include/linux/slab.h:688 [inline]
+ [<00000000f17c5244>] ip_mc_add1_src net/ipv4/igmp.c:1971 [inline]
+ [<00000000f17c5244>] ip_mc_add_src+0x95f/0xdb0 net/ipv4/igmp.c:2095
+ [<000000001cb99709>] ip_mc_source+0x84c/0xea0 net/ipv4/igmp.c:2416
+ [<0000000052cf19ed>] do_ip_setsockopt net/ipv4/ip_sockglue.c:1294 [inline]
+ [<0000000052cf19ed>] ip_setsockopt+0x114b/0x30c0 net/ipv4/ip_sockglue.c:1423
+ [<00000000477edfbc>] raw_setsockopt+0x13d/0x170 net/ipv4/raw.c:857
+ [<00000000e75ca9bb>] __sys_setsockopt+0x158/0x270 net/socket.c:2117
+ [<00000000bdb993a8>] __do_sys_setsockopt net/socket.c:2128 [inline]
+ [<00000000bdb993a8>] __se_sys_setsockopt net/socket.c:2125 [inline]
+ [<00000000bdb993a8>] __x64_sys_setsockopt+0xba/0x150 net/socket.c:2125
+ [<000000006a1ffdbd>] do_syscall_64+0x40/0x80 arch/x86/entry/common.c:47
+ [<00000000b11467c4>] entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+In commit 24803f38a5c0 ("igmp: do not remove igmp souce list info when set
+link down"), the ip_mc_clear_src() in ip_mc_destroy_dev() was removed,
+because it was also called in igmpv3_clear_delrec().
+
+Rough callgraph:
+
+inetdev_destroy
+-> ip_mc_destroy_dev
+ -> igmpv3_clear_delrec
+ -> ip_mc_clear_src
+-> RCU_INIT_POINTER(dev->ip_ptr, NULL)
+
+However, ip_mc_clear_src() called in igmpv3_clear_delrec() doesn't
+release in_dev->mc_list->sources. And RCU_INIT_POINTER() assigns the
+NULL to dev->ip_ptr. As a result, in_dev cannot be obtained through
+inetdev_by_index() and then in_dev->mc_list->sources cannot be released
+by ip_mc_del1_src() in the sock_close. Rough call sequence goes like:
+
+sock_close
+-> __sock_release
+ -> inet_release
+ -> ip_mc_drop_socket
+ -> inetdev_by_index
+ -> ip_mc_leave_src
+ -> ip_mc_del_src
+ -> ip_mc_del1_src
+
+So we still need to call ip_mc_clear_src() in ip_mc_destroy_dev() to free
+in_dev->mc_list->sources.
+
+Fixes: 24803f38a5c0 ("igmp: do not remove igmp souce list info ...")
+Reported-by: Hulk Robot <hulkci@huawei.com>
+Signed-off-by: Chengyang Fan <cy.fan@huawei.com>
+Acked-by: Hangbin Liu <liuhangbin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/igmp.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
+index 7b272bbed2b4..6b3c558a4f23 100644
+--- a/net/ipv4/igmp.c
++++ b/net/ipv4/igmp.c
+@@ -1801,6 +1801,7 @@ void ip_mc_destroy_dev(struct in_device *in_dev)
+ while ((i = rtnl_dereference(in_dev->mc_list)) != NULL) {
+ in_dev->mc_list = i->next_rcu;
+ in_dev->mc_count--;
++ ip_mc_clear_src(i);
+ ip_ma_put(i);
+ }
+ }
+--
+2.30.2
+
--- /dev/null
+From f84dfb967850facd132caee98ec1709932ffbb87 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Jun 2021 09:51:58 +0800
+Subject: net: ipv4: fix memory leak in netlbl_cipsov4_add_std
+
+From: Nanyong Sun <sunnanyong@huawei.com>
+
+[ Upstream commit d612c3f3fae221e7ea736d196581c2217304bbbc ]
+
+Reported by syzkaller:
+BUG: memory leak
+unreferenced object 0xffff888105df7000 (size 64):
+comm "syz-executor842", pid 360, jiffies 4294824824 (age 22.546s)
+hex dump (first 32 bytes):
+00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
+00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
+backtrace:
+[<00000000e67ed558>] kmalloc include/linux/slab.h:590 [inline]
+[<00000000e67ed558>] kzalloc include/linux/slab.h:720 [inline]
+[<00000000e67ed558>] netlbl_cipsov4_add_std net/netlabel/netlabel_cipso_v4.c:145 [inline]
+[<00000000e67ed558>] netlbl_cipsov4_add+0x390/0x2340 net/netlabel/netlabel_cipso_v4.c:416
+[<0000000006040154>] genl_family_rcv_msg_doit.isra.0+0x20e/0x320 net/netlink/genetlink.c:739
+[<00000000204d7a1c>] genl_family_rcv_msg net/netlink/genetlink.c:783 [inline]
+[<00000000204d7a1c>] genl_rcv_msg+0x2bf/0x4f0 net/netlink/genetlink.c:800
+[<00000000c0d6a995>] netlink_rcv_skb+0x134/0x3d0 net/netlink/af_netlink.c:2504
+[<00000000d78b9d2c>] genl_rcv+0x24/0x40 net/netlink/genetlink.c:811
+[<000000009733081b>] netlink_unicast_kernel net/netlink/af_netlink.c:1314 [inline]
+[<000000009733081b>] netlink_unicast+0x4a0/0x6a0 net/netlink/af_netlink.c:1340
+[<00000000d5fd43b8>] netlink_sendmsg+0x789/0xc70 net/netlink/af_netlink.c:1929
+[<000000000a2d1e40>] sock_sendmsg_nosec net/socket.c:654 [inline]
+[<000000000a2d1e40>] sock_sendmsg+0x139/0x170 net/socket.c:674
+[<00000000321d1969>] ____sys_sendmsg+0x658/0x7d0 net/socket.c:2350
+[<00000000964e16bc>] ___sys_sendmsg+0xf8/0x170 net/socket.c:2404
+[<000000001615e288>] __sys_sendmsg+0xd3/0x190 net/socket.c:2433
+[<000000004ee8b6a5>] do_syscall_64+0x37/0x90 arch/x86/entry/common.c:47
+[<00000000171c7cee>] entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+The memory that doi_def->map.std points to is allocated in
+netlbl_cipsov4_add_std, but it is never freed. It should be
+freed in cipso_v4_doi_free, which frees the CIPSO DOI resources.
+
+Fixes: 96cb8e3313c7a ("[NetLabel]: CIPSOv4 and Unlabeled packet integration")
+Reported-by: Hulk Robot <hulkci@huawei.com>
+Signed-off-by: Nanyong Sun <sunnanyong@huawei.com>
+Acked-by: Paul Moore <paul@paul-moore.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/cipso_ipv4.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
+index bfaf327e9d12..e0480c6cebaa 100644
+--- a/net/ipv4/cipso_ipv4.c
++++ b/net/ipv4/cipso_ipv4.c
+@@ -472,6 +472,7 @@ void cipso_v4_doi_free(struct cipso_v4_doi *doi_def)
+ kfree(doi_def->map.std->lvl.local);
+ kfree(doi_def->map.std->cat.cipso);
+ kfree(doi_def->map.std->cat.local);
++ kfree(doi_def->map.std);
+ break;
+ }
+ kfree(doi_def);
+--
+2.30.2
+
--- /dev/null
+From 55d00539184fc2000fa31e2a0f4a045e2c4bc4e7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Jun 2021 23:21:07 +0200
+Subject: net: lantiq: disable interrupt before sheduling NAPI
+
+From: Aleksander Jan Bajkowski <olek2@wp.pl>
+
+[ Upstream commit f2386cf7c5f4ff5d7b584f5d92014edd7df6c676 ]
+
+This patch fixes TX hangs with threaded NAPI enabled. The scheduled
+NAPI seems to be executed in parallel with the interrupt on second
+thread. Sometimes it happens that ltq_dma_disable_irq() is executed
+after xrx200_tx_housekeeping(). The symptom is that TX interrupts
+are disabled in the DMA controller. As a result, the TX hangs after
+a few seconds of the iperf test. Scheduling NAPI after disabling
+interrupts fixes this issue.
+
+Tested on Lantiq xRX200 (BT Home Hub 5A).
+
+Fixes: 9423361da523 ("net: lantiq: Disable IRQs only if NAPI gets scheduled ")
+Signed-off-by: Aleksander Jan Bajkowski <olek2@wp.pl>
+Acked-by: Hauke Mehrtens <hauke@hauke-m.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/lantiq_xrx200.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c
+index 135ba5b6ae98..3da494df72f3 100644
+--- a/drivers/net/ethernet/lantiq_xrx200.c
++++ b/drivers/net/ethernet/lantiq_xrx200.c
+@@ -352,8 +352,8 @@ static irqreturn_t xrx200_dma_irq(int irq, void *ptr)
+ struct xrx200_chan *ch = ptr;
+
+ if (napi_schedule_prep(&ch->napi)) {
+- __napi_schedule(&ch->napi);
+ ltq_dma_disable_irq(&ch->dma);
++ __napi_schedule(&ch->napi);
+ }
+
+ ltq_dma_ack_irq(&ch->dma);
+--
+2.30.2
+
--- /dev/null
+From 6181162d9d7675fb4a83fc28fee66ab5629be730 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 11 Jun 2021 22:29:59 +0800
+Subject: net: make get_net_ns return error if NET_NS is disabled
+
+From: Changbin Du <changbin.du@gmail.com>
+
+[ Upstream commit ea6932d70e223e02fea3ae20a4feff05d7c1ea9a ]
+
+There is a panic in socket ioctl cmd SIOCGSKNS when NET_NS is not enabled.
+The reason is that nsfs tries to access ns->ops but the proc_ns_operations
+is not implemented in this case.
+
+[7.670023] Unable to handle kernel NULL pointer dereference at virtual address 00000010
+[7.670268] pgd = 32b54000
+[7.670544] [00000010] *pgd=00000000
+[7.671861] Internal error: Oops: 5 [#1] SMP ARM
+[7.672315] Modules linked in:
+[7.672918] CPU: 0 PID: 1 Comm: systemd Not tainted 5.13.0-rc3-00375-g6799d4f2da49 #16
+[7.673309] Hardware name: Generic DT based system
+[7.673642] PC is at nsfs_evict+0x24/0x30
+[7.674486] LR is at clear_inode+0x20/0x9c
+
+The same applies to the tun SIOCGSKNS command.
+
+To fix this problem, we make get_net_ns() return -EINVAL when NET_NS is
+disabled. Meanwhile, move it to its proper place, net/core/net_namespace.c.
+
+Signed-off-by: Changbin Du <changbin.du@gmail.com>
+Fixes: c62cce2caee5 ("net: add an ioctl to get a socket network namespace")
+Cc: Cong Wang <xiyou.wangcong@gmail.com>
+Cc: Jakub Kicinski <kuba@kernel.org>
+Cc: David Laight <David.Laight@ACULAB.COM>
+Cc: Christian Brauner <christian.brauner@ubuntu.com>
+Suggested-by: Jakub Kicinski <kuba@kernel.org>
+Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/socket.h | 2 --
+ include/net/net_namespace.h | 7 +++++++
+ net/core/net_namespace.c | 12 ++++++++++++
+ net/socket.c | 13 -------------
+ 4 files changed, 19 insertions(+), 15 deletions(-)
+
+diff --git a/include/linux/socket.h b/include/linux/socket.h
+index 385894b4a8bb..42222a84167f 100644
+--- a/include/linux/socket.h
++++ b/include/linux/socket.h
+@@ -438,6 +438,4 @@ extern int __sys_socketpair(int family, int type, int protocol,
+ int __user *usockvec);
+ extern int __sys_shutdown_sock(struct socket *sock, int how);
+ extern int __sys_shutdown(int fd, int how);
+-
+-extern struct ns_common *get_net_ns(struct ns_common *ns);
+ #endif /* _LINUX_SOCKET_H */
+diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
+index dcaee24a4d87..14b6f7f44532 100644
+--- a/include/net/net_namespace.h
++++ b/include/net/net_namespace.h
+@@ -197,6 +197,8 @@ struct net *copy_net_ns(unsigned long flags, struct user_namespace *user_ns,
+ void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid);
+
+ void net_ns_barrier(void);
++
++struct ns_common *get_net_ns(struct ns_common *ns);
+ #else /* CONFIG_NET_NS */
+ #include <linux/sched.h>
+ #include <linux/nsproxy.h>
+@@ -216,6 +218,11 @@ static inline void net_ns_get_ownership(const struct net *net,
+ }
+
+ static inline void net_ns_barrier(void) {}
++
++static inline struct ns_common *get_net_ns(struct ns_common *ns)
++{
++ return ERR_PTR(-EINVAL);
++}
+ #endif /* CONFIG_NET_NS */
+
+
+diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
+index 43b6ac4c4439..cc8dafb25d61 100644
+--- a/net/core/net_namespace.c
++++ b/net/core/net_namespace.c
+@@ -641,6 +641,18 @@ void __put_net(struct net *net)
+ }
+ EXPORT_SYMBOL_GPL(__put_net);
+
++/**
++ * get_net_ns - increment the refcount of the network namespace
++ * @ns: common namespace (net)
++ *
++ * Returns the net's common namespace.
++ */
++struct ns_common *get_net_ns(struct ns_common *ns)
++{
++ return &get_net(container_of(ns, struct net, ns))->ns;
++}
++EXPORT_SYMBOL_GPL(get_net_ns);
++
+ struct net *get_net_ns_by_fd(int fd)
+ {
+ struct file *file;
+diff --git a/net/socket.c b/net/socket.c
+index 84a8049c2b09..03259cb919f7 100644
+--- a/net/socket.c
++++ b/net/socket.c
+@@ -1072,19 +1072,6 @@ static long sock_do_ioctl(struct net *net, struct socket *sock,
+ * what to do with it - that's up to the protocol still.
+ */
+
+-/**
+- * get_net_ns - increment the refcount of the network namespace
+- * @ns: common namespace (net)
+- *
+- * Returns the net's common namespace.
+- */
+-
+-struct ns_common *get_net_ns(struct ns_common *ns)
+-{
+- return &get_net(container_of(ns, struct net, ns))->ns;
+-}
+-EXPORT_SYMBOL_GPL(get_net_ns);
+-
+ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
+ {
+ struct socket *sock;
+--
+2.30.2
+
--- /dev/null
+From ab0b25b074b2bf74b793daa4be4ca25c6124fa6c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 14 Jun 2021 15:03:25 -0600
+Subject: net: mhi_net: Update the transmit handler prototype
+
+From: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
+
+[ Upstream commit 2214fb53006e6cfa6371b706070cb99794c68c3b ]
+
+Update the function prototype of mhi_ndo_xmit to match
+ndo_start_xmit. This otherwise leads to run time failures when
+CFI is enabled in kernel.
+
+Fixes: 3ffec6a14f24 ("net: Add mhi-net driver")
+Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/mhi/net.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/mhi/net.c b/drivers/net/mhi/net.c
+index f59960876083..8e7f8728998f 100644
+--- a/drivers/net/mhi/net.c
++++ b/drivers/net/mhi/net.c
+@@ -49,7 +49,7 @@ static int mhi_ndo_stop(struct net_device *ndev)
+ return 0;
+ }
+
+-static int mhi_ndo_xmit(struct sk_buff *skb, struct net_device *ndev)
++static netdev_tx_t mhi_ndo_xmit(struct sk_buff *skb, struct net_device *ndev)
+ {
+ struct mhi_net_dev *mhi_netdev = netdev_priv(ndev);
+ const struct mhi_net_proto *proto = mhi_netdev->proto;
+--
+2.30.2
+
--- /dev/null
+From 492739f83893a62f03f06005b716310b7436d424 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 21 Mar 2021 19:57:14 +0200
+Subject: net/mlx5: Check that driver was probed prior attaching the device
+
+From: Leon Romanovsky <leonro@nvidia.com>
+
+[ Upstream commit 2058cc9c8041fde9c0bdd8e868c72b137cff8563 ]
+
+The device can be requested to be attached despite not being probed.
+This situation is possible if devlink reload races with module removal,
+and the following kernel panic is an outcome of such race.
+
+ mlx5_core 0000:00:09.0: firmware version: 4.7.9999
+ mlx5_core 0000:00:09.0: 0.000 Gb/s available PCIe bandwidth (8.0 GT/s PCIe x255 link)
+ BUG: unable to handle page fault for address: fffffffffffffff0
+ #PF: supervisor read access in kernel mode
+ #PF: error_code(0x0000) - not-present page
+ PGD 3218067 P4D 3218067 PUD 321a067 PMD 0
+ Oops: 0000 [#1] SMP KASAN NOPTI
+ CPU: 7 PID: 250 Comm: devlink Not tainted 5.12.0-rc2+ #2836
+ Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
+ RIP: 0010:mlx5_attach_device+0x80/0x280 [mlx5_core]
+ Code: f8 48 c1 e8 03 42 80 3c 38 00 0f 85 80 01 00 00 48 8b 45 68 48 8d 78 f0 48 89 fe 48 c1 ee 03 42 80 3c 3e 00 0f 85 70 01 00 00 <48> 8b 40 f0 48 85 c0 74 0d 48 89 ef ff d0 85 c0 0f 85 84 05 0e 00
+ RSP: 0018:ffff8880129675f0 EFLAGS: 00010246
+ RAX: 0000000000000000 RBX: 0000000000000001 RCX: ffffffff827407f1
+ RDX: 1ffff110011336cf RSI: 1ffffffffffffffe RDI: fffffffffffffff0
+ RBP: ffff888008e0c000 R08: 0000000000000008 R09: ffffffffa0662ee7
+ R10: fffffbfff40cc5dc R11: 0000000000000000 R12: ffff88800ea002e0
+ R13: ffffed1001d459f7 R14: ffffffffa05ef4f8 R15: dffffc0000000000
+ FS: 00007f51dfeaf740(0000) GS:ffff88806d5c0000(0000) knlGS:0000000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: fffffffffffffff0 CR3: 000000000bc82006 CR4: 0000000000370ea0
+ DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+ DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+ Call Trace:
+ mlx5_load_one+0x117/0x1d0 [mlx5_core]
+ devlink_reload+0x2d5/0x520
+ ? devlink_remote_reload_actions_performed+0x30/0x30
+ ? mutex_trylock+0x24b/0x2d0
+ ? devlink_nl_cmd_reload+0x62b/0x1070
+ devlink_nl_cmd_reload+0x66d/0x1070
+ ? devlink_reload+0x520/0x520
+ ? devlink_nl_pre_doit+0x64/0x4d0
+ genl_family_rcv_msg_doit+0x1e9/0x2f0
+ ? mutex_lock_io_nested+0x1130/0x1130
+ ? genl_family_rcv_msg_attrs_parse.constprop.0+0x240/0x240
+ ? security_capable+0x51/0x90
+ genl_rcv_msg+0x27f/0x4a0
+ ? genl_get_cmd+0x3c0/0x3c0
+ ? lock_acquire+0x1a9/0x6d0
+ ? devlink_reload+0x520/0x520
+ ? lock_release+0x6c0/0x6c0
+ netlink_rcv_skb+0x11d/0x340
+ ? genl_get_cmd+0x3c0/0x3c0
+ ? netlink_ack+0x9f0/0x9f0
+ ? lock_release+0x1f9/0x6c0
+ genl_rcv+0x24/0x40
+ netlink_unicast+0x433/0x700
+ ? netlink_attachskb+0x730/0x730
+ ? _copy_from_iter_full+0x178/0x650
+ ? __alloc_skb+0x113/0x2b0
+ netlink_sendmsg+0x6f1/0xbd0
+ ? netlink_unicast+0x700/0x700
+ ? netlink_unicast+0x700/0x700
+ sock_sendmsg+0xb0/0xe0
+ __sys_sendto+0x193/0x240
+ ? __x64_sys_getpeername+0xb0/0xb0
+ ? copy_page_range+0x2300/0x2300
+ ? __up_read+0x1a1/0x7b0
+ ? do_user_addr_fault+0x219/0xdc0
+ __x64_sys_sendto+0xdd/0x1b0
+ ? syscall_enter_from_user_mode+0x1d/0x50
+ do_syscall_64+0x2d/0x40
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
+ RIP: 0033:0x7f51dffb514a
+ Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 76 c3 0f 1f 44 00 00 55 48 83 ec 30 44 89 4c
+ RSP: 002b:00007ffcaef22e78 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
+ RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007f51dffb514a
+ RDX: 0000000000000030 RSI: 000055750daf2440 RDI: 0000000000000003
+ RBP: 000055750daf2410 R08: 00007f51e0081200 R09: 000000000000000c
+ R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
+ R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
+ Modules linked in: mlx5_core(-) ptp pps_core ib_ipoib rdma_ucm rdma_cm iw_cm ib_cm ib_umad ib_uverbs ib_core [last unloaded: mlx5_ib]
+ CR2: fffffffffffffff0
+ ---[ end trace 7789831bfe74fa42 ]---
+
+Fixes: a925b5e309c9 ("net/mlx5: Register mlx5 devices to auxiliary virtual bus")
+Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
+Reviewed-by: Parav Pandit <parav@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/dev.c | 15 +++++++++++++++
+ 1 file changed, 15 insertions(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+index 9153c9bda96f..f0623e94716b 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+@@ -323,6 +323,16 @@ int mlx5_attach_device(struct mlx5_core_dev *dev)
+ }
+ } else {
+ adev = &priv->adev[i]->adev;
++
++ /* Pay attention that this is not PCI driver that
++ * mlx5_core_dev is connected, but auxiliary driver.
++ *
++ * Here we can race of module unload with devlink
++ * reload, but we don't need to take extra lock because
++ * we are holding global mlx5_intf_mutex.
++ */
++ if (!adev->dev.driver)
++ continue;
+ adrv = to_auxiliary_drv(adev->dev.driver);
+
+ if (adrv->resume)
+@@ -353,6 +363,10 @@ void mlx5_detach_device(struct mlx5_core_dev *dev)
+ continue;
+
+ adev = &priv->adev[i]->adev;
++ /* Auxiliary driver was unbind manually through sysfs */
++ if (!adev->dev.driver)
++ goto skip_suspend;
++
+ adrv = to_auxiliary_drv(adev->dev.driver);
+
+ if (adrv->suspend) {
+@@ -360,6 +374,7 @@ void mlx5_detach_device(struct mlx5_core_dev *dev)
+ continue;
+ }
+
++skip_suspend:
+ del_adev(&priv->adev[i]->adev);
+ priv->adev[i] = NULL;
+ }
+--
+2.30.2
+
--- /dev/null
+From ede1b45ddc017b77698fc92bd5a6abb4cbdb336a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 6 Jun 2021 11:20:46 +0300
+Subject: net/mlx5: Consider RoCE cap before init RDMA resources
+
+From: Maor Gottlieb <maorg@nvidia.com>
+
+[ Upstream commit c189716b2a7c1d2d8658e269735273caa1c38b54 ]
+
+Check if RoCE is supported by the device before enabling it in
+the vport context and creating all the RDMA steering objects.
+
+Fixes: 80f09dfc237f ("net/mlx5: Eswitch, enable RoCE loopback traffic")
+Signed-off-by: Maor Gottlieb <maorg@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/rdma.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
+index 8e0dddc6383f..2389239acadc 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
+@@ -156,6 +156,9 @@ void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev)
+ {
+ int err;
+
++ if (!MLX5_CAP_GEN(dev, roce))
++ return;
++
+ err = mlx5_nic_vport_enable_roce(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to enable RoCE: %d\n", err);
+--
+2.30.2
+
--- /dev/null
+From 8cacf28e7420cc0a53c793edff81201f66dffdc6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 6 Jun 2021 11:23:41 +0300
+Subject: net/mlx5: DR, Don't use SW steering when RoCE is not supported
+
+From: Maor Gottlieb <maorg@nvidia.com>
+
+[ Upstream commit 4aaf96ac8b45d8e2e019b6b53cce65a73c4ace2c ]
+
+SW steering uses an RC QP to write to and read from ICM, hence it is not
+supported when RoCE is not supported either.
+
+Fixes: 70605ea545e8 ("net/mlx5: DR, Expose APIs for direct rule managing")
+Signed-off-by: Maor Gottlieb <maorg@nvidia.com>
+Reviewed-by: Alex Vesker <valex@nvidia.com>
+Reviewed-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
+index 612b0ac31db2..9737565cd8d4 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
+@@ -124,10 +124,11 @@ int mlx5dr_action_destroy(struct mlx5dr_action *action);
+ static inline bool
+ mlx5dr_is_supported(struct mlx5_core_dev *dev)
+ {
+- return MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner) ||
+- (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner_v2) &&
+- (MLX5_CAP_GEN(dev, steering_format_version) <=
+- MLX5_STEERING_FORMAT_CONNECTX_6DX));
++ return MLX5_CAP_GEN(dev, roce) &&
++ (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner) ||
++ (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner_v2) &&
++ (MLX5_CAP_GEN(dev, steering_format_version) <=
++ MLX5_STEERING_FORMAT_CONNECTX_6DX)));
+ }
+
+ /* buddy functions & structure */
+--
+2.30.2
+
--- /dev/null
+From 246ec048ae11602715ca8eb9a3d5ae11cdec493c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 1 Jun 2021 18:10:06 +0300
+Subject: net/mlx5: DR, Fix STEv1 incorrect L3 decapsulation padding
+
+From: Alex Vesker <valex@nvidia.com>
+
+[ Upstream commit 65fb7d109abe3a1a9f1c2d3ba7e1249bc978d5f0 ]
+
+Decapsulation L3 on small inner packets which are less than
+64 Bytes was done incorrectly. In small packets there is an
+extra padding added in L2 which should not be included in L3
+length. The issue was that after decapL3 the extra L2 padding
+caused an update on the L3 length.
+
+To avoid this issue the new header is pushed to the beginning
+of the packet (offset 0) which should not cause a HW reparse
+and update the L3 length.
+
+Fixes: c349b4137cfd ("net/mlx5: DR, Add STEv1 modify header logic")
+Reviewed-by: Erez Shitrit <erezsh@nvidia.com>
+Reviewed-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
+Signed-off-by: Alex Vesker <valex@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../mellanox/mlx5/core/steering/dr_ste_v1.c | 26 ++++++++++++-------
+ 1 file changed, 16 insertions(+), 10 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
+index f146c618a78e..46ef45fa9167 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
+@@ -712,7 +712,11 @@ static int dr_ste_v1_set_action_decap_l3_list(void *data,
+ if (hw_action_sz / DR_STE_ACTION_DOUBLE_SZ < DR_STE_DECAP_L3_ACTION_NUM)
+ return -EINVAL;
+
+- memcpy(padded_data, data, data_sz);
++ inline_data_sz =
++ MLX5_FLD_SZ_BYTES(ste_double_action_insert_with_inline_v1, inline_data);
++
++ /* Add an alignment padding */
++ memcpy(padded_data + data_sz % inline_data_sz, data, data_sz);
+
+ /* Remove L2L3 outer headers */
+ MLX5_SET(ste_single_action_remove_header_v1, hw_action, action_id,
+@@ -724,32 +728,34 @@ static int dr_ste_v1_set_action_decap_l3_list(void *data,
+ hw_action += DR_STE_ACTION_DOUBLE_SZ;
+ used_actions++; /* Remove and NOP are a single double action */
+
+- inline_data_sz =
+- MLX5_FLD_SZ_BYTES(ste_double_action_insert_with_inline_v1, inline_data);
++ /* Point to the last dword of the header */
++ data_ptr += (data_sz / inline_data_sz) * inline_data_sz;
+
+- /* Add the new header inline + 2 extra bytes */
++ /* Add the new header using inline action 4Byte at a time, the header
++ * is added in reversed order to the beginning of the packet to avoid
++ * incorrect parsing by the HW. Since header is 14B or 18B an extra
++ * two bytes are padded and later removed.
++ */
+ for (i = 0; i < data_sz / inline_data_sz + 1; i++) {
+ void *addr_inline;
+
+ MLX5_SET(ste_double_action_insert_with_inline_v1, hw_action, action_id,
+ DR_STE_V1_ACTION_ID_INSERT_INLINE);
+ /* The hardware expects here offset to words (2 bytes) */
+- MLX5_SET(ste_double_action_insert_with_inline_v1, hw_action, start_offset,
+- i * 2);
++ MLX5_SET(ste_double_action_insert_with_inline_v1, hw_action, start_offset, 0);
+
+ /* Copy bytes one by one to avoid endianness problem */
+ addr_inline = MLX5_ADDR_OF(ste_double_action_insert_with_inline_v1,
+ hw_action, inline_data);
+- memcpy(addr_inline, data_ptr, inline_data_sz);
++ memcpy(addr_inline, data_ptr - i * inline_data_sz, inline_data_sz);
+ hw_action += DR_STE_ACTION_DOUBLE_SZ;
+- data_ptr += inline_data_sz;
+ used_actions++;
+ }
+
+- /* Remove 2 extra bytes */
++ /* Remove first 2 extra bytes */
+ MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, action_id,
+ DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE);
+- MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, start_offset, data_sz / 2);
++ MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, start_offset, 0);
+ /* The hardware expects here size in words (2 bytes) */
+ MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, remove_size, 1);
+ used_actions++;
+--
+2.30.2
+
--- /dev/null
+From 763a191a1223f866fba3da4a9ddd47eb5730c48d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Jun 2021 19:03:24 +0300
+Subject: net/mlx5: E-Switch, Allow setting GUID for host PF vport
+
+From: Parav Pandit <parav@nvidia.com>
+
+[ Upstream commit ca36fc4d77b35b8d142cf1ed0eae5ec2e071dc3c ]
+
+E-switch should be able to set the GUID of the host PF vport.
+Currently it returns an error. This results in the error below
+when a user attempts to configure the MAC address of the PF of an
+external controller.
+
+$ devlink port function set pci/0000:03:00.0/196608 \
+ hw_addr 00:00:00:11:22:33
+
+mlx5_core 0000:03:00.0: mlx5_esw_set_vport_mac_locked:1876:(pid 6715):\
+"Failed to set vport 0 node guid, err = -22.
+RDMA_CM will not function properly for this VF."
+
+Check for zero vport is no longer needed.
+
+Fixes: 330077d14de1 ("net/mlx5: E-switch, Supporting setting devlink port function mac address")
+Signed-off-by: Yuval Avnery <yuvalav@nvidia.com>
+Signed-off-by: Parav Pandit <parav@nvidia.com>
+Reviewed-by: Bodong Wang <bodong@nvidia.com>
+Reviewed-by: Alaa Hleihel <alaa@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/vport.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+index e05c5c0f3ae1..7d21fbb9192f 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+@@ -465,8 +465,6 @@ int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev,
+ void *in;
+ int err;
+
+- if (!vport)
+- return -EINVAL;
+ if (!MLX5_CAP_GEN(mdev, vport_group_manager))
+ return -EACCES;
+
+--
+2.30.2
+
--- /dev/null
+From 0fb9bbae616b9c8137a77cbde1d00a1a68104d7f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Jun 2021 19:14:08 +0300
+Subject: net/mlx5: E-Switch, Read PF mac address
+
+From: Parav Pandit <parav@nvidia.com>
+
+[ Upstream commit bbc8222dc49db8d49add0f27bcac33f4b92193dc ]
+
+External controller PF's MAC address is not read from the device during
+vport setup. Failing to read it results in showing all zeros to the user
+even though the factory-programmed MAC is a valid value.
+
+$ devlink port show eth1 -jp
+{
+ "port": {
+ "pci/0000:03:00.0/196608": {
+ "type": "eth",
+ "netdev": "eth1",
+ "flavour": "pcipf",
+ "controller": 1,
+ "pfnum": 0,
+ "splittable": false,
+ "function": {
+ "hw_addr": "00:00:00:00:00:00"
+ }
+ }
+ }
+}
+
+Hence, read it when enabling a vport.
+
+After the fix,
+
+$ devlink port show eth1 -jp
+{
+ "port": {
+ "pci/0000:03:00.0/196608": {
+ "type": "eth",
+ "netdev": "eth1",
+ "flavour": "pcipf",
+ "controller": 1,
+ "pfnum": 0,
+ "splittable": false,
+ "function": {
+ "hw_addr": "98:03:9b:a0:60:11"
+ }
+ }
+ }
+}
+
+Fixes: f099fde16db3 ("net/mlx5: E-switch, Support querying port function mac address")
+Signed-off-by: Bodong Wang <bodong@nvidia.com>
+Signed-off-by: Parav Pandit <parav@nvidia.com>
+Reviewed-by: Alaa Hleihel <alaa@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+index 2c6d95900e3c..a3edeea4ddd7 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+@@ -1308,6 +1308,12 @@ int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, u16 vport_num,
+ goto err_vhca_mapping;
+ }
+
++ /* External controller host PF has factory programmed MAC.
++ * Read it from the device.
++ */
++ if (mlx5_core_is_ecpf(esw->dev) && vport_num == MLX5_VPORT_PF)
++ mlx5_query_nic_vport_mac_address(esw->dev, vport_num, true, vport->info.mac);
++
+ esw_vport_change_handle_locked(vport);
+
+ esw->enabled_vports++;
+--
+2.30.2
+
--- /dev/null
+From 0ef3c0c99483c5e35b7f8cafa71b92f4cf903f61 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 8 Mar 2021 15:41:55 +0200
+Subject: net/mlx5: Fix error path for set HCA defaults
+
+From: Leon Romanovsky <leonro@nvidia.com>
+
+[ Upstream commit 94a4b8414d3e91104873007b659252f855ee344a ]
+
+When mlx5_core_set_hca_defaults() failed, the wrong goto label was
+used to run the error unwind flow.
+
+Fixes: 5bef709d76a2 ("net/mlx5: Enable host PF HCA after eswitch is initialized")
+Reviewed-by: Saeed Mahameed <saeedm@nvidia.com>
+Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
+Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
+Reviewed-by: Parav Pandit <parav@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/main.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+index efb93d63e54c..58b8f75d7a01 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+@@ -1157,7 +1157,7 @@ static int mlx5_load(struct mlx5_core_dev *dev)
+ err = mlx5_core_set_hca_defaults(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to set hca defaults\n");
+- goto err_sriov;
++ goto err_set_hca;
+ }
+
+ mlx5_vhca_event_start(dev);
+@@ -1190,6 +1190,7 @@ err_ec:
+ mlx5_sf_hw_table_destroy(dev);
+ err_vhca:
+ mlx5_vhca_event_stop(dev);
++err_set_hca:
+ mlx5_cleanup_fs(dev);
+ err_fs:
+ mlx5_accel_tls_cleanup(dev);
+--
+2.30.2
+
--- /dev/null
+From 03f5565e2579dae159d0c34ca485998323acaed2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 10 Jun 2021 14:20:28 +0300
+Subject: net/mlx5: Reset mkey index on creation
+
+From: Aya Levin <ayal@nvidia.com>
+
+[ Upstream commit 0232fc2ddcf4ffe01069fd1aa07922652120f44a ]
+
+Reset only the index part of the mkey and keep the variant part. On
+devlink reload, driver recreates mkeys, so the mkey index may change.
+Trying to preserve the variant part of the mkey, driver mistakenly
+merged the mkey index with current value. In case of a devlink reload,
+current value of index part is dirty, so the index may be corrupted.
+
+Fixes: 54c62e13ad76 ("{IB,net}/mlx5: Setup mkey variant before mr create command invocation")
+Signed-off-by: Aya Levin <ayal@nvidia.com>
+Signed-off-by: Amir Tzin <amirtz@nvidia.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/mr.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
+index 50af84e76fb6..174f71ed5280 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
+@@ -54,7 +54,7 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
+ mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index);
+ mkey->iova = MLX5_GET64(mkc, mkc, start_addr);
+ mkey->size = MLX5_GET64(mkc, mkc, len);
+- mkey->key |= mlx5_idx_to_mkey(mkey_index);
++ mkey->key = (u32)mlx5_mkey_variant(mkey->key) | mlx5_idx_to_mkey(mkey_index);
+ mkey->pd = MLX5_GET(mkc, mkc, pd);
+ init_waitqueue_head(&mkey->wait);
+
+--
+2.30.2
+
--- /dev/null
+From 3a2693b1bd2bd969d87f65e7392eeb96ca809f94 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 10 Jun 2021 18:39:53 +0300
+Subject: net/mlx5: SF_DEV, remove SF device on invalid state
+
+From: Parav Pandit <parav@nvidia.com>
+
+[ Upstream commit c7d6c19b3bde66d7aebbe93e0f9e6d9ff57fc3fa ]
+
+When auxiliary bus autoprobe is disabled and SF is in ACTIVE state,
+on SF port deletion it transitions from ACTIVE->ALLOCATED->INVALID.
+
+When the VHCA event handler queries the state, it has already
+transitioned to the INVALID state.
+
+In this scenario, the event handler fails to delete the SF device.
+
+Fix it by deleting the SF when SF state is INVALID.
+
+Fixes: 90d010b8634b ("net/mlx5: SF, Add auxiliary device support")
+Signed-off-by: Parav Pandit <parav@nvidia.com>
+Reviewed-by: Vu Pham <vuhuong@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
+index 90b524c59f3c..c4139f4648bf 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
+@@ -153,6 +153,7 @@ mlx5_sf_dev_state_change_handler(struct notifier_block *nb, unsigned long event_
+ sf_index = event->function_id - MLX5_CAP_GEN(table->dev, sf_base_id);
+ sf_dev = xa_load(&table->devices, sf_index);
+ switch (event->new_vhca_state) {
++ case MLX5_VHCA_STATE_INVALID:
+ case MLX5_VHCA_STATE_ALLOCATED:
+ if (sf_dev)
+ mlx5_sf_dev_del(table->dev, sf_dev, sf_index);
+--
+2.30.2
+
--- /dev/null
+From 4d863dc2a18bad335b9b880410dbbe3a7e018bfe Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 26 May 2021 10:40:36 +0300
+Subject: net/mlx5e: Block offload of outer header csum for GRE tunnel
+
+From: Aya Levin <ayal@nvidia.com>
+
+[ Upstream commit 54e1217b90486c94b26f24dcee1ee5ef5372f832 ]
+
+The device is able to offload either the outer header csum or inner
+header csum. The driver utilizes the inner csum offload. So, prohibit
+setting of tx-gre-csum-segmentation and let it be: off[fixed].
+
+Fixes: 2729984149e6 ("net/mlx5e: Support TSO and TX checksum offloads for GRE tunnels")
+Signed-off-by: Aya Levin <ayal@nvidia.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 9 +++------
+ 1 file changed, 3 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+index 2a3da167f248..16b8f5245032 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+@@ -5174,12 +5174,9 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
+ }
+
+ if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_GRE)) {
+- netdev->hw_features |= NETIF_F_GSO_GRE |
+- NETIF_F_GSO_GRE_CSUM;
+- netdev->hw_enc_features |= NETIF_F_GSO_GRE |
+- NETIF_F_GSO_GRE_CSUM;
+- netdev->gso_partial_features |= NETIF_F_GSO_GRE |
+- NETIF_F_GSO_GRE_CSUM;
++ netdev->hw_features |= NETIF_F_GSO_GRE;
++ netdev->hw_enc_features |= NETIF_F_GSO_GRE;
++ netdev->gso_partial_features |= NETIF_F_GSO_GRE;
+ }
+
+ if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_IPIP)) {
+--
+2.30.2
+
--- /dev/null
+From 1e7409821a98d50ca5a050231605875101173296 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 10 May 2021 14:34:58 +0300
+Subject: net/mlx5e: Block offload of outer header csum for UDP tunnels
+
+From: Aya Levin <ayal@nvidia.com>
+
+[ Upstream commit 6d6727dddc7f93fcc155cb8d0c49c29ae0e71122 ]
+
+The device is able to offload either the outer header csum or inner
+header csum. The driver utilizes the inner csum offload. Hence, block
+setting of tx-udp_tnl-csum-segmentation and set it to off[fixed].
+
+Fixes: b49663c8fb49 ("net/mlx5e: Add support for UDP tunnel segmentation with outer checksum offload")
+Signed-off-by: Aya Levin <ayal@nvidia.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 10 +++-------
+ 1 file changed, 3 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+index 99dc9f2beed5..2a3da167f248 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+@@ -5168,13 +5168,9 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
+ }
+
+ if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev)) {
+- netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL |
+- NETIF_F_GSO_UDP_TUNNEL_CSUM;
+- netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL |
+- NETIF_F_GSO_UDP_TUNNEL_CSUM;
+- netdev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM;
+- netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL |
+- NETIF_F_GSO_UDP_TUNNEL_CSUM;
++ netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
++ netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL;
++ netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL;
+ }
+
+ if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_GRE)) {
+--
+2.30.2
+
--- /dev/null
+From d2bb0740fbb3cdaafa24d95069cdb6a9f3ff246d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 14 May 2021 11:14:19 +0300
+Subject: net/mlx5e: Don't create devices during unload flow
+
+From: Dmytro Linkin <dlinkin@nvidia.com>
+
+[ Upstream commit a5ae8fc9058e37437c8c1f82b3d412b4abd1b9e6 ]
+
+Running the devlink reload command for a port in switchdev mode causes
+resource corruption: the driver can't release the allocated EQ and reclaim
+memory pages, because the "rdma" auxiliary device has added CQs which block
+the EQ from deletion.
+The erroneous sequence happens during the reload-down phase, as follows:
+
+1. detach device - suspends auxiliary devices which support it, destroys
+ others. During this step "eth-rep" and "rdma-rep" are destroyed,
+ "eth" - suspended.
+2. disable SRIOV - moves device to legacy mode; as part of disablement -
+ rescans drivers. This step adds "rdma" auxiliary device.
+3. destroy EQ table - <failure>.
+
+The driver shouldn't create any device during unload flows. To handle that,
+implement the MLX5_PRIV_FLAGS_DETACH flag: set it on device detach and unset
+it on device attach. If the flag is set, drivers rescan is a no-op.
+
+Fixes: a925b5e309c9 ("net/mlx5: Register mlx5 devices to auxiliary virtual bus")
+Signed-off-by: Dmytro Linkin <dlinkin@nvidia.com>
+Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
+Reviewed-by: Roi Dayan <roid@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/dev.c | 4 ++++
+ include/linux/mlx5/driver.h | 4 ++++
+ 2 files changed, 8 insertions(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+index f0623e94716b..897853a68cd0 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+@@ -306,6 +306,7 @@ int mlx5_attach_device(struct mlx5_core_dev *dev)
+ int ret = 0, i;
+
+ mutex_lock(&mlx5_intf_mutex);
++ priv->flags &= ~MLX5_PRIV_FLAGS_DETACH;
+ for (i = 0; i < ARRAY_SIZE(mlx5_adev_devices); i++) {
+ if (!priv->adev[i]) {
+ bool is_supported = false;
+@@ -378,6 +379,7 @@ skip_suspend:
+ del_adev(&priv->adev[i]->adev);
+ priv->adev[i] = NULL;
+ }
++ priv->flags |= MLX5_PRIV_FLAGS_DETACH;
+ mutex_unlock(&mlx5_intf_mutex);
+ }
+
+@@ -466,6 +468,8 @@ int mlx5_rescan_drivers_locked(struct mlx5_core_dev *dev)
+ struct mlx5_priv *priv = &dev->priv;
+
+ lockdep_assert_held(&mlx5_intf_mutex);
++ if (priv->flags & MLX5_PRIV_FLAGS_DETACH)
++ return 0;
+
+ delete_drivers(dev);
+ if (priv->flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
+diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
+index 133967c40214..6a31bbba1b6f 100644
+--- a/include/linux/mlx5/driver.h
++++ b/include/linux/mlx5/driver.h
+@@ -541,6 +541,10 @@ struct mlx5_core_roce {
+ enum {
+ MLX5_PRIV_FLAGS_DISABLE_IB_ADEV = 1 << 0,
+ MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV = 1 << 1,
++ /* Set during device detach to block any further devices
++ * creation/deletion on drivers rescan. Unset during device attach.
++ */
++ MLX5_PRIV_FLAGS_DETACH = 1 << 2,
+ };
+
+ struct mlx5_adev {
+--
+2.30.2
+
--- /dev/null
+From 3b06ca6032085a5604094032907be282297b1d96 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 26 May 2021 13:45:10 +0300
+Subject: net/mlx5e: Fix page reclaim for dead peer hairpin
+
+From: Dima Chumak <dchumak@nvidia.com>
+
+[ Upstream commit a3e5fd9314dfc4314a9567cde96e1aef83a7458a ]
+
+When adding a hairpin flow, a firmware-side send queue is created for
+the peer net device, which claims some host memory pages for its
+internal ring buffer. If the peer net device is removed/unbound before
+the hairpin flow is deleted, then the send queue is not destroyed which
+leads to a stack trace on pci device remove:
+
+[ 748.005230] mlx5_core 0000:08:00.2: wait_func:1094:(pid 12985): MANAGE_PAGES(0x108) timeout. Will cause a leak of a command resource
+[ 748.005231] mlx5_core 0000:08:00.2: reclaim_pages:514:(pid 12985): failed reclaiming pages: err -110
+[ 748.001835] mlx5_core 0000:08:00.2: mlx5_reclaim_root_pages:653:(pid 12985): failed reclaiming pages (-110) for func id 0x0
+[ 748.002171] ------------[ cut here ]------------
+[ 748.001177] FW pages counter is 4 after reclaiming all pages
+[ 748.001186] WARNING: CPU: 1 PID: 12985 at drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c:685 mlx5_reclaim_startup_pages+0x34b/0x460 [mlx5_core] [ +0.002771] Modules linked in: cls_flower mlx5_ib mlx5_core ptp pps_core act_mirred sch_ingress openvswitch nsh xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 br_netfilter rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_iscsi rdma_cm ib_umad ib_ipoib iw_cm ib_cm ib_uverbs ib_core overlay fuse [last unloaded: pps_core]
+[ 748.007225] CPU: 1 PID: 12985 Comm: tee Not tainted 5.12.0+ #1
+[ 748.001376] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
+[ 748.002315] RIP: 0010:mlx5_reclaim_startup_pages+0x34b/0x460 [mlx5_core]
+[ 748.001679] Code: 28 00 00 00 0f 85 22 01 00 00 48 81 c4 b0 00 00 00 31 c0 5b 5d 41 5c 41 5d 41 5e 41 5f c3 48 c7 c7 40 cc 19 a1 e8 9f 71 0e e2 <0f> 0b e9 30 ff ff ff 48 c7 c7 a0 cc 19 a1 e8 8c 71 0e e2 0f 0b e9
+[ 748.003781] RSP: 0018:ffff88815220faf8 EFLAGS: 00010286
+[ 748.001149] RAX: 0000000000000000 RBX: ffff8881b4900280 RCX: 0000000000000000
+[ 748.001445] RDX: 0000000000000027 RSI: 0000000000000004 RDI: ffffed102a441f51
+[ 748.001614] RBP: 00000000000032b9 R08: 0000000000000001 R09: ffffed1054a15ee8
+[ 748.001446] R10: ffff8882a50af73b R11: ffffed1054a15ee7 R12: fffffbfff07c1e30
+[ 748.001447] R13: dffffc0000000000 R14: ffff8881b492cba8 R15: 0000000000000000
+[ 748.001429] FS: 00007f58bd08b580(0000) GS:ffff8882a5080000(0000) knlGS:0000000000000000
+[ 748.001695] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[ 748.001309] CR2: 000055a026351740 CR3: 00000001d3b48006 CR4: 0000000000370ea0
+[ 748.001506] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+[ 748.001483] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+[ 748.001654] Call Trace:
+[ 748.000576] ? mlx5_satisfy_startup_pages+0x290/0x290 [mlx5_core]
+[ 748.001416] ? mlx5_cmd_teardown_hca+0xa2/0xd0 [mlx5_core]
+[ 748.001354] ? mlx5_cmd_init_hca+0x280/0x280 [mlx5_core]
+[ 748.001203] mlx5_function_teardown+0x30/0x60 [mlx5_core]
+[ 748.001275] mlx5_uninit_one+0xa7/0xc0 [mlx5_core]
+[ 748.001200] remove_one+0x5f/0xc0 [mlx5_core]
+[ 748.001075] pci_device_remove+0x9f/0x1d0
+[ 748.000833] device_release_driver_internal+0x1e0/0x490
+[ 748.001207] unbind_store+0x19f/0x200
+[ 748.000942] ? sysfs_file_ops+0x170/0x170
+[ 748.001000] kernfs_fop_write_iter+0x2bc/0x450
+[ 748.000970] new_sync_write+0x373/0x610
+[ 748.001124] ? new_sync_read+0x600/0x600
+[ 748.001057] ? lock_acquire+0x4d6/0x700
+[ 748.000908] ? lockdep_hardirqs_on_prepare+0x400/0x400
+[ 748.001126] ? fd_install+0x1c9/0x4d0
+[ 748.000951] vfs_write+0x4d0/0x800
+[ 748.000804] ksys_write+0xf9/0x1d0
+[ 748.000868] ? __x64_sys_read+0xb0/0xb0
+[ 748.000811] ? filp_open+0x50/0x50
+[ 748.000919] ? syscall_enter_from_user_mode+0x1d/0x50
+[ 748.001223] do_syscall_64+0x3f/0x80
+[ 748.000892] entry_SYSCALL_64_after_hwframe+0x44/0xae
+[ 748.001026] RIP: 0033:0x7f58bcfb22f7
+[ 748.000944] Code: 0d 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 24
+[ 748.003925] RSP: 002b:00007fffd7f2aaa8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
+[ 748.001732] RAX: ffffffffffffffda RBX: 000000000000000d RCX: 00007f58bcfb22f7
+[ 748.001426] RDX: 000000000000000d RSI: 00007fffd7f2abc0 RDI: 0000000000000003
+[ 748.001746] RBP: 00007fffd7f2abc0 R08: 0000000000000000 R09: 0000000000000001
+[ 748.001631] R10: 00000000000001b6 R11: 0000000000000246 R12: 000000000000000d
+[ 748.001537] R13: 00005597ac2c24a0 R14: 000000000000000d R15: 00007f58bd084700
+[ 748.001564] irq event stamp: 0
+[ 748.000787] hardirqs last enabled at (0): [<0000000000000000>] 0x0
+[ 748.001399] hardirqs last disabled at (0): [<ffffffff813132cf>] copy_process+0x146f/0x5eb0
+[ 748.001854] softirqs last enabled at (0): [<ffffffff8131330e>] copy_process+0x14ae/0x5eb0
+[ 748.013431] softirqs last disabled at (0): [<0000000000000000>] 0x0
+[ 748.001492] ---[ end trace a6fabd773d1c51ae ]---
+
+Fix by destroying the send queue of a hairpin peer net device that is
+being removed/unbound, which returns the allocated ring buffer pages to
+the host.
+
+Fixes: 4d8fcf216c90 ("net/mlx5e: Avoid unbounded peer devices when unpairing TC hairpin rules")
+Signed-off-by: Dima Chumak <dchumak@nvidia.com>
+Reviewed-by: Roi Dayan <roid@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +-
+ .../ethernet/mellanox/mlx5/core/transobj.c | 30 +++++++++++++++----
+ include/linux/mlx5/transobj.h | 1 +
+ 3 files changed, 26 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+index b633f669ea57..b3b8e44540a5 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+@@ -4622,7 +4622,7 @@ static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
+ list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) {
+ wait_for_completion(&hpe->res_ready);
+ if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id)
+- hpe->hp->pair->peer_gone = true;
++ mlx5_core_hairpin_clear_dead_peer(hpe->hp->pair);
+
+ mlx5e_hairpin_put(priv, hpe);
+ }
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
+index 01cc00ad8acf..b6931bbe52d2 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
+@@ -424,6 +424,15 @@ err_modify_sq:
+ return err;
+ }
+
++static void mlx5_hairpin_unpair_peer_sq(struct mlx5_hairpin *hp)
++{
++ int i;
++
++ for (i = 0; i < hp->num_channels; i++)
++ mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[i], MLX5_SQC_STATE_RDY,
++ MLX5_SQC_STATE_RST, 0, 0);
++}
++
+ static void mlx5_hairpin_unpair_queues(struct mlx5_hairpin *hp)
+ {
+ int i;
+@@ -432,13 +441,9 @@ static void mlx5_hairpin_unpair_queues(struct mlx5_hairpin *hp)
+ for (i = 0; i < hp->num_channels; i++)
+ mlx5_hairpin_modify_rq(hp->func_mdev, hp->rqn[i], MLX5_RQC_STATE_RDY,
+ MLX5_RQC_STATE_RST, 0, 0);
+-
+ /* unset peer SQs */
+- if (hp->peer_gone)
+- return;
+- for (i = 0; i < hp->num_channels; i++)
+- mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[i], MLX5_SQC_STATE_RDY,
+- MLX5_SQC_STATE_RST, 0, 0);
++ if (!hp->peer_gone)
++ mlx5_hairpin_unpair_peer_sq(hp);
+ }
+
+ struct mlx5_hairpin *
+@@ -485,3 +490,16 @@ void mlx5_core_hairpin_destroy(struct mlx5_hairpin *hp)
+ mlx5_hairpin_destroy_queues(hp);
+ kfree(hp);
+ }
++
++void mlx5_core_hairpin_clear_dead_peer(struct mlx5_hairpin *hp)
++{
++ int i;
++
++ mlx5_hairpin_unpair_peer_sq(hp);
++
++ /* destroy peer SQ */
++ for (i = 0; i < hp->num_channels; i++)
++ mlx5_core_destroy_sq(hp->peer_mdev, hp->sqn[i]);
++
++ hp->peer_gone = true;
++}
+diff --git a/include/linux/mlx5/transobj.h b/include/linux/mlx5/transobj.h
+index 028f442530cf..60ffeb6b67ae 100644
+--- a/include/linux/mlx5/transobj.h
++++ b/include/linux/mlx5/transobj.h
+@@ -85,4 +85,5 @@ mlx5_core_hairpin_create(struct mlx5_core_dev *func_mdev,
+ struct mlx5_hairpin_params *params);
+
+ void mlx5_core_hairpin_destroy(struct mlx5_hairpin *pair);
++void mlx5_core_hairpin_clear_dead_peer(struct mlx5_hairpin *hp);
+ #endif /* __TRANSOBJ_H__ */
+--
+2.30.2
+
--- /dev/null
+From cd14585a8542e0831130a18e5f6b57568b543362 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 31 May 2021 16:28:39 +0300
+Subject: net/mlx5e: Fix use-after-free of encap entry in neigh update handler
+
+From: Vlad Buslov <vladbu@nvidia.com>
+
+[ Upstream commit fb1a3132ee1ac968316e45d21a48703a6db0b6c3 ]
+
+Function mlx5e_rep_neigh_update() wasn't updated to accommodate rtnl lock
+removal from the TC filter update path and to properly handle concurrent
+encap entry insertion/deletion, which can lead to the following use-after-free:
+
+ [23827.464923] ==================================================================
+ [23827.469446] BUG: KASAN: use-after-free in mlx5e_encap_take+0x72/0x140 [mlx5_core]
+ [23827.470971] Read of size 4 at addr ffff8881d132228c by task kworker/u20:6/21635
+ [23827.472251]
+ [23827.472615] CPU: 9 PID: 21635 Comm: kworker/u20:6 Not tainted 5.13.0-rc3+ #5
+ [23827.473788] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
+ [23827.475639] Workqueue: mlx5e mlx5e_rep_neigh_update [mlx5_core]
+ [23827.476731] Call Trace:
+ [23827.477260] dump_stack+0xbb/0x107
+ [23827.477906] print_address_description.constprop.0+0x18/0x140
+ [23827.478896] ? mlx5e_encap_take+0x72/0x140 [mlx5_core]
+ [23827.479879] ? mlx5e_encap_take+0x72/0x140 [mlx5_core]
+ [23827.480905] kasan_report.cold+0x7c/0xd8
+ [23827.481701] ? mlx5e_encap_take+0x72/0x140 [mlx5_core]
+ [23827.482744] kasan_check_range+0x145/0x1a0
+ [23827.493112] mlx5e_encap_take+0x72/0x140 [mlx5_core]
+ [23827.494054] ? mlx5e_tc_tun_encap_info_equal_generic+0x140/0x140 [mlx5_core]
+ [23827.495296] mlx5e_rep_neigh_update+0x41e/0x5e0 [mlx5_core]
+ [23827.496338] ? mlx5e_rep_neigh_entry_release+0xb80/0xb80 [mlx5_core]
+ [23827.497486] ? read_word_at_a_time+0xe/0x20
+ [23827.498250] ? strscpy+0xa0/0x2a0
+ [23827.498889] process_one_work+0x8ac/0x14e0
+ [23827.499638] ? lockdep_hardirqs_on_prepare+0x400/0x400
+ [23827.500537] ? pwq_dec_nr_in_flight+0x2c0/0x2c0
+ [23827.501359] ? rwlock_bug.part.0+0x90/0x90
+ [23827.502116] worker_thread+0x53b/0x1220
+ [23827.502831] ? process_one_work+0x14e0/0x14e0
+ [23827.503627] kthread+0x328/0x3f0
+ [23827.504254] ? _raw_spin_unlock_irq+0x24/0x40
+ [23827.505065] ? __kthread_bind_mask+0x90/0x90
+ [23827.505912] ret_from_fork+0x1f/0x30
+ [23827.506621]
+ [23827.506987] Allocated by task 28248:
+ [23827.507694] kasan_save_stack+0x1b/0x40
+ [23827.508476] __kasan_kmalloc+0x7c/0x90
+ [23827.509197] mlx5e_attach_encap+0xde1/0x1d40 [mlx5_core]
+ [23827.510194] mlx5e_tc_add_fdb_flow+0x397/0xc40 [mlx5_core]
+ [23827.511218] __mlx5e_add_fdb_flow+0x519/0xb30 [mlx5_core]
+ [23827.512234] mlx5e_configure_flower+0x191c/0x4870 [mlx5_core]
+ [23827.513298] tc_setup_cb_add+0x1d5/0x420
+ [23827.514023] fl_hw_replace_filter+0x382/0x6a0 [cls_flower]
+ [23827.514975] fl_change+0x2ceb/0x4a51 [cls_flower]
+ [23827.515821] tc_new_tfilter+0x89a/0x2070
+ [23827.516548] rtnetlink_rcv_msg+0x644/0x8c0
+ [23827.517300] netlink_rcv_skb+0x11d/0x340
+ [23827.518021] netlink_unicast+0x42b/0x700
+ [23827.518742] netlink_sendmsg+0x743/0xc20
+ [23827.519467] sock_sendmsg+0xb2/0xe0
+ [23827.520131] ____sys_sendmsg+0x590/0x770
+ [23827.520851] ___sys_sendmsg+0xd8/0x160
+ [23827.521552] __sys_sendmsg+0xb7/0x140
+ [23827.522238] do_syscall_64+0x3a/0x70
+ [23827.522907] entry_SYSCALL_64_after_hwframe+0x44/0xae
+ [23827.523797]
+ [23827.524163] Freed by task 25948:
+ [23827.524780] kasan_save_stack+0x1b/0x40
+ [23827.525488] kasan_set_track+0x1c/0x30
+ [23827.526187] kasan_set_free_info+0x20/0x30
+ [23827.526968] __kasan_slab_free+0xed/0x130
+ [23827.527709] slab_free_freelist_hook+0xcf/0x1d0
+ [23827.528528] kmem_cache_free_bulk+0x33a/0x6e0
+ [23827.529317] kfree_rcu_work+0x55f/0xb70
+ [23827.530024] process_one_work+0x8ac/0x14e0
+ [23827.530770] worker_thread+0x53b/0x1220
+ [23827.531480] kthread+0x328/0x3f0
+ [23827.532114] ret_from_fork+0x1f/0x30
+ [23827.532785]
+ [23827.533147] Last potentially related work creation:
+ [23827.534007] kasan_save_stack+0x1b/0x40
+ [23827.534710] kasan_record_aux_stack+0xab/0xc0
+ [23827.535492] kvfree_call_rcu+0x31/0x7b0
+ [23827.536206] mlx5e_tc_del_fdb_flow+0x577/0xef0 [mlx5_core]
+ [23827.537305] mlx5e_flow_put+0x49/0x80 [mlx5_core]
+ [23827.538290] mlx5e_delete_flower+0x6d1/0xe60 [mlx5_core]
+ [23827.539300] tc_setup_cb_destroy+0x18e/0x2f0
+ [23827.540144] fl_hw_destroy_filter+0x1d2/0x310 [cls_flower]
+ [23827.541148] __fl_delete+0x4dc/0x660 [cls_flower]
+ [23827.541985] fl_delete+0x97/0x160 [cls_flower]
+ [23827.542782] tc_del_tfilter+0x7ab/0x13d0
+ [23827.543503] rtnetlink_rcv_msg+0x644/0x8c0
+ [23827.544257] netlink_rcv_skb+0x11d/0x340
+ [23827.544981] netlink_unicast+0x42b/0x700
+ [23827.545700] netlink_sendmsg+0x743/0xc20
+ [23827.546424] sock_sendmsg+0xb2/0xe0
+ [23827.547084] ____sys_sendmsg+0x590/0x770
+ [23827.547850] ___sys_sendmsg+0xd8/0x160
+ [23827.548606] __sys_sendmsg+0xb7/0x140
+ [23827.549303] do_syscall_64+0x3a/0x70
+ [23827.549969] entry_SYSCALL_64_after_hwframe+0x44/0xae
+ [23827.550853]
+ [23827.551217] The buggy address belongs to the object at ffff8881d1322200
+ [23827.551217] which belongs to the cache kmalloc-256 of size 256
+ [23827.553341] The buggy address is located 140 bytes inside of
+ [23827.553341] 256-byte region [ffff8881d1322200, ffff8881d1322300)
+ [23827.555747] The buggy address belongs to the page:
+ [23827.556847] page:00000000898762aa refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x1d1320
+ [23827.558651] head:00000000898762aa order:2 compound_mapcount:0 compound_pincount:0
+ [23827.559961] flags: 0x2ffff800010200(slab|head|node=0|zone=2|lastcpupid=0x1ffff)
+ [23827.561243] raw: 002ffff800010200 dead000000000100 dead000000000122 ffff888100042b40
+ [23827.562653] raw: 0000000000000000 0000000000200020 00000001ffffffff 0000000000000000
+ [23827.564112] page dumped because: kasan: bad access detected
+ [23827.565439]
+ [23827.565932] Memory state around the buggy address:
+ [23827.566917] ffff8881d1322180: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+ [23827.568485] ffff8881d1322200: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ [23827.569818] >ffff8881d1322280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ [23827.571143] ^
+ [23827.571879] ffff8881d1322300: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+ [23827.573283] ffff8881d1322380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+ [23827.574654] ==================================================================
+
+Most of the necessary logic is already correctly implemented by
+mlx5e_get_next_valid_encap() helper that is used in neigh stats update
+handler. Make the handler generic by renaming it to
+mlx5e_get_next_matching_encap() and use callback to test whether flow is
+matching instead of hardcoded check for 'valid' flag value. Implement
+mlx5e_get_next_valid_encap() by calling mlx5e_get_next_matching_encap()
+with callback that tests encap MLX5_ENCAP_ENTRY_VALID flag. Implement new
+mlx5e_get_next_init_encap() helper by calling
+mlx5e_get_next_matching_encap() with callback that tests encap completion
+result to be non-error and use it in mlx5e_rep_neigh_update() to safely
+iterate over nhe->encap_list.
+
+Remove encap completion logic from mlx5e_rep_update_flows() since the encap
+entries passed to this function are already guaranteed to be properly
+initialized by similar code in mlx5e_get_next_init_encap().
+
+Fixes: 2a1f1768fa17 ("net/mlx5e: Refactor neigh update for concurrent execution")
+Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
+Reviewed-by: Roi Dayan <roid@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../mellanox/mlx5/core/en/rep/neigh.c | 15 ++++-----
+ .../ethernet/mellanox/mlx5/core/en/rep/tc.c | 6 +---
+ .../mellanox/mlx5/core/en/tc_tun_encap.c | 33 +++++++++++++++++--
+ .../net/ethernet/mellanox/mlx5/core/en_tc.h | 3 ++
+ 4 files changed, 40 insertions(+), 17 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c
+index be0ee03de721..2e9bee4e5209 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c
+@@ -129,10 +129,9 @@ static void mlx5e_rep_neigh_update(struct work_struct *work)
+ work);
+ struct mlx5e_neigh_hash_entry *nhe = update_work->nhe;
+ struct neighbour *n = update_work->n;
++ struct mlx5e_encap_entry *e = NULL;
+ bool neigh_connected, same_dev;
+- struct mlx5e_encap_entry *e;
+ unsigned char ha[ETH_ALEN];
+- struct mlx5e_priv *priv;
+ u8 nud_state, dead;
+
+ rtnl_lock();
+@@ -156,14 +155,12 @@ static void mlx5e_rep_neigh_update(struct work_struct *work)
+ if (!same_dev)
+ goto out;
+
+- list_for_each_entry(e, &nhe->encap_list, encap_list) {
+- if (!mlx5e_encap_take(e))
+- continue;
++ /* mlx5e_get_next_init_encap() releases previous encap before returning
++ * the next one.
++ */
++ while ((e = mlx5e_get_next_init_encap(nhe, e)) != NULL)
++ mlx5e_rep_update_flows(netdev_priv(e->out_dev), e, neigh_connected, ha);
+
+- priv = netdev_priv(e->out_dev);
+- mlx5e_rep_update_flows(priv, e, neigh_connected, ha);
+- mlx5e_encap_put(priv, e);
+- }
+ out:
+ rtnl_unlock();
+ mlx5e_release_neigh_update_work(update_work);
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
+index 96ba027dbef3..9992f94f794b 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
+@@ -93,13 +93,9 @@ void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
+
+ ASSERT_RTNL();
+
+- /* wait for encap to be fully initialized */
+- wait_for_completion(&e->res_ready);
+-
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID);
+- if (e->compl_result < 0 || (encap_connected == neigh_connected &&
+- ether_addr_equal(e->h_dest, ha)))
++ if (encap_connected == neigh_connected && ether_addr_equal(e->h_dest, ha))
+ goto unlock;
+
+ mlx5e_take_all_encap_flows(e, &flow_list);
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
+index 1560fcbf4ac7..a17d79effa27 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
+@@ -250,9 +250,12 @@ static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r,
+ mlx5e_take_tmp_flow(flow, flow_list, 0);
+ }
+
++typedef bool (match_cb)(struct mlx5e_encap_entry *);
++
+ static struct mlx5e_encap_entry *
+-mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
+- struct mlx5e_encap_entry *e)
++mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe,
++ struct mlx5e_encap_entry *e,
++ match_cb match)
+ {
+ struct mlx5e_encap_entry *next = NULL;
+
+@@ -287,7 +290,7 @@ retry:
+ /* wait for encap to be fully initialized */
+ wait_for_completion(&next->res_ready);
+ /* continue searching if encap entry is not in valid state after completion */
+- if (!(next->flags & MLX5_ENCAP_ENTRY_VALID)) {
++ if (!match(next)) {
+ e = next;
+ goto retry;
+ }
+@@ -295,6 +298,30 @@ retry:
+ return next;
+ }
+
++static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e)
++{
++ return e->flags & MLX5_ENCAP_ENTRY_VALID;
++}
++
++static struct mlx5e_encap_entry *
++mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
++ struct mlx5e_encap_entry *e)
++{
++ return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid);
++}
++
++static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e)
++{
++ return e->compl_result >= 0;
++}
++
++struct mlx5e_encap_entry *
++mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
++ struct mlx5e_encap_entry *e)
++{
++ return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized);
++}
++
+ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
+ {
+ struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+index 25c091795bcd..17027536efba 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+@@ -178,6 +178,9 @@ void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *f
+ void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list);
+
+ struct mlx5e_neigh_hash_entry;
++struct mlx5e_encap_entry *
++mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
++ struct mlx5e_encap_entry *e);
+ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe);
+
+ void mlx5e_tc_reoffload_flows_work(struct work_struct *work);
+--
+2.30.2
+
--- /dev/null
+From 20e0c5a5df38bbfb58c664a92748864aa7513662 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 May 2021 13:20:32 -0500
+Subject: net/mlx5e: Remove dependency in IPsec initialization flows
+
+From: Huy Nguyen <huyn@nvidia.com>
+
+[ Upstream commit 8ad893e516a77209a1818a2072d2027d87db809f ]
+
+Currently, IPsec feature is disabled because mlx5e_build_nic_netdev
+is required to be called after mlx5e_ipsec_init. This requirement is
+invalid as mlx5e_build_nic_netdev and mlx5e_ipsec_init initialize
+independent resources.
+
+Remove ipsec pointer check in mlx5e_build_nic_netdev so that the
+two functions can be called in any order.
+
+Fixes: 547eede070eb ("net/mlx5e: IPSec, Innova IPSec offload infrastructure")
+Signed-off-by: Huy Nguyen <huyn@nvidia.com>
+Reviewed-by: Raed Salem <raeds@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 3 ---
+ 1 file changed, 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+index 3d45341e2216..26f7fab109d9 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+@@ -532,9 +532,6 @@ void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct net_device *netdev = priv->netdev;
+
+- if (!priv->ipsec)
+- return;
+-
+ if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_ESP) ||
+ !MLX5_CAP_ETH(mdev, swp)) {
+ mlx5_core_dbg(mdev, "mlx5e: ESP and SWP offload not supported\n");
+--
+2.30.2
+
--- /dev/null
+From f5ecb9b951cf04278170ec6420272483a18e6cd3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 14 Jun 2021 15:06:50 +0300
+Subject: net: qrtr: fix OOB Read in qrtr_endpoint_post
+
+From: Pavel Skripkin <paskripkin@gmail.com>
+
+[ Upstream commit ad9d24c9429e2159d1e279dc3a83191ccb4daf1d ]
+
+Syzbot reported a slab-out-of-bounds read in
+qrtr_endpoint_post. The problem was the wrong
+type of _size_:
+
+ if (len != ALIGN(size, 4) + hdrlen)
+ goto err;
+
+If size from qrtr_hdr is 4294967293 (0xfffffffd), the result of
+ALIGN(size, 4) will be 0. In case of len == hdrlen and size == 4294967293
+in header this check won't fail and
+
+ skb_put_data(skb, data + hdrlen, size);
+
+will read out of bounds from data, which is an hdrlen-sized allocated block.
+
+Fixes: 194ccc88297a ("net: qrtr: Support decoding incoming v2 packets")
+Reported-and-tested-by: syzbot+1917d778024161609247@syzkaller.appspotmail.com
+Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
+Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/qrtr/qrtr.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
+index 1e4fb568fa84..24f10bf7d8a3 100644
+--- a/net/qrtr/qrtr.c
++++ b/net/qrtr/qrtr.c
+@@ -435,7 +435,7 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
+ struct qrtr_sock *ipc;
+ struct sk_buff *skb;
+ struct qrtr_cb *cb;
+- unsigned int size;
++ size_t size;
+ unsigned int ver;
+ size_t hdrlen;
+
+--
+2.30.2
+
--- /dev/null
+From 610ab271b1ee77934768e267cdac94c7da62eff6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 11 Jun 2021 13:26:00 -0500
+Subject: net: qualcomm: rmnet: don't over-count statistics
+
+From: Alex Elder <elder@linaro.org>
+
+[ Upstream commit 994c393bb6886d6d94d628475b274a8cb3fc67a4 ]
+
+The purpose of the loop using u64_stats_fetch_*_irq() is to ensure
+statistics on a given CPU are collected atomically. If one of the
+statistics values gets updated within the begin/retry window, the
+loop will run again.
+
+Currently the statistics totals are updated inside that window.
+This means that if the loop ever retries, the statistics for the
+CPU will be counted more than once.
+
+Fix this by taking a snapshot of a CPU's statistics inside the
+protected window, and then updating the counters with the snapshot
+values after exiting the loop.
+
+(Also add a newline at the end of this file...)
+
+Fixes: 192c4b5d48f2a ("net: qualcomm: rmnet: Add support for 64 bit stats")
+Signed-off-by: Alex Elder <elder@linaro.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/qualcomm/rmnet/rmnet_vnd.c | 18 +++++++++---------
+ 1 file changed, 9 insertions(+), 9 deletions(-)
+
+diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
+index 41fbd2ceeede..ab1e0fcccabb 100644
+--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
++++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
+@@ -126,24 +126,24 @@ static void rmnet_get_stats64(struct net_device *dev,
+ struct rtnl_link_stats64 *s)
+ {
+ struct rmnet_priv *priv = netdev_priv(dev);
+- struct rmnet_vnd_stats total_stats;
++ struct rmnet_vnd_stats total_stats = { };
+ struct rmnet_pcpu_stats *pcpu_ptr;
++ struct rmnet_vnd_stats snapshot;
+ unsigned int cpu, start;
+
+- memset(&total_stats, 0, sizeof(struct rmnet_vnd_stats));
+-
+ for_each_possible_cpu(cpu) {
+ pcpu_ptr = per_cpu_ptr(priv->pcpu_stats, cpu);
+
+ do {
+ start = u64_stats_fetch_begin_irq(&pcpu_ptr->syncp);
+- total_stats.rx_pkts += pcpu_ptr->stats.rx_pkts;
+- total_stats.rx_bytes += pcpu_ptr->stats.rx_bytes;
+- total_stats.tx_pkts += pcpu_ptr->stats.tx_pkts;
+- total_stats.tx_bytes += pcpu_ptr->stats.tx_bytes;
++ snapshot = pcpu_ptr->stats; /* struct assignment */
+ } while (u64_stats_fetch_retry_irq(&pcpu_ptr->syncp, start));
+
+- total_stats.tx_drops += pcpu_ptr->stats.tx_drops;
++ total_stats.rx_pkts += snapshot.rx_pkts;
++ total_stats.rx_bytes += snapshot.rx_bytes;
++ total_stats.tx_pkts += snapshot.tx_pkts;
++ total_stats.tx_bytes += snapshot.tx_bytes;
++ total_stats.tx_drops += snapshot.tx_drops;
+ }
+
+ s->rx_packets = total_stats.rx_pkts;
+@@ -354,4 +354,4 @@ int rmnet_vnd_update_dev_mtu(struct rmnet_port *port,
+ }
+
+ return 0;
+-}
+\ No newline at end of file
++}
+--
+2.30.2
+
--- /dev/null
+From a5183df796f299f6954fa9fce9ae6807eb93cfe0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Jun 2021 11:06:41 +0300
+Subject: net: rds: fix memory leak in rds_recvmsg
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Pavel Skripkin <paskripkin@gmail.com>
+
+[ Upstream commit 49bfcbfd989a8f1f23e705759a6bb099de2cff9f ]
+
+Syzbot reported a memory leak in rds. The problem
+was a refcount that is never put in case of error.
+
+int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+ int msg_flags)
+{
+...
+
+ if (!rds_next_incoming(rs, &inc)) {
+ ...
+ }
+
+After this "if", the refcount of inc has been incremented, and
+
+ if (rds_cmsg_recv(inc, msg, rs)) {
+ ret = -EFAULT;
+ goto out;
+ }
+...
+out:
+ return ret;
+}
+
+if rds_cmsg_recv() fails, the refcount won't be
+decremented. It is easy to see from the ftrace log that
+rds_inc_addref() doesn't have a matching rds_inc_put() in
+rds_recvmsg() after rds_cmsg_recv():
+
+ 1) | rds_recvmsg() {
+ 1) 3.721 us | rds_inc_addref();
+ 1) 3.853 us | rds_message_inc_copy_to_user();
+ 1) + 10.395 us | rds_cmsg_recv();
+ 1) + 34.260 us | }
+
+Fixes: bdbe6fbc6a2f ("RDS: recv.c")
+Reported-and-tested-by: syzbot+5134cdf021c4ed5aaa5f@syzkaller.appspotmail.com
+Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
+Reviewed-by: Håkon Bugge <haakon.bugge@oracle.com>
+Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/rds/recv.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/rds/recv.c b/net/rds/recv.c
+index aba4afe4dfed..967d115f97ef 100644
+--- a/net/rds/recv.c
++++ b/net/rds/recv.c
+@@ -714,7 +714,7 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+
+ if (rds_cmsg_recv(inc, msg, rs)) {
+ ret = -EFAULT;
+- goto out;
++ break;
+ }
+ rds_recvmsg_zcookie(rs, msg);
+
+--
+2.30.2
+
--- /dev/null
+From 4a6665fa5c65999ad0fe0899a081aa966fc7007d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 9 Jun 2021 11:23:56 -0300
+Subject: net/sched: act_ct: handle DNAT tuple collision
+
+From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+
+[ Upstream commit 13c62f5371e3eb4fc3400cfa26e64ca75f888008 ]
+
+This is the counterpart of 8aa7b526dc0b ("openvswitch: handle DNAT
+tuple collision") for act_ct. From that commit changelog:
+
+"""
+With multiple DNAT rules it's possible that after destination
+translation the resulting tuples collide.
+
+...
+
+Netfilter handles this case by allocating a null binding for SNAT at
+egress by default. Perform the same operation in openvswitch for DNAT
+if no explicit SNAT is requested by the user and allocate a null binding
+for SNAT for packets in the "original" direction.
+"""
+
+Fixes: 95219afbb980 ("act_ct: support asymmetric conntrack")
+Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/act_ct.c | 21 +++++++++++++--------
+ 1 file changed, 13 insertions(+), 8 deletions(-)
+
+diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
+index ba7f57cb41c3..143786d8cde0 100644
+--- a/net/sched/act_ct.c
++++ b/net/sched/act_ct.c
+@@ -904,14 +904,19 @@ static int tcf_ct_act_nat(struct sk_buff *skb,
+ }
+
+ err = ct_nat_execute(skb, ct, ctinfo, range, maniptype);
+- if (err == NF_ACCEPT &&
+- ct->status & IPS_SRC_NAT && ct->status & IPS_DST_NAT) {
+- if (maniptype == NF_NAT_MANIP_SRC)
+- maniptype = NF_NAT_MANIP_DST;
+- else
+- maniptype = NF_NAT_MANIP_SRC;
+-
+- err = ct_nat_execute(skb, ct, ctinfo, range, maniptype);
++ if (err == NF_ACCEPT && ct->status & IPS_DST_NAT) {
++ if (ct->status & IPS_SRC_NAT) {
++ if (maniptype == NF_NAT_MANIP_SRC)
++ maniptype = NF_NAT_MANIP_DST;
++ else
++ maniptype = NF_NAT_MANIP_SRC;
++
++ err = ct_nat_execute(skb, ct, ctinfo, range,
++ maniptype);
++ } else if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
++ err = ct_nat_execute(skb, ct, ctinfo, NULL,
++ NF_NAT_MANIP_SRC);
++ }
+ }
+ return err;
+ #else
+--
+2.30.2
+
--- /dev/null
+From f38d9b13d27aaf69b252401242437b383933f63b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 11 Jun 2021 15:16:11 +0800
+Subject: net: stmmac: dwmac1000: Fix extended MAC address registers definition
+
+From: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
+
+[ Upstream commit 1adb20f0d496b2c61e9aa1f4761b8d71f93d258e ]
+
+The register starting at 0x800 is the 16th MAC address register rather
+than the first one.
+
+Fixes: cffb13f4d6fb ("stmmac: extend mac addr reg and fix perfect filering")
+Signed-off-by: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/stmicro/stmmac/dwmac1000.h | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
+index b70d44ac0990..3c73453725f9 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
+@@ -76,10 +76,10 @@ enum power_event {
+ #define LPI_CTRL_STATUS_TLPIEN 0x00000001 /* Transmit LPI Entry */
+
+ /* GMAC HW ADDR regs */
+-#define GMAC_ADDR_HIGH(reg) (((reg > 15) ? 0x00000800 : 0x00000040) + \
+- (reg * 8))
+-#define GMAC_ADDR_LOW(reg) (((reg > 15) ? 0x00000804 : 0x00000044) + \
+- (reg * 8))
++#define GMAC_ADDR_HIGH(reg) ((reg > 15) ? 0x00000800 + (reg - 16) * 8 : \
++ 0x00000040 + (reg * 8))
++#define GMAC_ADDR_LOW(reg) ((reg > 15) ? 0x00000804 + (reg - 16) * 8 : \
++ 0x00000044 + (reg * 8))
+ #define GMAC_MAX_PERFECT_ADDRESSES 1
+
+ #define GMAC_PCS_BASE 0x000000c0 /* PCS register base */
+--
+2.30.2
+
--- /dev/null
+From 49654c7e2d29da5f367d58655dd3f9e5bde99d07 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Jun 2021 10:48:33 +0800
+Subject: net: usb: fix possible use-after-free in smsc75xx_bind
+
+From: Dongliang Mu <mudongliangabcd@gmail.com>
+
+[ Upstream commit 56b786d86694e079d8aad9b314e015cd4ac02a3d ]
+
+The commit 46a8b29c6306 ("net: usb: fix memory leak in smsc75xx_bind")
+fails to clean up the work scheduled in smsc75xx_reset->
+smsc75xx_set_multicast, which leads to use-after-free if the work is
+scheduled to start after the deallocation. In addition, this patch
+also removes a dangling pointer - dev->data[0].
+
+This patch calls cancel_work_sync to cancel the scheduled work and sets
+the dangling pointer to NULL.
+
+Fixes: 46a8b29c6306 ("net: usb: fix memory leak in smsc75xx_bind")
+Signed-off-by: Dongliang Mu <mudongliangabcd@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/usb/smsc75xx.c | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c
+index 76ed79bb1e3f..5281291711af 100644
+--- a/drivers/net/usb/smsc75xx.c
++++ b/drivers/net/usb/smsc75xx.c
+@@ -1483,7 +1483,7 @@ static int smsc75xx_bind(struct usbnet *dev, struct usb_interface *intf)
+ ret = smsc75xx_wait_ready(dev, 0);
+ if (ret < 0) {
+ netdev_warn(dev->net, "device not ready in smsc75xx_bind\n");
+- goto err;
++ goto free_pdata;
+ }
+
+ smsc75xx_init_mac_address(dev);
+@@ -1492,7 +1492,7 @@ static int smsc75xx_bind(struct usbnet *dev, struct usb_interface *intf)
+ ret = smsc75xx_reset(dev);
+ if (ret < 0) {
+ netdev_warn(dev->net, "smsc75xx_reset error %d\n", ret);
+- goto err;
++ goto cancel_work;
+ }
+
+ dev->net->netdev_ops = &smsc75xx_netdev_ops;
+@@ -1503,8 +1503,11 @@ static int smsc75xx_bind(struct usbnet *dev, struct usb_interface *intf)
+ dev->net->max_mtu = MAX_SINGLE_PACKET_SIZE;
+ return 0;
+
+-err:
++cancel_work:
++ cancel_work_sync(&pdata->set_multicast);
++free_pdata:
+ kfree(pdata);
++ dev->data[0] = 0;
+ return ret;
+ }
+
+@@ -1515,7 +1518,6 @@ static void smsc75xx_unbind(struct usbnet *dev, struct usb_interface *intf)
+ cancel_work_sync(&pdata->set_multicast);
+ netif_dbg(dev, ifdown, dev->net, "free pdata\n");
+ kfree(pdata);
+- pdata = NULL;
+ dev->data[0] = 0;
+ }
+ }
+--
+2.30.2
+
--- /dev/null
+From 6ae3cf75093f67de3f00ae738572c778ae814bd4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 4 Jun 2021 03:07:28 +0200
+Subject: netfilter: nf_tables: initialize set before expression setup
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+[ Upstream commit ad9f151e560b016b6ad3280b48e42fa11e1a5440 ]
+
+nft_set_elem_expr_alloc() needs an initialized set if the expression sets
+the NFT_EXPR_GC flag. Move set fields initialization before expression
+setup.
+
+[4512935.019450] ==================================================================
+[4512935.019456] BUG: KASAN: null-ptr-deref in nft_set_elem_expr_alloc+0x84/0xd0 [nf_tables]
+[4512935.019487] Read of size 8 at addr 0000000000000070 by task nft/23532
+[4512935.019494] CPU: 1 PID: 23532 Comm: nft Not tainted 5.12.0-rc4+ #48
+[...]
+[4512935.019502] Call Trace:
+[4512935.019505] dump_stack+0x89/0xb4
+[4512935.019512] ? nft_set_elem_expr_alloc+0x84/0xd0 [nf_tables]
+[4512935.019536] ? nft_set_elem_expr_alloc+0x84/0xd0 [nf_tables]
+[4512935.019560] kasan_report.cold.12+0x5f/0xd8
+[4512935.019566] ? nft_set_elem_expr_alloc+0x84/0xd0 [nf_tables]
+[4512935.019590] nft_set_elem_expr_alloc+0x84/0xd0 [nf_tables]
+[4512935.019615] nf_tables_newset+0xc7f/0x1460 [nf_tables]
+
+Reported-by: syzbot+ce96ca2b1d0b37c6422d@syzkaller.appspotmail.com
+Fixes: 65038428b2c6 ("netfilter: nf_tables: allow to specify stateful expression in set definition")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_tables_api.c | 83 ++++++++++++++++++-----------------
+ 1 file changed, 42 insertions(+), 41 deletions(-)
+
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index 31016c144c48..9d5ea2352965 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -4317,13 +4317,44 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
+ err = nf_tables_set_alloc_name(&ctx, set, name);
+ kfree(name);
+ if (err < 0)
+- goto err_set_alloc_name;
++ goto err_set_name;
++
++ udata = NULL;
++ if (udlen) {
++ udata = set->data + size;
++ nla_memcpy(udata, nla[NFTA_SET_USERDATA], udlen);
++ }
++
++ INIT_LIST_HEAD(&set->bindings);
++ set->table = table;
++ write_pnet(&set->net, net);
++ set->ops = ops;
++ set->ktype = ktype;
++ set->klen = desc.klen;
++ set->dtype = dtype;
++ set->objtype = objtype;
++ set->dlen = desc.dlen;
++ set->flags = flags;
++ set->size = desc.size;
++ set->policy = policy;
++ set->udlen = udlen;
++ set->udata = udata;
++ set->timeout = timeout;
++ set->gc_int = gc_int;
++
++ set->field_count = desc.field_count;
++ for (i = 0; i < desc.field_count; i++)
++ set->field_len[i] = desc.field_len[i];
++
++ err = ops->init(set, &desc, nla);
++ if (err < 0)
++ goto err_set_init;
+
+ if (nla[NFTA_SET_EXPR]) {
+ expr = nft_set_elem_expr_alloc(&ctx, set, nla[NFTA_SET_EXPR]);
+ if (IS_ERR(expr)) {
+ err = PTR_ERR(expr);
+- goto err_set_alloc_name;
++ goto err_set_expr_alloc;
+ }
+ set->exprs[0] = expr;
+ set->num_exprs++;
+@@ -4334,74 +4365,44 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
+
+ if (!(flags & NFT_SET_EXPR)) {
+ err = -EINVAL;
+- goto err_set_alloc_name;
++ goto err_set_expr_alloc;
+ }
+ i = 0;
+ nla_for_each_nested(tmp, nla[NFTA_SET_EXPRESSIONS], left) {
+ if (i == NFT_SET_EXPR_MAX) {
+ err = -E2BIG;
+- goto err_set_init;
++ goto err_set_expr_alloc;
+ }
+ if (nla_type(tmp) != NFTA_LIST_ELEM) {
+ err = -EINVAL;
+- goto err_set_init;
++ goto err_set_expr_alloc;
+ }
+ expr = nft_set_elem_expr_alloc(&ctx, set, tmp);
+ if (IS_ERR(expr)) {
+ err = PTR_ERR(expr);
+- goto err_set_init;
++ goto err_set_expr_alloc;
+ }
+ set->exprs[i++] = expr;
+ set->num_exprs++;
+ }
+ }
+
+- udata = NULL;
+- if (udlen) {
+- udata = set->data + size;
+- nla_memcpy(udata, nla[NFTA_SET_USERDATA], udlen);
+- }
+-
+- INIT_LIST_HEAD(&set->bindings);
+- set->table = table;
+- write_pnet(&set->net, net);
+- set->ops = ops;
+- set->ktype = ktype;
+- set->klen = desc.klen;
+- set->dtype = dtype;
+- set->objtype = objtype;
+- set->dlen = desc.dlen;
+- set->flags = flags;
+- set->size = desc.size;
+- set->policy = policy;
+- set->udlen = udlen;
+- set->udata = udata;
+- set->timeout = timeout;
+- set->gc_int = gc_int;
+ set->handle = nf_tables_alloc_handle(table);
+
+- set->field_count = desc.field_count;
+- for (i = 0; i < desc.field_count; i++)
+- set->field_len[i] = desc.field_len[i];
+-
+- err = ops->init(set, &desc, nla);
+- if (err < 0)
+- goto err_set_init;
+-
+ err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set);
+ if (err < 0)
+- goto err_set_trans;
++ goto err_set_expr_alloc;
+
+ list_add_tail_rcu(&set->list, &table->sets);
+ table->use++;
+ return 0;
+
+-err_set_trans:
+- ops->destroy(set);
+-err_set_init:
++err_set_expr_alloc:
+ for (i = 0; i < set->num_exprs; i++)
+ nft_expr_destroy(&ctx, set->exprs[i]);
+-err_set_alloc_name:
++
++ ops->destroy(set);
++err_set_init:
+ kfree(set->name);
+ err_set_name:
+ kvfree(set);
+--
+2.30.2
+
--- /dev/null
+From 5334a2dffae9fef08675ded525ad1d1f974b998a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Jun 2021 13:48:18 +0200
+Subject: netfilter: nft_fib_ipv6: skip ipv6 packets from any to link-local
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit 12f36e9bf678a81d030ca1b693dcda62b55af7c5 ]
+
+The ip6tables rpfilter match has an extra check to skip packets with
+"::" source address.
+
+Extend this to ipv6 fib expression. Else ipv6 duplicate address detection
+packets will fail rpf route check -- lookup returns -ENETUNREACH.
+
+While at it, extend the prerouting check to also cover the ingress hook.
+
+Closes: https://bugzilla.netfilter.org/show_bug.cgi?id=1543
+Fixes: f6d0cbcf09c5 ("netfilter: nf_tables: add fib expression")
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv6/netfilter/nft_fib_ipv6.c | 22 ++++++++++++++++++----
+ 1 file changed, 18 insertions(+), 4 deletions(-)
+
+diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
+index e204163c7036..92f3235fa287 100644
+--- a/net/ipv6/netfilter/nft_fib_ipv6.c
++++ b/net/ipv6/netfilter/nft_fib_ipv6.c
+@@ -135,6 +135,17 @@ void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
+ }
+ EXPORT_SYMBOL_GPL(nft_fib6_eval_type);
+
++static bool nft_fib_v6_skip_icmpv6(const struct sk_buff *skb, u8 next, const struct ipv6hdr *iph)
++{
++ if (likely(next != IPPROTO_ICMPV6))
++ return false;
++
++ if (ipv6_addr_type(&iph->saddr) != IPV6_ADDR_ANY)
++ return false;
++
++ return ipv6_addr_type(&iph->daddr) & IPV6_ADDR_LINKLOCAL;
++}
++
+ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+ {
+@@ -163,10 +174,13 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
+
+ lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif, iph);
+
+- if (nft_hook(pkt) == NF_INET_PRE_ROUTING &&
+- nft_fib_is_loopback(pkt->skb, nft_in(pkt))) {
+- nft_fib_store_result(dest, priv, nft_in(pkt));
+- return;
++ if (nft_hook(pkt) == NF_INET_PRE_ROUTING ||
++ nft_hook(pkt) == NF_INET_INGRESS) {
++ if (nft_fib_is_loopback(pkt->skb, nft_in(pkt)) ||
++ nft_fib_v6_skip_icmpv6(pkt->skb, pkt->tprot, iph)) {
++ nft_fib_store_result(dest, priv, nft_in(pkt));
++ return;
++ }
+ }
+
+ *dest = 0;
+--
+2.30.2
+
--- /dev/null
+From efc79d7aaae13e13b996fefb98b99438f0b14838 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 10 Jun 2021 19:40:29 +0300
+Subject: netfilter: synproxy: Fix out of bounds when parsing TCP options
+
+From: Maxim Mikityanskiy <maximmi@nvidia.com>
+
+[ Upstream commit 5fc177ab759418c9537433e63301096e733fb915 ]
+
+The TCP option parser in synproxy (synproxy_parse_options) could read
+one byte out of bounds. When the length is 1, the execution flow gets
+into the loop, reads one byte of the opcode, and if the opcode is
+neither TCPOPT_EOL nor TCPOPT_NOP, it reads one more byte, which exceeds
+the length of 1.
+
+This fix is inspired by commit 9609dad263f8 ("ipv4: tcp_input: fix stack
+out of bounds when parsing TCP options.").
+
+v2 changes:
+
+Added an early return when length < 0 to avoid calling
+skb_header_pointer with negative length.
+
+Cc: Young Xiao <92siuyang@gmail.com>
+Fixes: 48b1de4c110a ("netfilter: add SYNPROXY core/target")
+Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
+Reviewed-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_synproxy_core.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
+index b100c04a0e43..3d6d49420db8 100644
+--- a/net/netfilter/nf_synproxy_core.c
++++ b/net/netfilter/nf_synproxy_core.c
+@@ -31,6 +31,9 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
+ int length = (th->doff * 4) - sizeof(*th);
+ u8 buf[40], *ptr;
+
++ if (unlikely(length < 0))
++ return false;
++
+ ptr = skb_header_pointer(skb, doff + sizeof(*th), length, buf);
+ if (ptr == NULL)
+ return false;
+@@ -47,6 +50,8 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
+ length--;
+ continue;
+ default:
++ if (length < 2)
++ return true;
+ opsize = *ptr++;
+ if (opsize < 2)
+ return true;
+--
+2.30.2
+
--- /dev/null
+From 3d8fdf3c44be6d0ccedb4c82e6fbe4a5174c8afa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 12 Jun 2021 14:53:12 +0200
+Subject: netxen_nic: Fix an error handling path in 'netxen_nic_probe()'
+
+From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+
+[ Upstream commit 49a10c7b176295f8fafb338911cf028e97f65f4d ]
+
+If an error occurs after a 'pci_enable_pcie_error_reporting()' call, it
+must be undone by a corresponding 'pci_disable_pcie_error_reporting()'
+call, as already done in the remove function.
+
+Fixes: e87ad5539343 ("netxen: support pci error handlers")
+Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
+index 7e6bac85495d..344ea1143454 100644
+--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
++++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
+@@ -1602,6 +1602,8 @@ err_out_free_netdev:
+ free_netdev(netdev);
+
+ err_out_free_res:
++ if (NX_IS_REVISION_P3(pdev->revision))
++ pci_disable_pcie_error_reporting(pdev);
+ pci_release_regions(pdev);
+
+ err_out_disable_pdev:
+--
+2.30.2
+
--- /dev/null
+From e73d0c8a1a1163e52cf2ca9a0ed08139d27c8cf4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 14 Jun 2021 15:24:05 -0700
+Subject: ptp: improve max_adj check against unreasonable values
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit 475b92f932168a78da8109acd10bfb7578b8f2bb ]
+
+Scaled PPM conversion to PPB may (on 64bit systems) result
+in a value larger than s32 can hold (freq/scaled_ppm is a long).
+This means the kernel will not correctly reject unreasonably
+high ->freq values (e.g. > 4294967295ppb, 281474976645 scaled PPM).
+
+The conversion is equivalent to a division by ~66 (65.536),
+so the value of ppb is always smaller than ppm, but not small
+enough to assume narrowing the type from long -> s32 is okay.
+
+Note that reasonable user space (e.g. ptp4l) will not use such
+high values, anyway, 4289046510ppb ~= 4.3x, so the fix is
+somewhat pedantic.
+
+Fixes: d39a743511cd ("ptp: validate the requested frequency adjustment.")
+Fixes: d94ba80ebbea ("ptp: Added a brand new class driver for ptp clocks.")
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Acked-by: Richard Cochran <richardcochran@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/ptp/ptp_clock.c | 6 +++---
+ include/linux/ptp_clock_kernel.h | 2 +-
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
+index 03a246e60fd9..21c4c34c52d8 100644
+--- a/drivers/ptp/ptp_clock.c
++++ b/drivers/ptp/ptp_clock.c
+@@ -63,7 +63,7 @@ static void enqueue_external_timestamp(struct timestamp_event_queue *queue,
+ spin_unlock_irqrestore(&queue->lock, flags);
+ }
+
+-s32 scaled_ppm_to_ppb(long ppm)
++long scaled_ppm_to_ppb(long ppm)
+ {
+ /*
+ * The 'freq' field in the 'struct timex' is in parts per
+@@ -80,7 +80,7 @@ s32 scaled_ppm_to_ppb(long ppm)
+ s64 ppb = 1 + ppm;
+ ppb *= 125;
+ ppb >>= 13;
+- return (s32) ppb;
++ return (long) ppb;
+ }
+ EXPORT_SYMBOL(scaled_ppm_to_ppb);
+
+@@ -138,7 +138,7 @@ static int ptp_clock_adjtime(struct posix_clock *pc, struct __kernel_timex *tx)
+ delta = ktime_to_ns(kt);
+ err = ops->adjtime(ops, delta);
+ } else if (tx->modes & ADJ_FREQUENCY) {
+- s32 ppb = scaled_ppm_to_ppb(tx->freq);
++ long ppb = scaled_ppm_to_ppb(tx->freq);
+ if (ppb > ops->max_adj || ppb < -ops->max_adj)
+ return -ERANGE;
+ if (ops->adjfine)
+diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h
+index 0d47fd33b228..51d7f1b8b32a 100644
+--- a/include/linux/ptp_clock_kernel.h
++++ b/include/linux/ptp_clock_kernel.h
+@@ -235,7 +235,7 @@ extern int ptp_clock_index(struct ptp_clock *ptp);
+ * @ppm: Parts per million, but with a 16 bit binary fractional field
+ */
+
+-extern s32 scaled_ppm_to_ppb(long ppm);
++extern long scaled_ppm_to_ppb(long ppm);
+
+ /**
+ * ptp_find_pin() - obtain the pin index of a given auxiliary function
+--
+2.30.2
+
--- /dev/null
+From eed8972fee58ba9a2f0da58f51449155d8361108 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 12 Jun 2021 14:37:46 +0200
+Subject: qlcnic: Fix an error handling path in 'qlcnic_probe()'
+
+From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+
+[ Upstream commit cb3376604a676e0302258b01893911bdd7aa5278 ]
+
+If an error occurs after a 'pci_enable_pcie_error_reporting()' call, it
+must be undone by a corresponding 'pci_disable_pcie_error_reporting()'
+call, as already done in the remove function.
+
+Fixes: 451724c821c1 ("qlcnic: aer support")
+Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+index 96b947fde646..3beafc60747e 100644
+--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+@@ -2690,6 +2690,7 @@ err_out_free_hw_res:
+ kfree(ahw);
+
+ err_out_free_res:
++ pci_disable_pcie_error_reporting(pdev);
+ pci_release_regions(pdev);
+
+ err_out_disable_pdev:
+--
+2.30.2
+
--- /dev/null
+From 3f5647a93eff159721da77305b0e1f52825a74f3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 25 Feb 2021 12:27:53 +0200
+Subject: Revert "net/mlx5: Arm only EQs with EQEs"
+
+From: Shay Drory <shayd@nvidia.com>
+
+[ Upstream commit 7a545077cb6701957e84c7f158630bb5c984e648 ]
+
+In the scenario described below, an EQ can remain in FIRED state which
+can result in missing an interrupt generation.
+
+The scenario:
+
+device mlx5_core driver
+------ ----------------
+EQ1.eqe generated
+EQ1.MSI-X sent
+EQ1.state = FIRED
+EQ2.eqe generated
+ mlx5_irq()
+ polls - eq1_eqes()
+ arm eq1
+ polls - eq2_eqes()
+ arm eq2
+EQ2.MSI-X sent
+EQ2.state = FIRED
+ mlx5_irq()
+ polls - eq2_eqes() -- no eqes found
+ driver skips EQ arming;
+
+->EQ2 remains fired, misses generating interrupt.
+
+Hence, always arm the EQ by reverting the cited commit in fixes tag.
+
+Fixes: d894892dda25 ("net/mlx5: Arm only EQs with EQEs")
+Signed-off-by: Shay Drory <shayd@nvidia.com>
+Reviewed-by: Parav Pandit <parav@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/eq.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+index 1fa9c18563da..31c6a3b91f4a 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+@@ -136,7 +136,7 @@ static int mlx5_eq_comp_int(struct notifier_block *nb,
+
+ eqe = next_eqe_sw(eq);
+ if (!eqe)
+- return 0;
++ goto out;
+
+ do {
+ struct mlx5_core_cq *cq;
+@@ -161,6 +161,8 @@ static int mlx5_eq_comp_int(struct notifier_block *nb,
+ ++eq->cons_index;
+
+ } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq)));
++
++out:
+ eq_update_ci(eq, 1);
+
+ if (cqn != -1)
+@@ -248,9 +250,9 @@ static int mlx5_eq_async_int(struct notifier_block *nb,
+ ++eq->cons_index;
+
+ } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq)));
+- eq_update_ci(eq, 1);
+
+ out:
++ eq_update_ci(eq, 1);
+ mlx5_eq_async_int_unlock(eq_async, recovery, &flags);
+
+ return unlikely(recovery) ? num_eqes : 0;
+--
+2.30.2
+
--- /dev/null
+From 86ae57e238612501b8889e2d61ff2d0584d811e5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 9 Jun 2021 14:17:53 +0300
+Subject: rtnetlink: Fix regression in bridge VLAN configuration
+
+From: Ido Schimmel <idosch@nvidia.com>
+
+[ Upstream commit d2e381c4963663bca6f30c3b996fa4dbafe8fcb5 ]
+
+Cited commit started returning errors when notification info is not
+filled by the bridge driver, resulting in the following regression:
+
+ # ip link add name br1 type bridge vlan_filtering 1
+ # bridge vlan add dev br1 vid 555 self pvid untagged
+ RTNETLINK answers: Invalid argument
+
+As long as the bridge driver does not fill notification info for the
+bridge device itself, an empty notification should not be considered as
+an error. This is explained in commit 59ccaaaa49b5 ("bridge: dont send
+notification when skb->len == 0 in rtnl_bridge_notify").
+
+Fix by removing the error and adding a comment to avoid future bugs.
+
+Fixes: a8db57c1d285 ("rtnetlink: Fix missing error code in rtnl_bridge_notify()")
+Signed-off-by: Ido Schimmel <idosch@nvidia.com>
+Reviewed-by: Nikolay Aleksandrov <nikolay@nvidia.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/rtnetlink.c | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
+index 9ad046917b34..2123427883ba 100644
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -4833,10 +4833,12 @@ static int rtnl_bridge_notify(struct net_device *dev)
+ if (err < 0)
+ goto errout;
+
+- if (!skb->len) {
+- err = -EINVAL;
++ /* Notification info is only filled for bridge ports, not the bridge
++ * device itself. Therefore, a zero notification length is valid and
++ * should not result in an error.
++ */
++ if (!skb->len)
+ goto errout;
+- }
+
+ rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
+ return 0;
+--
+2.30.2
+
--- /dev/null
+From 7b48d4aba38d5a6cf2d06e9b9b0911de3af6e679 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 10 Jun 2021 19:40:31 +0300
+Subject: sch_cake: Fix out of bounds when parsing TCP options and header
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Maxim Mikityanskiy <maximmi@nvidia.com>
+
+[ Upstream commit ba91c49dedbde758ba0b72f57ac90b06ddf8e548 ]
+
+The TCP option parser in cake qdisc (cake_get_tcpopt and
+cake_tcph_may_drop) could read one byte out of bounds. When the length
+is 1, the execution flow gets into the loop, reads one byte of the
+opcode, and if the opcode is neither TCPOPT_EOL nor TCPOPT_NOP, it reads
+one more byte, which exceeds the length of 1.
+
+This fix is inspired by commit 9609dad263f8 ("ipv4: tcp_input: fix stack
+out of bounds when parsing TCP options.").
+
+v2 changes:
+
+Added doff validation in cake_get_tcphdr to avoid parsing garbage as TCP
+header. Although it wasn't strictly an out-of-bounds access (memory was
+allocated), garbage values could be read where CAKE expected the TCP
+header if doff was smaller than 5.
+
+Cc: Young Xiao <92siuyang@gmail.com>
+Fixes: 8b7138814f29 ("sch_cake: Add optional ACK filter")
+Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
+Acked-by: Toke Høiland-Jørgensen <toke@toke.dk>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/sch_cake.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
+index 7d37638ee1c7..5c15968b5155 100644
+--- a/net/sched/sch_cake.c
++++ b/net/sched/sch_cake.c
+@@ -943,7 +943,7 @@ static struct tcphdr *cake_get_tcphdr(const struct sk_buff *skb,
+ }
+
+ tcph = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph);
+- if (!tcph)
++ if (!tcph || tcph->doff < 5)
+ return NULL;
+
+ return skb_header_pointer(skb, offset,
+@@ -967,6 +967,8 @@ static const void *cake_get_tcpopt(const struct tcphdr *tcph,
+ length--;
+ continue;
+ }
++ if (length < 2)
++ break;
+ opsize = *ptr++;
+ if (opsize < 2 || opsize > length)
+ break;
+@@ -1104,6 +1106,8 @@ static bool cake_tcph_may_drop(const struct tcphdr *tcph,
+ length--;
+ continue;
+ }
++ if (length < 2)
++ break;
+ opsize = *ptr++;
+ if (opsize < 2 || opsize > length)
+ break;
+--
+2.30.2
+
--- /dev/null
+From 66cd5f79b5fac77a0ffaaa5dd80464daa578293a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 10 Jun 2021 15:59:43 -0700
+Subject: selftests: mptcp: enable syncookie only in absence of reorders
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit 2395da0e17935ce9158cdfae433962bdb6cbfa67 ]
+
+Syncookie validation may fail for OoO packets, causing spurious
+resets and self-test failures, so let's force syncookie only
+for test iterations with no OoO.
+
+Fixes: fed61c4b584c ("selftests: mptcp: make 2nd net namespace use tcp syn cookies unconditionally")
+Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/198
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/net/mptcp/mptcp_connect.sh | 11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+
+diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+index 65b3b983efc2..8763706b0d04 100755
+--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
++++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+@@ -197,9 +197,6 @@ ip -net "$ns4" link set ns4eth3 up
+ ip -net "$ns4" route add default via 10.0.3.2
+ ip -net "$ns4" route add default via dead:beef:3::2
+
+-# use TCP syn cookies, even if no flooding was detected.
+-ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2
+-
+ set_ethtool_flags() {
+ local ns="$1"
+ local dev="$2"
+@@ -711,6 +708,14 @@ for sender in $ns1 $ns2 $ns3 $ns4;do
+ exit $ret
+ fi
+
++ # ns1<->ns2 is not subject to reordering/tc delays. Use it to test
++ # mptcp syncookie support.
++ if [ $sender = $ns1 ]; then
++ ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2
++ else
++ ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=1
++ fi
++
+ run_tests "$ns2" $sender 10.0.1.2
+ run_tests "$ns2" $sender dead:beef:1::2
+ run_tests "$ns2" $sender 10.0.2.1
+--
+2.30.2
+
mm-memory-failure-make-sure-wait-for-page-writeback-.patch
kvm-lapic-restore-guard-to-prevent-illegal-apic-regi.patch
fanotify-fix-copy_event_to_user-fid-error-clean-up.patch
+batman-adv-avoid-warn_on-timing-related-checks.patch
+staging-rtl8723bs-fix-monitor-netdev-register-unregi.patch
+mac80211-fix-skb-length-check-in-ieee80211_scan_rx.patch
+mlxsw-reg-spectrum-3-enforce-lowest-max-shaper-burst.patch
+mlxsw-core-set-thermal-zone-polling-delay-argument-t.patch
+libbpf-fixes-incorrect-rx_ring_setup_done.patch
+net-ipv4-fix-memory-leak-in-netlbl_cipsov4_add_std.patch
+vrf-fix-maximum-mtu.patch
+net-rds-fix-memory-leak-in-rds_recvmsg.patch
+net-dsa-felix-re-enable-tx-flow-control-in-ocelot_po.patch
+net-ena-fix-dma-mapping-function-issues-in-xdp.patch
+net-lantiq-disable-interrupt-before-sheduling-napi.patch
+netfilter-nf_tables-initialize-set-before-expression.patch
+netfilter-nft_fib_ipv6-skip-ipv6-packets-from-any-to.patch
+ice-add-ndo_bpf-callback-for-safe-mode-netdev-ops.patch
+ice-parameterize-functions-responsible-for-tx-ring-m.patch
+udp-fix-race-between-close-and-udp_abort.patch
+rtnetlink-fix-regression-in-bridge-vlan-configuratio.patch
+net-sched-act_ct-handle-dnat-tuple-collision.patch
+net-mlx5e-fix-use-after-free-of-encap-entry-in-neigh.patch
+net-mlx5e-remove-dependency-in-ipsec-initialization-.patch
+net-mlx5e-fix-page-reclaim-for-dead-peer-hairpin.patch
+net-mlx5-consider-roce-cap-before-init-rdma-resource.patch
+net-mlx5-dr-don-t-use-sw-steering-when-roce-is-not-s.patch
+revert-net-mlx5-arm-only-eqs-with-eqes.patch
+net-mlx5e-block-offload-of-outer-header-csum-for-udp.patch
+net-mlx5e-block-offload-of-outer-header-csum-for-gre.patch
+skbuff-fix-incorrect-msg_zerocopy-copy-notifications.patch
+netfilter-synproxy-fix-out-of-bounds-when-parsing-tc.patch
+mptcp-fix-out-of-bounds-when-parsing-tcp-options.patch
+sch_cake-fix-out-of-bounds-when-parsing-tcp-options-.patch
+mptcp-try-harder-to-borrow-memory-from-subflow-under.patch
+mptcp-wake-up-readers-only-for-in-sequence-data.patch
+mptcp-do-not-warn-on-bad-input-from-the-network.patch
+selftests-mptcp-enable-syncookie-only-in-absence-of-.patch
+mptcp-fix-soft-lookup-in-subflow_error_report.patch
+alx-fix-an-error-handling-path-in-alx_probe.patch
+cxgb4-fix-endianness-when-flashing-boot-image.patch
+cxgb4-fix-sleep-in-atomic-when-flashing-phy-firmware.patch
+cxgb4-halt-chip-before-flashing-phy-firmware-image.patch
+net-stmmac-dwmac1000-fix-extended-mac-address-regist.patch
+net-make-get_net_ns-return-error-if-net_ns-is-disabl.patch
+net-qualcomm-rmnet-don-t-over-count-statistics.patch
+ethtool-strset-fix-message-length-calculation.patch
+qlcnic-fix-an-error-handling-path-in-qlcnic_probe.patch
+netxen_nic-fix-an-error-handling-path-in-netxen_nic_.patch
+cxgb4-fix-wrong-ethtool-n-tuple-rule-lookup.patch
+ipv4-fix-device-used-for-dst_alloc-with-local-routes.patch
+net-qrtr-fix-oob-read-in-qrtr_endpoint_post.patch
+bpf-fix-leakage-under-speculation-on-mispredicted-br.patch
+net-mhi_net-update-the-transmit-handler-prototype.patch
+ptp-improve-max_adj-check-against-unreasonable-value.patch
+net-cdc_ncm-switch-to-eth-d-interface-naming.patch
+lantiq-net-fix-duplicated-skb-in-rx-descriptor-ring.patch
+net-usb-fix-possible-use-after-free-in-smsc75xx_bind.patch
+net-fec_ptp-fix-issue-caused-by-refactor-the-fec_dev.patch
+net-ipv4-fix-memory-leak-in-ip_mc_add1_src.patch
+net-af_unix-fix-a-data-race-in-unix_dgram_sendmsg-un.patch
+net-mlx5-fix-error-path-for-set-hca-defaults.patch
+net-mlx5-check-that-driver-was-probed-prior-attachin.patch
+net-mlx5-e-switch-read-pf-mac-address.patch
+net-mlx5-e-switch-allow-setting-guid-for-host-pf-vpo.patch
+net-mlx5-sf_dev-remove-sf-device-on-invalid-state.patch
+net-mlx5-dr-fix-stev1-incorrect-l3-decapsulation-pad.patch
+net-mlx5e-don-t-create-devices-during-unload-flow.patch
+net-mlx5-reset-mkey-index-on-creation.patch
+be2net-fix-an-error-handling-path-in-be_probe.patch
+net-hamradio-fix-memory-leak-in-mkiss_close.patch
+net-cdc_eem-fix-tx-fixup-skb-leak.patch
+cxgb4-fix-wrong-shift.patch
+bnxt_en-rediscover-phy-capabilities-after-firmware-r.patch
+bnxt_en-fix-tqm-fastpath-ring-backing-store-computat.patch
+bnxt_en-call-bnxt_ethtool_free-in-bnxt_init_one-erro.patch
+icmp-don-t-send-out-icmp-messages-with-a-source-addr.patch
+net-ethernet-fix-potential-use-after-free-in-ec_bhf_.patch
--- /dev/null
+From 98ec11e707621ca48c33ce6fe4ef56c0a76ab171 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 9 Jun 2021 18:41:57 -0400
+Subject: skbuff: fix incorrect msg_zerocopy copy notifications
+
+From: Willem de Bruijn <willemb@google.com>
+
+[ Upstream commit 3bdd5ee0ec8c14131d560da492e6df452c6fdd75 ]
+
+msg_zerocopy signals if a send operation required copying with a flag
+in serr->ee.ee_code.
+
+This field can be incorrect as of the below commit, as a result of
+both structs uarg and serr pointing into the same skb->cb[].
+
+uarg->zerocopy must be read before skb->cb[] is reinitialized to hold
+serr. Similar to other fields len, hi and lo, use a local variable to
+temporarily hold the value.
+
+This was not a problem before, when the value was passed as a function
+argument.
+
+Fixes: 75518851a2a0 ("skbuff: Push status and refcounts into sock_zerocopy_callback")
+Reported-by: Talal Ahmad <talalahmad@google.com>
+Signed-off-by: Willem de Bruijn <willemb@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/skbuff.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/net/core/skbuff.c b/net/core/skbuff.c
+index c421c8f80925..7997d99afbd8 100644
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -1252,6 +1252,7 @@ static void __msg_zerocopy_callback(struct ubuf_info *uarg)
+ struct sock *sk = skb->sk;
+ struct sk_buff_head *q;
+ unsigned long flags;
++ bool is_zerocopy;
+ u32 lo, hi;
+ u16 len;
+
+@@ -1266,6 +1267,7 @@ static void __msg_zerocopy_callback(struct ubuf_info *uarg)
+ len = uarg->len;
+ lo = uarg->id;
+ hi = uarg->id + len - 1;
++ is_zerocopy = uarg->zerocopy;
+
+ serr = SKB_EXT_ERR(skb);
+ memset(serr, 0, sizeof(*serr));
+@@ -1273,7 +1275,7 @@ static void __msg_zerocopy_callback(struct ubuf_info *uarg)
+ serr->ee.ee_origin = SO_EE_ORIGIN_ZEROCOPY;
+ serr->ee.ee_data = hi;
+ serr->ee.ee_info = lo;
+- if (!uarg->zerocopy)
++ if (!is_zerocopy)
+ serr->ee.ee_code |= SO_EE_CODE_ZEROCOPY_COPIED;
+
+ q = &sk->sk_error_queue;
+--
+2.30.2
+
--- /dev/null
+From 6349a75e29f89faf8e1fb61933b0bdd25238db0b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 26 Apr 2021 21:28:02 +0200
+Subject: staging: rtl8723bs: fix monitor netdev register/unregister
+
+From: Johannes Berg <johannes.berg@intel.com>
+
+[ Upstream commit b90f51e8e1f5014c01c82a7bf4c611643d0a8bcb ]
+
+Due to the locking changes and callbacks happening inside
+cfg80211, we need to use cfg80211 versions of the register
+and unregister functions if called within cfg80211 methods,
+otherwise deadlocks occur.
+
+Fixes: a05829a7222e ("cfg80211: avoid holding the RTNL when calling the driver")
+Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Link: https://lore.kernel.org/r/20210426212801.3d902cc9e6f4.Ie0b1e0c545920c61400a4b7d0f384ea61feb645a@changeid
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
+index cbec65e5a464..62ea47f9fee5 100644
+--- a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
++++ b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
+@@ -2579,7 +2579,7 @@ static int rtw_cfg80211_add_monitor_if(struct adapter *padapter, char *name, str
+ mon_wdev->iftype = NL80211_IFTYPE_MONITOR;
+ mon_ndev->ieee80211_ptr = mon_wdev;
+
+- ret = register_netdevice(mon_ndev);
++ ret = cfg80211_register_netdevice(mon_ndev);
+ if (ret) {
+ goto out;
+ }
+@@ -2661,7 +2661,7 @@ static int cfg80211_rtw_del_virtual_intf(struct wiphy *wiphy,
+ adapter = rtw_netdev_priv(ndev);
+ pwdev_priv = adapter_wdev_data(adapter);
+
+- unregister_netdevice(ndev);
++ cfg80211_unregister_netdevice(ndev);
+
+ if (ndev == pwdev_priv->pmon_ndev) {
+ pwdev_priv->pmon_ndev = NULL;
+--
+2.30.2
+
--- /dev/null
+From 5cc965003a179d47da9d43c3172aa8e975c12b8c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 9 Jun 2021 11:49:01 +0200
+Subject: udp: fix race between close() and udp_abort()
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit a8b897c7bcd47f4147d066e22cc01d1026d7640e ]
+
+Kaustubh reported and diagnosed a panic in udp_lib_lookup().
+The root cause is udp_abort() racing with close(). Both
+racing functions acquire the socket lock, but udp{v6}_destroy_sock()
+release it before performing destructive actions.
+
+We can't easily extend the socket lock scope to avoid the race,
+instead use the SOCK_DEAD flag to prevent udp_abort from doing
+any action when the critical race happens.
+
+Diagnosed-and-tested-by: Kaustubh Pandey <kapandey@codeaurora.org>
+Fixes: 5d77dca82839 ("net: diag: support SOCK_DESTROY for UDP sockets")
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/udp.c | 10 ++++++++++
+ net/ipv6/udp.c | 3 +++
+ 2 files changed, 13 insertions(+)
+
+diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
+index c586a6bb8c6d..3dd340679d09 100644
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -2576,6 +2576,9 @@ void udp_destroy_sock(struct sock *sk)
+ {
+ struct udp_sock *up = udp_sk(sk);
+ bool slow = lock_sock_fast(sk);
++
++ /* protects from races with udp_abort() */
++ sock_set_flag(sk, SOCK_DEAD);
+ udp_flush_pending_frames(sk);
+ unlock_sock_fast(sk, slow);
+ if (static_branch_unlikely(&udp_encap_needed_key)) {
+@@ -2826,10 +2829,17 @@ int udp_abort(struct sock *sk, int err)
+ {
+ lock_sock(sk);
+
++ /* udp{v6}_destroy_sock() sets it under the sk lock, avoid racing
++ * with close()
++ */
++ if (sock_flag(sk, SOCK_DEAD))
++ goto out;
++
+ sk->sk_err = err;
+ sk->sk_error_report(sk);
+ __udp_disconnect(sk, 0);
+
++out:
+ release_sock(sk);
+
+ return 0;
+diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
+index d25e5a9252fd..29288f134d7a 100644
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -1597,6 +1597,9 @@ void udpv6_destroy_sock(struct sock *sk)
+ {
+ struct udp_sock *up = udp_sk(sk);
+ lock_sock(sk);
++
++ /* protects from races with udp_abort() */
++ sock_set_flag(sk, SOCK_DEAD);
+ udp_v6_flush_pending_frames(sk);
+ release_sock(sk);
+
+--
+2.30.2
+
--- /dev/null
+From 67d046381974224ad6630dfef2f99f1567693a7b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Jun 2021 16:59:51 +0200
+Subject: vrf: fix maximum MTU
+
+From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+
+[ Upstream commit 9bb392f62447d73cc7dd7562413a2cd9104c82f8 ]
+
+My initial goal was to fix the default MTU, which is set to 65536, ie above
+the maximum defined in the driver: 65535 (ETH_MAX_MTU).
+
+In fact, it seems more consistent, wrt min_mtu, to set the max_mtu to
+IP6_MAX_MTU (65535 + sizeof(struct ipv6hdr)) and use it by default.
+
+Let's also, for consistency, set the mtu in vrf_setup(). This function
+calls ether_setup(), which sets the mtu to 1500. Thus, the whole mtu config
+is done in the same function.
+
+Before the patch:
+$ ip link add blue type vrf table 1234
+$ ip link list blue
+9: blue: <NOARP,MASTER> mtu 65536 qdisc noop state DOWN mode DEFAULT group default qlen 1000
+ link/ether fa:f5:27:70:24:2a brd ff:ff:ff:ff:ff:ff
+$ ip link set dev blue mtu 65535
+$ ip link set dev blue mtu 65536
+Error: mtu greater than device maximum.
+
+Fixes: 5055376a3b44 ("net: vrf: Fix ping failed when vrf mtu is set to 0")
+CC: Miaohe Lin <linmiaohe@huawei.com>
+Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Reviewed-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/vrf.c | 6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
+index 503e2fd7ce51..28a6c4cfe9b8 100644
+--- a/drivers/net/vrf.c
++++ b/drivers/net/vrf.c
+@@ -1183,9 +1183,6 @@ static int vrf_dev_init(struct net_device *dev)
+
+ dev->flags = IFF_MASTER | IFF_NOARP;
+
+- /* MTU is irrelevant for VRF device; set to 64k similar to lo */
+- dev->mtu = 64 * 1024;
+-
+ /* similarly, oper state is irrelevant; set to up to avoid confusion */
+ dev->operstate = IF_OPER_UP;
+ netdev_lockdep_set_classes(dev);
+@@ -1685,7 +1682,8 @@ static void vrf_setup(struct net_device *dev)
+ * which breaks networking.
+ */
+ dev->min_mtu = IPV6_MIN_MTU;
+- dev->max_mtu = ETH_MAX_MTU;
++ dev->max_mtu = IP6_MAX_MTU;
++ dev->mtu = dev->max_mtu;
+ }
+
+ static int vrf_validate(struct nlattr *tb[], struct nlattr *data[],
+--
+2.30.2
+