git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 5.12
author Sasha Levin <sashal@kernel.org>
Sun, 20 Jun 2021 03:15:22 +0000 (23:15 -0400)
committer Sasha Levin <sashal@kernel.org>
Sun, 20 Jun 2021 03:15:22 +0000 (23:15 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
76 files changed:
queue-5.12/alx-fix-an-error-handling-path-in-alx_probe.patch [new file with mode: 0644]
queue-5.12/batman-adv-avoid-warn_on-timing-related-checks.patch [new file with mode: 0644]
queue-5.12/be2net-fix-an-error-handling-path-in-be_probe.patch [new file with mode: 0644]
queue-5.12/bnxt_en-call-bnxt_ethtool_free-in-bnxt_init_one-erro.patch [new file with mode: 0644]
queue-5.12/bnxt_en-fix-tqm-fastpath-ring-backing-store-computat.patch [new file with mode: 0644]
queue-5.12/bnxt_en-rediscover-phy-capabilities-after-firmware-r.patch [new file with mode: 0644]
queue-5.12/bpf-fix-leakage-under-speculation-on-mispredicted-br.patch [new file with mode: 0644]
queue-5.12/cxgb4-fix-endianness-when-flashing-boot-image.patch [new file with mode: 0644]
queue-5.12/cxgb4-fix-sleep-in-atomic-when-flashing-phy-firmware.patch [new file with mode: 0644]
queue-5.12/cxgb4-fix-wrong-ethtool-n-tuple-rule-lookup.patch [new file with mode: 0644]
queue-5.12/cxgb4-fix-wrong-shift.patch [new file with mode: 0644]
queue-5.12/cxgb4-halt-chip-before-flashing-phy-firmware-image.patch [new file with mode: 0644]
queue-5.12/ethtool-strset-fix-message-length-calculation.patch [new file with mode: 0644]
queue-5.12/ice-add-ndo_bpf-callback-for-safe-mode-netdev-ops.patch [new file with mode: 0644]
queue-5.12/ice-parameterize-functions-responsible-for-tx-ring-m.patch [new file with mode: 0644]
queue-5.12/icmp-don-t-send-out-icmp-messages-with-a-source-addr.patch [new file with mode: 0644]
queue-5.12/ipv4-fix-device-used-for-dst_alloc-with-local-routes.patch [new file with mode: 0644]
queue-5.12/lantiq-net-fix-duplicated-skb-in-rx-descriptor-ring.patch [new file with mode: 0644]
queue-5.12/libbpf-fixes-incorrect-rx_ring_setup_done.patch [new file with mode: 0644]
queue-5.12/mac80211-fix-skb-length-check-in-ieee80211_scan_rx.patch [new file with mode: 0644]
queue-5.12/mlxsw-core-set-thermal-zone-polling-delay-argument-t.patch [new file with mode: 0644]
queue-5.12/mlxsw-reg-spectrum-3-enforce-lowest-max-shaper-burst.patch [new file with mode: 0644]
queue-5.12/mptcp-do-not-warn-on-bad-input-from-the-network.patch [new file with mode: 0644]
queue-5.12/mptcp-fix-out-of-bounds-when-parsing-tcp-options.patch [new file with mode: 0644]
queue-5.12/mptcp-fix-soft-lookup-in-subflow_error_report.patch [new file with mode: 0644]
queue-5.12/mptcp-try-harder-to-borrow-memory-from-subflow-under.patch [new file with mode: 0644]
queue-5.12/mptcp-wake-up-readers-only-for-in-sequence-data.patch [new file with mode: 0644]
queue-5.12/net-af_unix-fix-a-data-race-in-unix_dgram_sendmsg-un.patch [new file with mode: 0644]
queue-5.12/net-cdc_eem-fix-tx-fixup-skb-leak.patch [new file with mode: 0644]
queue-5.12/net-cdc_ncm-switch-to-eth-d-interface-naming.patch [new file with mode: 0644]
queue-5.12/net-dsa-felix-re-enable-tx-flow-control-in-ocelot_po.patch [new file with mode: 0644]
queue-5.12/net-ena-fix-dma-mapping-function-issues-in-xdp.patch [new file with mode: 0644]
queue-5.12/net-ethernet-fix-potential-use-after-free-in-ec_bhf_.patch [new file with mode: 0644]
queue-5.12/net-fec_ptp-fix-issue-caused-by-refactor-the-fec_dev.patch [new file with mode: 0644]
queue-5.12/net-hamradio-fix-memory-leak-in-mkiss_close.patch [new file with mode: 0644]
queue-5.12/net-ipv4-fix-memory-leak-in-ip_mc_add1_src.patch [new file with mode: 0644]
queue-5.12/net-ipv4-fix-memory-leak-in-netlbl_cipsov4_add_std.patch [new file with mode: 0644]
queue-5.12/net-lantiq-disable-interrupt-before-sheduling-napi.patch [new file with mode: 0644]
queue-5.12/net-make-get_net_ns-return-error-if-net_ns-is-disabl.patch [new file with mode: 0644]
queue-5.12/net-mhi_net-update-the-transmit-handler-prototype.patch [new file with mode: 0644]
queue-5.12/net-mlx5-check-that-driver-was-probed-prior-attachin.patch [new file with mode: 0644]
queue-5.12/net-mlx5-consider-roce-cap-before-init-rdma-resource.patch [new file with mode: 0644]
queue-5.12/net-mlx5-dr-don-t-use-sw-steering-when-roce-is-not-s.patch [new file with mode: 0644]
queue-5.12/net-mlx5-dr-fix-stev1-incorrect-l3-decapsulation-pad.patch [new file with mode: 0644]
queue-5.12/net-mlx5-e-switch-allow-setting-guid-for-host-pf-vpo.patch [new file with mode: 0644]
queue-5.12/net-mlx5-e-switch-read-pf-mac-address.patch [new file with mode: 0644]
queue-5.12/net-mlx5-fix-error-path-for-set-hca-defaults.patch [new file with mode: 0644]
queue-5.12/net-mlx5-reset-mkey-index-on-creation.patch [new file with mode: 0644]
queue-5.12/net-mlx5-sf_dev-remove-sf-device-on-invalid-state.patch [new file with mode: 0644]
queue-5.12/net-mlx5e-block-offload-of-outer-header-csum-for-gre.patch [new file with mode: 0644]
queue-5.12/net-mlx5e-block-offload-of-outer-header-csum-for-udp.patch [new file with mode: 0644]
queue-5.12/net-mlx5e-don-t-create-devices-during-unload-flow.patch [new file with mode: 0644]
queue-5.12/net-mlx5e-fix-page-reclaim-for-dead-peer-hairpin.patch [new file with mode: 0644]
queue-5.12/net-mlx5e-fix-use-after-free-of-encap-entry-in-neigh.patch [new file with mode: 0644]
queue-5.12/net-mlx5e-remove-dependency-in-ipsec-initialization-.patch [new file with mode: 0644]
queue-5.12/net-qrtr-fix-oob-read-in-qrtr_endpoint_post.patch [new file with mode: 0644]
queue-5.12/net-qualcomm-rmnet-don-t-over-count-statistics.patch [new file with mode: 0644]
queue-5.12/net-rds-fix-memory-leak-in-rds_recvmsg.patch [new file with mode: 0644]
queue-5.12/net-sched-act_ct-handle-dnat-tuple-collision.patch [new file with mode: 0644]
queue-5.12/net-stmmac-dwmac1000-fix-extended-mac-address-regist.patch [new file with mode: 0644]
queue-5.12/net-usb-fix-possible-use-after-free-in-smsc75xx_bind.patch [new file with mode: 0644]
queue-5.12/netfilter-nf_tables-initialize-set-before-expression.patch [new file with mode: 0644]
queue-5.12/netfilter-nft_fib_ipv6-skip-ipv6-packets-from-any-to.patch [new file with mode: 0644]
queue-5.12/netfilter-synproxy-fix-out-of-bounds-when-parsing-tc.patch [new file with mode: 0644]
queue-5.12/netxen_nic-fix-an-error-handling-path-in-netxen_nic_.patch [new file with mode: 0644]
queue-5.12/ptp-improve-max_adj-check-against-unreasonable-value.patch [new file with mode: 0644]
queue-5.12/qlcnic-fix-an-error-handling-path-in-qlcnic_probe.patch [new file with mode: 0644]
queue-5.12/revert-net-mlx5-arm-only-eqs-with-eqes.patch [new file with mode: 0644]
queue-5.12/rtnetlink-fix-regression-in-bridge-vlan-configuratio.patch [new file with mode: 0644]
queue-5.12/sch_cake-fix-out-of-bounds-when-parsing-tcp-options-.patch [new file with mode: 0644]
queue-5.12/selftests-mptcp-enable-syncookie-only-in-absence-of-.patch [new file with mode: 0644]
queue-5.12/series
queue-5.12/skbuff-fix-incorrect-msg_zerocopy-copy-notifications.patch [new file with mode: 0644]
queue-5.12/staging-rtl8723bs-fix-monitor-netdev-register-unregi.patch [new file with mode: 0644]
queue-5.12/udp-fix-race-between-close-and-udp_abort.patch [new file with mode: 0644]
queue-5.12/vrf-fix-maximum-mtu.patch [new file with mode: 0644]

diff --git a/queue-5.12/alx-fix-an-error-handling-path-in-alx_probe.patch b/queue-5.12/alx-fix-an-error-handling-path-in-alx_probe.patch
new file mode 100644
index 0000000..913be4f
--- /dev/null
@@ -0,0 +1,36 @@
+From d17352d49e46ad66c502b7926e7f0b509eee7f87 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 11 Jun 2021 08:13:39 +0200
+Subject: alx: Fix an error handling path in 'alx_probe()'
+
+From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+
+[ Upstream commit 33e381448cf7a05d76ac0b47d4a6531ecd0e5c53 ]
+
+If an error occurs after a 'pci_enable_pcie_error_reporting()' call, it
+must be undone by a corresponding 'pci_disable_pcie_error_reporting()'
+call, as already done in the remove function.
+
+Fixes: ab69bde6b2e9 ("alx: add a simple AR816x/AR817x device driver")
+Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/atheros/alx/main.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
+index 9e02f8864593..5e90df42b201 100644
+--- a/drivers/net/ethernet/atheros/alx/main.c
++++ b/drivers/net/ethernet/atheros/alx/main.c
+@@ -1849,6 +1849,7 @@ out_free_netdev:
+       free_netdev(netdev);
+ out_pci_release:
+       pci_release_mem_regions(pdev);
++      pci_disable_pcie_error_reporting(pdev);
+ out_pci_disable:
+       pci_disable_device(pdev);
+       return err;
+-- 
+2.30.2
+
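
The fix follows the usual goto-based unwind pattern in probe routines: everything acquired before the failure point is released in reverse order, and the patch adds the missing rung. A minimal standalone sketch of that ladder, with stub functions standing in for the PCI helpers (illustrative names, not the alx driver's actual code):

#include <stdio.h>

/* Illustrative stubs standing in for the PCI helpers used by a probe routine. */
static int  enable_device(void)         { puts("enable device");   return 0; }
static void disable_device(void)        { puts("disable device"); }
static int  request_regions(void)       { puts("request regions"); return 0; }
static void release_regions(void)       { puts("release regions"); }
static int  enable_err_reporting(void)  { puts("enable AER");      return 0; }
static void disable_err_reporting(void) { puts("disable AER"); }
static int  register_netdev_stub(void)  { puts("register netdev"); return -1; /* simulate failure */ }

static int probe(void)
{
    int err;

    err = enable_device();
    if (err)
        return err;

    err = request_regions();
    if (err)
        goto out_disable_device;

    err = enable_err_reporting();
    if (err)
        goto out_release_regions;

    err = register_netdev_stub();
    if (err)
        goto out_disable_err_reporting; /* the rung this kind of patch adds */

    return 0;

out_disable_err_reporting:
    disable_err_reporting();
out_release_regions:
    release_regions();
out_disable_device:
    disable_device();
    return err;
}

int main(void)
{
    return probe() ? 1 : 0;
}
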
diff --git a/queue-5.12/batman-adv-avoid-warn_on-timing-related-checks.patch b/queue-5.12/batman-adv-avoid-warn_on-timing-related-checks.patch
new file mode 100644
index 0000000..96050b1
--- /dev/null
@@ -0,0 +1,45 @@
+From 3edb6579f5a77dd64672179a579e6441df456105 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 18 May 2021 21:00:27 +0200
+Subject: batman-adv: Avoid WARN_ON timing related checks
+
+From: Sven Eckelmann <sven@narfation.org>
+
+[ Upstream commit 9f460ae31c4435fd022c443a6029352217a16ac1 ]
+
+The soft/batadv interface for a queued OGM can be changed during the time
+the OGM was queued for transmission and when the OGM is actually
+transmitted by the worker.
+
+But WARN_ON must be used to denote kernel bugs and not to print simple
+warnings. A warning can simply be printed using pr_warn.
+
+Reported-by: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
+Reported-by: syzbot+c0b807de416427ff3dd1@syzkaller.appspotmail.com
+Fixes: ef0a937f7a14 ("batman-adv: consider outgoing interface in OGM sending")
+Signed-off-by: Sven Eckelmann <sven@narfation.org>
+Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/batman-adv/bat_iv_ogm.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
+index a5e313cd6f44..b9dd150f6f01 100644
+--- a/net/batman-adv/bat_iv_ogm.c
++++ b/net/batman-adv/bat_iv_ogm.c
+@@ -409,8 +409,10 @@ static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet)
+       if (WARN_ON(!forw_packet->if_outgoing))
+               return;
+-      if (WARN_ON(forw_packet->if_outgoing->soft_iface != soft_iface))
++      if (forw_packet->if_outgoing->soft_iface != soft_iface) {
++              pr_warn("%s: soft interface switch for queued OGM\n", __func__);
+               return;
++      }
+       if (forw_packet->if_incoming->if_status != BATADV_IF_ACTIVE)
+               return;
+-- 
+2.30.2
+
diff --git a/queue-5.12/be2net-fix-an-error-handling-path-in-be_probe.patch b/queue-5.12/be2net-fix-an-error-handling-path-in-be_probe.patch
new file mode 100644
index 0000000..81e76ad
--- /dev/null
@@ -0,0 +1,37 @@
+From 9678ee300e0cc1278aa5f01698abf974dcecbbe6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Jun 2021 20:43:37 +0200
+Subject: be2net: Fix an error handling path in 'be_probe()'
+
+From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+
+[ Upstream commit c19c8c0e666f9259e2fc4d2fa4b9ff8e3b40ee5d ]
+
+If an error occurs after a 'pci_enable_pcie_error_reporting()' call, it
+must be undone by a corresponding 'pci_disable_pcie_error_reporting()'
+call, as already done in the remove function.
+
+Fixes: d6b6d9877878 ("be2net: use PCIe AER capability")
+Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+Acked-by: Somnath Kotur <somnath.kotur@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/emulex/benet/be_main.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
+index b6eba29d8e99..7968568bbe21 100644
+--- a/drivers/net/ethernet/emulex/benet/be_main.c
++++ b/drivers/net/ethernet/emulex/benet/be_main.c
+@@ -5897,6 +5897,7 @@ drv_cleanup:
+ unmap_bars:
+       be_unmap_pci_bars(adapter);
+ free_netdev:
++      pci_disable_pcie_error_reporting(pdev);
+       free_netdev(netdev);
+ rel_reg:
+       pci_release_regions(pdev);
+-- 
+2.30.2
+
diff --git a/queue-5.12/bnxt_en-call-bnxt_ethtool_free-in-bnxt_init_one-erro.patch b/queue-5.12/bnxt_en-call-bnxt_ethtool_free-in-bnxt_init_one-erro.patch
new file mode 100644
index 0000000..30645c9
--- /dev/null
@@ -0,0 +1,37 @@
+From ad57dca139f7b019a7514e7c48e30830ca8b1f21 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 18 Jun 2021 02:07:27 -0400
+Subject: bnxt_en: Call bnxt_ethtool_free() in bnxt_init_one() error path
+
+From: Somnath Kotur <somnath.kotur@broadcom.com>
+
+[ Upstream commit 03400aaa69f916a376e11526cf591901a96a3a5c ]
+
+bnxt_ethtool_init() may have allocated some memory and we need to
+call bnxt_ethtool_free() to properly unwind if bnxt_init_one()
+fails.
+
+Fixes: 7c3809181468 ("bnxt_en: Refactor bnxt_init_one() and turn on TPA support on 57500 chips.")
+Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+index 48c19602a0f3..c118de27bc5c 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -12981,6 +12981,7 @@ init_err_pci_clean:
+       bnxt_hwrm_func_drv_unrgtr(bp);
+       bnxt_free_hwrm_short_cmd_req(bp);
+       bnxt_free_hwrm_resources(bp);
++      bnxt_ethtool_free(bp);
+       kfree(bp->fw_health);
+       bp->fw_health = NULL;
+       bnxt_cleanup_pci(bp);
+-- 
+2.30.2
+
diff --git a/queue-5.12/bnxt_en-fix-tqm-fastpath-ring-backing-store-computat.patch b/queue-5.12/bnxt_en-fix-tqm-fastpath-ring-backing-store-computat.patch
new file mode 100644
index 0000000..d20de49
--- /dev/null
@@ -0,0 +1,41 @@
+From 0c4f5481c0c1590b05eec515e2df208bf3eda50d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 18 Jun 2021 02:07:26 -0400
+Subject: bnxt_en: Fix TQM fastpath ring backing store computation
+
+From: Rukhsana Ansari <rukhsana.ansari@broadcom.com>
+
+[ Upstream commit c12e1643d2738bcd4e26252ce531878841dd3f38 ]
+
+TQM fastpath ring needs to be sized to store both the requester
+and responder side of RoCE QPs in TQM for supporting bi-directional
+tests.  Fix bnxt_alloc_ctx_mem() to multiply the RoCE QPs by a factor of
+2 when computing the number of entries for TQM fastpath ring.  This
+fixes an RX pipeline stall issue when running bi-directional max
+RoCE QP tests.
+
+Fixes: c7dd7ab4b204 ("bnxt_en: Improve TQM ring context memory sizing formulas.")
+Signed-off-by: Rukhsana Ansari <rukhsana.ansari@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+index e9ac20c1c389..48c19602a0f3 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -7295,7 +7295,7 @@ skip_rdma:
+       entries_sp = ctx->vnic_max_vnic_entries + ctx->qp_max_l2_entries +
+                    2 * (extra_qps + ctx->qp_min_qp1_entries) + min;
+       entries_sp = roundup(entries_sp, ctx->tqm_entries_multiple);
+-      entries = ctx->qp_max_l2_entries + extra_qps + ctx->qp_min_qp1_entries;
++      entries = ctx->qp_max_l2_entries + 2 * (extra_qps + ctx->qp_min_qp1_entries);
+       entries = roundup(entries, ctx->tqm_entries_multiple);
+       entries = clamp_t(u32, entries, min, ctx->tqm_max_entries_per_ring);
+       for (i = 0; i < ctx->tqm_fp_rings_count + 1; i++) {
+-- 
+2.30.2
+
diff --git a/queue-5.12/bnxt_en-rediscover-phy-capabilities-after-firmware-r.patch b/queue-5.12/bnxt_en-rediscover-phy-capabilities-after-firmware-r.patch
new file mode 100644
index 0000000..876a7d8
--- /dev/null
@@ -0,0 +1,49 @@
+From 23ad4c4a9adb2b5d384a3f985b26f3e058a66e4b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 18 Jun 2021 02:07:25 -0400
+Subject: bnxt_en: Rediscover PHY capabilities after firmware reset
+
+From: Michael Chan <michael.chan@broadcom.com>
+
+[ Upstream commit 0afd6a4e8028cc487c240b6cfe04094e45a306e4 ]
+
+There is a missing bnxt_probe_phy() call in bnxt_fw_init_one() to
+rediscover the PHY capabilities after a firmware reset.  This can cause
+some PHY related functionalities to fail after a firmware reset.  For
+example, in multi-host, the ability for any host to configure the PHY
+settings may be lost after a firmware reset.
+
+Fixes: ec5d31e3c15d ("bnxt_en: Handle firmware reset status during IF_UP.")
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+index 027997c711ab..e9ac20c1c389 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -11573,6 +11573,8 @@ static void bnxt_fw_init_one_p3(struct bnxt *bp)
+       bnxt_hwrm_coal_params_qcaps(bp);
+ }
++static int bnxt_probe_phy(struct bnxt *bp, bool fw_dflt);
++
+ static int bnxt_fw_init_one(struct bnxt *bp)
+ {
+       int rc;
+@@ -11587,6 +11589,9 @@ static int bnxt_fw_init_one(struct bnxt *bp)
+               netdev_err(bp->dev, "Firmware init phase 2 failed\n");
+               return rc;
+       }
++      rc = bnxt_probe_phy(bp, false);
++      if (rc)
++              return rc;
+       rc = bnxt_approve_mac(bp, bp->dev->dev_addr, false);
+       if (rc)
+               return rc;
+-- 
+2.30.2
+
diff --git a/queue-5.12/bpf-fix-leakage-under-speculation-on-mispredicted-br.patch b/queue-5.12/bpf-fix-leakage-under-speculation-on-mispredicted-br.patch
new file mode 100644
index 0000000..cb9a154
--- /dev/null
@@ -0,0 +1,222 @@
+From efe2ba8d2a2c8826b0c9a1e472b882239c6816ca Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 May 2021 15:47:32 +0000
+Subject: bpf: Fix leakage under speculation on mispredicted branches
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+[ Upstream commit 9183671af6dbf60a1219371d4ed73e23f43b49db ]
+
+The verifier only enumerates valid control-flow paths and skips paths that
+are unreachable in the non-speculative domain. And so it can miss issues
+under speculative execution on mispredicted branches.
+
+For example, a type confusion has been demonstrated with the following
+crafted program:
+
+  // r0 = pointer to a map array entry
+  // r6 = pointer to readable stack slot
+  // r9 = scalar controlled by attacker
+  1: r0 = *(u64 *)(r0) // cache miss
+  2: if r0 != 0x0 goto line 4
+  3: r6 = r9
+  4: if r0 != 0x1 goto line 6
+  5: r9 = *(u8 *)(r6)
+  6: // leak r9
+
+Since line 3 runs iff r0 == 0 and line 5 runs iff r0 == 1, the verifier
+concludes that the pointer dereference on line 5 is safe. But: if the
+attacker trains both the branches to fall-through, such that the following
+is speculatively executed ...
+
+  r6 = r9
+  r9 = *(u8 *)(r6)
+  // leak r9
+
+... then the program will dereference an attacker-controlled value and could
+leak its content under speculative execution via side-channel. This requires
+to mistrain the branch predictor, which can be rather tricky, because the
+branches are mutually exclusive. However such training can be done at
+congruent addresses in user space using different branches that are not
+mutually exclusive. That is, by training branches in user space ...
+
+  A:  if r0 != 0x0 goto line C
+  B:  ...
+  C:  if r0 != 0x0 goto line D
+  D:  ...
+
+... such that addresses A and C collide to the same CPU branch prediction
+entries in the PHT (pattern history table) as those of the BPF program's
+lines 2 and 4, respectively. A non-privileged attacker could simply brute
+force such collisions in the PHT until observing the attack succeeding.
+
+Alternative methods to mistrain the branch predictor are also possible that
+avoid brute forcing the collisions in the PHT. A reliable attack has been
+demonstrated, for example, using the following crafted program:
+
+  // r0 = pointer to a [control] map array entry
+  // r7 = *(u64 *)(r0 + 0), training/attack phase
+  // r8 = *(u64 *)(r0 + 8), oob address
+  // [...]
+  // r0 = pointer to a [data] map array entry
+  1: if r7 == 0x3 goto line 3
+  2: r8 = r0
+  // crafted sequence of conditional jumps to separate the conditional
+  // branch in line 193 from the current execution flow
+  3: if r0 != 0x0 goto line 5
+  4: if r0 == 0x0 goto exit
+  5: if r0 != 0x0 goto line 7
+  6: if r0 == 0x0 goto exit
+  [...]
+  187: if r0 != 0x0 goto line 189
+  188: if r0 == 0x0 goto exit
+  // load any slowly-loaded value (due to cache miss in phase 3) ...
+  189: r3 = *(u64 *)(r0 + 0x1200)
+  // ... and turn it into known zero for verifier, while preserving slowly-
+  // loaded dependency when executing:
+  190: r3 &= 1
+  191: r3 &= 2
+  // speculatively bypassed phase dependency
+  192: r7 += r3
+  193: if r7 == 0x3 goto exit
+  194: r4 = *(u8 *)(r8 + 0)
+  // leak r4
+
+As can be seen, in training phase (phase != 0x3), the condition in line 1
+turns into false and therefore r8 with the oob address is overridden with
+the valid map value address, which in line 194 we can read out without
+issues. However, in attack phase, line 2 is skipped, and due to the cache
+miss in line 189 where the map value is (zeroed and later) added to the
+phase register, the condition in line 193 takes the fall-through path due
+to prior branch predictor training, where under speculation, it'll load the
+byte at oob address r8 (unknown scalar type at that point) which could then
+be leaked via side-channel.
+
+One way to mitigate these is to 'branch off' an unreachable path, meaning,
+the current verification path keeps following the is_branch_taken() path
+and we push the other branch to the verification stack. Given this is
+unreachable from the non-speculative domain, this branch's vstate is
+explicitly marked as speculative. This is needed for two reasons: i) if
+this path is solely seen from speculative execution, then we later on still
+want the dead code elimination to kick in in order to sanitize these
+instructions with jmp-1s, and ii) to ensure that paths walked in the
+non-speculative domain are not pruned from earlier walks of paths walked in
+the speculative domain. Additionally, for robustness, we mark the registers
+which have been part of the conditional as unknown in the speculative path
+given there should be no assumptions made on their content.
+
+The fix in here mitigates type confusion attacks described earlier due to
+i) all code paths in the BPF program being explored and ii) existing
+verifier logic already ensuring that given memory access instruction
+references one specific data structure.
+
+An alternative to this fix that has also been looked at in this scope was to
+mark aux->alu_state at the jump instruction with a BPF_JMP_TAKEN state as
+well as direction encoding (always-goto, always-fallthrough, unknown), such
+that mixing of different always-* directions themselves as well as mixing of
+always-* with unknown directions would cause a program rejection by the
+verifier, e.g. programs with constructs like 'if ([...]) { x = 0; } else
+{ x = 1; }' with subsequent 'if (x == 1) { [...] }'. For unprivileged, this
+would result in only single direction always-* taken paths, and unknown taken
+paths being allowed, such that the former could be patched from a conditional
+jump to an unconditional jump (ja). Compared to this approach here, it would
+have two downsides: i) valid programs that otherwise are not performing any
+pointer arithmetic, etc, would potentially be rejected/broken, and ii) we are
+required to turn off path pruning for unprivileged, where both can be avoided
+in this work through pushing the invalid branch to the verification stack.
+
+The issue was originally discovered by Adam and Ofek, and later independently
+discovered and reported as a result of Benedict and Piotr's research work.
+
+Fixes: b2157399cc98 ("bpf: prevent out-of-bounds speculation")
+Reported-by: Adam Morrison <mad@cs.tau.ac.il>
+Reported-by: Ofek Kirzner <ofekkir@gmail.com>
+Reported-by: Benedict Schlueter <benedict.schlueter@rub.de>
+Reported-by: Piotr Krysiuk <piotras@gmail.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Reviewed-by: John Fastabend <john.fastabend@gmail.com>
+Reviewed-by: Benedict Schlueter <benedict.schlueter@rub.de>
+Reviewed-by: Piotr Krysiuk <piotras@gmail.com>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/verifier.c | 44 +++++++++++++++++++++++++++++++++++++++----
+ 1 file changed, 40 insertions(+), 4 deletions(-)
+
+diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
+index 9e600767803b..bdf4be10c8cc 100644
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -5937,6 +5937,27 @@ struct bpf_sanitize_info {
+       bool mask_to_left;
+ };
++static struct bpf_verifier_state *
++sanitize_speculative_path(struct bpf_verifier_env *env,
++                        const struct bpf_insn *insn,
++                        u32 next_idx, u32 curr_idx)
++{
++      struct bpf_verifier_state *branch;
++      struct bpf_reg_state *regs;
++
++      branch = push_stack(env, next_idx, curr_idx, true);
++      if (branch && insn) {
++              regs = branch->frame[branch->curframe]->regs;
++              if (BPF_SRC(insn->code) == BPF_K) {
++                      mark_reg_unknown(env, regs, insn->dst_reg);
++              } else if (BPF_SRC(insn->code) == BPF_X) {
++                      mark_reg_unknown(env, regs, insn->dst_reg);
++                      mark_reg_unknown(env, regs, insn->src_reg);
++              }
++      }
++      return branch;
++}
++
+ static int sanitize_ptr_alu(struct bpf_verifier_env *env,
+                           struct bpf_insn *insn,
+                           const struct bpf_reg_state *ptr_reg,
+@@ -6020,7 +6041,8 @@ do_sim:
+               tmp = *dst_reg;
+               *dst_reg = *ptr_reg;
+       }
+-      ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true);
++      ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1,
++                                      env->insn_idx);
+       if (!ptr_is_dst_reg && ret)
+               *dst_reg = tmp;
+       return !ret ? REASON_STACK : 0;
+@@ -8204,14 +8226,28 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
+               if (err)
+                       return err;
+       }
++
+       if (pred == 1) {
+-              /* only follow the goto, ignore fall-through */
++              /* Only follow the goto, ignore fall-through. If needed, push
++               * the fall-through branch for simulation under speculative
++               * execution.
++               */
++              if (!env->bypass_spec_v1 &&
++                  !sanitize_speculative_path(env, insn, *insn_idx + 1,
++                                             *insn_idx))
++                      return -EFAULT;
+               *insn_idx += insn->off;
+               return 0;
+       } else if (pred == 0) {
+-              /* only follow fall-through branch, since
+-               * that's where the program will go
++              /* Only follow the fall-through branch, since that's where the
++               * program will go. If needed, push the goto branch for
++               * simulation under speculative execution.
+                */
++              if (!env->bypass_spec_v1 &&
++                  !sanitize_speculative_path(env, insn,
++                                             *insn_idx + insn->off + 1,
++                                             *insn_idx))
++                      return -EFAULT;
+               return 0;
+       }
+-- 
+2.30.2
+
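
A rough standalone model of the mitigation described above (not the verifier's real data structures): even when the branch outcome is statically known, the not-taken edge is pushed onto the verification stack as a speculative state, with the compared registers marked unknown, so the dead path still gets explored and sanitized:

#include <stdbool.h>
#include <stdio.h>

#define MAX_STACK 64
#define MAX_REGS  11

/* Crude stand-in for a verifier state: where to continue, whether the path
 * is only reachable speculatively, and what we still know about registers. */
struct state {
    int  insn_idx;
    bool speculative;
    bool reg_known[MAX_REGS];
};

static struct state stack[MAX_STACK];
static int top;

static struct state *push_state(int insn_idx, bool speculative)
{
    if (top == MAX_STACK)
        return NULL;
    stack[top].insn_idx = insn_idx;
    stack[top].speculative = speculative;
    for (int i = 0; i < MAX_REGS; i++)
        stack[top].reg_known[i] = true;
    return &stack[top++];
}

/* Model of sanitize_speculative_path(): queue the not-taken edge as a
 * speculative state and forget the contents of the compared registers. */
static bool queue_speculative_path(int next_idx, int dst_reg, int src_reg)
{
    struct state *branch = push_state(next_idx, true);

    if (!branch)
        return false;
    branch->reg_known[dst_reg] = false;
    if (src_reg >= 0)                 /* register-register compare */
        branch->reg_known[src_reg] = false;
    return true;
}

/* pred == 1: branch statically taken; pred == 0: statically not taken.
 * Returns the next instruction on the followed path, or -1 on error.
 * A jump with offset `off` lands at insn_idx + off + 1. */
static int check_cond_jmp(int insn_idx, int off, int pred, int dst_reg, int src_reg)
{
    if (pred == 1) {
        if (!queue_speculative_path(insn_idx + 1, dst_reg, src_reg))
            return -1;
        return insn_idx + off + 1;
    }
    if (pred == 0) {
        if (!queue_speculative_path(insn_idx + off + 1, dst_reg, src_reg))
            return -1;
        return insn_idx + 1;
    }
    return insn_idx + 1;  /* unknown outcome: both paths verified normally (not modeled) */
}

int main(void)
{
    int next = check_cond_jmp(2, 1, 1, 0, -1);

    printf("continue at insn %d, %d speculative path(s) queued\n", next, top);
    return 0;
}

The index arithmetic mirrors the patched check_cond_jmp_op(): the architecturally followed path continues normally, while the other edge is only walked with the speculative flag set.
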
diff --git a/queue-5.12/cxgb4-fix-endianness-when-flashing-boot-image.patch b/queue-5.12/cxgb4-fix-endianness-when-flashing-boot-image.patch
new file mode 100644
index 0000000..9934a40
--- /dev/null
@@ -0,0 +1,153 @@
+From 46fc4070a270d6f68c7af51ce002c0d4a10b9760 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 11 Jun 2021 12:17:45 +0530
+Subject: cxgb4: fix endianness when flashing boot image
+
+From: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
+
+[ Upstream commit 42a2039753a7f758ba5c85cb199fcf10dc2111eb ]
+
+Boot images are copied to memory and updated with current underlying
+device ID before flashing them to adapter. Ensure the updated images
+are always flashed in Big Endian to allow the firmware to read the
+new images during boot properly.
+
+Fixes: 550883558f17 ("cxgb4: add support to flash boot image")
+Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 44 +++++++++++++---------
+ 1 file changed, 27 insertions(+), 17 deletions(-)
+
+diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+index 80882cfc370f..029f0c83d785 100644
+--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
++++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+@@ -3060,16 +3060,19 @@ int t4_read_flash(struct adapter *adapter, unsigned int addr,
+  *    @addr: the start address to write
+  *    @n: length of data to write in bytes
+  *    @data: the data to write
++ *    @byte_oriented: whether to store data as bytes or as words
+  *
+  *    Writes up to a page of data (256 bytes) to the serial flash starting
+  *    at the given address.  All the data must be written to the same page.
++ *    If @byte_oriented is set the write data is stored as byte stream
++ *    (i.e. matches what on disk), otherwise in big-endian.
+  */
+ static int t4_write_flash(struct adapter *adapter, unsigned int addr,
+-                        unsigned int n, const u8 *data)
++                        unsigned int n, const u8 *data, bool byte_oriented)
+ {
+-      int ret;
+-      u32 buf[64];
+       unsigned int i, c, left, val, offset = addr & 0xff;
++      u32 buf[64];
++      int ret;
+       if (addr >= adapter->params.sf_size || offset + n > SF_PAGE_SIZE)
+               return -EINVAL;
+@@ -3080,10 +3083,14 @@ static int t4_write_flash(struct adapter *adapter, unsigned int addr,
+           (ret = sf1_write(adapter, 4, 1, 1, val)) != 0)
+               goto unlock;
+-      for (left = n; left; left -= c) {
++      for (left = n; left; left -= c, data += c) {
+               c = min(left, 4U);
+-              for (val = 0, i = 0; i < c; ++i)
+-                      val = (val << 8) + *data++;
++              for (val = 0, i = 0; i < c; ++i) {
++                      if (byte_oriented)
++                              val = (val << 8) + data[i];
++                      else
++                              val = (val << 8) + data[c - i - 1];
++              }
+               ret = sf1_write(adapter, c, c != left, 1, val);
+               if (ret)
+@@ -3096,7 +3103,8 @@ static int t4_write_flash(struct adapter *adapter, unsigned int addr,
+       t4_write_reg(adapter, SF_OP_A, 0);    /* unlock SF */
+       /* Read the page to verify the write succeeded */
+-      ret = t4_read_flash(adapter, addr & ~0xff, ARRAY_SIZE(buf), buf, 1);
++      ret = t4_read_flash(adapter, addr & ~0xff, ARRAY_SIZE(buf), buf,
++                          byte_oriented);
+       if (ret)
+               return ret;
+@@ -3692,7 +3700,7 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size)
+        */
+       memcpy(first_page, fw_data, SF_PAGE_SIZE);
+       ((struct fw_hdr *)first_page)->fw_ver = cpu_to_be32(0xffffffff);
+-      ret = t4_write_flash(adap, fw_start, SF_PAGE_SIZE, first_page);
++      ret = t4_write_flash(adap, fw_start, SF_PAGE_SIZE, first_page, true);
+       if (ret)
+               goto out;
+@@ -3700,14 +3708,14 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size)
+       for (size -= SF_PAGE_SIZE; size; size -= SF_PAGE_SIZE) {
+               addr += SF_PAGE_SIZE;
+               fw_data += SF_PAGE_SIZE;
+-              ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, fw_data);
++              ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, fw_data, true);
+               if (ret)
+                       goto out;
+       }
+-      ret = t4_write_flash(adap,
+-                           fw_start + offsetof(struct fw_hdr, fw_ver),
+-                           sizeof(hdr->fw_ver), (const u8 *)&hdr->fw_ver);
++      ret = t4_write_flash(adap, fw_start + offsetof(struct fw_hdr, fw_ver),
++                           sizeof(hdr->fw_ver), (const u8 *)&hdr->fw_ver,
++                           true);
+ out:
+       if (ret)
+               dev_err(adap->pdev_dev, "firmware download failed, error %d\n",
+@@ -10208,7 +10216,7 @@ int t4_load_cfg(struct adapter *adap, const u8 *cfg_data, unsigned int size)
+                       n = size - i;
+               else
+                       n = SF_PAGE_SIZE;
+-              ret = t4_write_flash(adap, addr, n, cfg_data);
++              ret = t4_write_flash(adap, addr, n, cfg_data, true);
+               if (ret)
+                       goto out;
+@@ -10677,13 +10685,14 @@ int t4_load_boot(struct adapter *adap, u8 *boot_data,
+       for (size -= SF_PAGE_SIZE; size; size -= SF_PAGE_SIZE) {
+               addr += SF_PAGE_SIZE;
+               boot_data += SF_PAGE_SIZE;
+-              ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, boot_data);
++              ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, boot_data,
++                                   false);
+               if (ret)
+                       goto out;
+       }
+       ret = t4_write_flash(adap, boot_sector, SF_PAGE_SIZE,
+-                           (const u8 *)header);
++                           (const u8 *)header, false);
+ out:
+       if (ret)
+@@ -10758,7 +10767,7 @@ int t4_load_bootcfg(struct adapter *adap, const u8 *cfg_data, unsigned int size)
+       for (i = 0; i < size; i += SF_PAGE_SIZE) {
+               n = min_t(u32, size - i, SF_PAGE_SIZE);
+-              ret = t4_write_flash(adap, addr, n, cfg_data);
++              ret = t4_write_flash(adap, addr, n, cfg_data, false);
+               if (ret)
+                       goto out;
+@@ -10770,7 +10779,8 @@ int t4_load_bootcfg(struct adapter *adap, const u8 *cfg_data, unsigned int size)
+       for (i = 0; i < npad; i++) {
+               u8 data = 0;
+-              ret = t4_write_flash(adap, cfg_addr + size + i, 1, &data);
++              ret = t4_write_flash(adap, cfg_addr + size + i, 1, &data,
++                                   false);
+               if (ret)
+                       goto out;
+       }
+-- 
+2.30.2
+
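
The behavioural core of the change is how up to four bytes are packed into the 32-bit word handed to the flash controller: in byte-oriented mode the on-disk byte stream is preserved, otherwise the bytes are reversed so the word lands in big-endian order. A tiny standalone illustration of the two packings used in the patched loop (the surrounding flash access is omitted):

#include <stdint.h>
#include <stdio.h>

/* Pack up to 4 bytes the way the patched t4_write_flash() loop does. */
static uint32_t pack_word(const uint8_t *data, unsigned int c, int byte_oriented)
{
    uint32_t val = 0;

    for (unsigned int i = 0; i < c; i++) {
        if (byte_oriented)
            val = (val << 8) + data[i];         /* keep the on-disk byte stream */
        else
            val = (val << 8) + data[c - i - 1]; /* store the word big-endian */
    }
    return val;
}

int main(void)
{
    const uint8_t data[4] = { 0x11, 0x22, 0x33, 0x44 };

    printf("byte-oriented: 0x%08x\n", pack_word(data, 4, 1)); /* 0x11223344 */
    printf("word-oriented: 0x%08x\n", pack_word(data, 4, 0)); /* 0x44332211 */
    return 0;
}
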
diff --git a/queue-5.12/cxgb4-fix-sleep-in-atomic-when-flashing-phy-firmware.patch b/queue-5.12/cxgb4-fix-sleep-in-atomic-when-flashing-phy-firmware.patch
new file mode 100644
index 0000000..fbf42a1
--- /dev/null
@@ -0,0 +1,72 @@
+From 02c1b4978bc0d4b9ea05c0a576aef7c535a8b868 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 11 Jun 2021 12:17:46 +0530
+Subject: cxgb4: fix sleep in atomic when flashing PHY firmware
+
+From: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
+
+[ Upstream commit f046bd0ae15d8a0bbe57d4647da182420f720c3d ]
+
+Before writing new PHY firmware to on-chip memory, driver queries
+firmware for current running PHY firmware version, which can result
+in sleep waiting for reply. So, move spinlock closer to the actual
+on-chip memory write operation, instead of taking it at the callers.
+
+Fixes: 5fff701c838e ("cxgb4: always sync access when flashing PHY firmware")
+Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c | 2 --
+ drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c    | 2 --
+ drivers/net/ethernet/chelsio/cxgb4/t4_hw.c         | 2 ++
+ 3 files changed, 2 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+index 61ea3ec5c3fc..bc2de01d0539 100644
+--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
++++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+@@ -1337,9 +1337,7 @@ static int cxgb4_ethtool_flash_phy(struct net_device *netdev,
+               return ret;
+       }
+-      spin_lock_bh(&adap->win0_lock);
+       ret = t4_load_phy_fw(adap, MEMWIN_NIC, NULL, data, size);
+-      spin_unlock_bh(&adap->win0_lock);
+       if (ret)
+               dev_err(adap->pdev_dev, "Failed to load PHY FW\n");
+diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+index 1f601de02e70..762113a04dde 100644
+--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
++++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+@@ -4424,10 +4424,8 @@ static int adap_init0_phy(struct adapter *adap)
+       /* Load PHY Firmware onto adapter.
+        */
+-      spin_lock_bh(&adap->win0_lock);
+       ret = t4_load_phy_fw(adap, MEMWIN_NIC, phy_info->phy_fw_version,
+                            (u8 *)phyf->data, phyf->size);
+-      spin_unlock_bh(&adap->win0_lock);
+       if (ret < 0)
+               dev_err(adap->pdev_dev, "PHY Firmware transfer error %d\n",
+                       -ret);
+diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+index 029f0c83d785..601853bb34c9 100644
+--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
++++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+@@ -3820,9 +3820,11 @@ int t4_load_phy_fw(struct adapter *adap, int win,
+       /* Copy the supplied PHY Firmware image to the adapter memory location
+        * allocated by the adapter firmware.
+        */
++      spin_lock_bh(&adap->win0_lock);
+       ret = t4_memory_rw(adap, win, mtype, maddr,
+                          phy_fw_size, (__be32 *)phy_fw_data,
+                          T4_MEMORY_WRITE);
++      spin_unlock_bh(&adap->win0_lock);
+       if (ret)
+               return ret;
+-- 
+2.30.2
+
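
The rule being restored here is that a spinlock-protected (atomic) section must not contain anything that can sleep, such as waiting for a firmware reply, so the lock is narrowed to just the on-chip memory write. A crude standalone model of that refactor, using a plain flag and an assert in place of the kernel locking API (illustrative only):

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

static bool in_atomic_section;   /* models "holding a spinlock" */

static void lock(void)   { in_atomic_section = true;  }
static void unlock(void) { in_atomic_section = false; }

/* Anything that may block must not run while the lock is held. */
static void query_fw_version(void)
{
    assert(!in_atomic_section && "sleeping call under spinlock");
    puts("query firmware version (may sleep)");
}

static void write_onchip_memory(void)
{
    puts("write PHY image to on-chip memory");
}

/* After the fix: take the lock only around the non-sleeping write,
 * as t4_load_phy_fw() now does around t4_memory_rw(). */
static void load_phy_fw(void)
{
    query_fw_version();   /* may sleep, so it runs before the lock is taken */
    lock();
    write_onchip_memory();
    unlock();
}

int main(void)
{
    load_phy_fw();
    return 0;
}
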
diff --git a/queue-5.12/cxgb4-fix-wrong-ethtool-n-tuple-rule-lookup.patch b/queue-5.12/cxgb4-fix-wrong-ethtool-n-tuple-rule-lookup.patch
new file mode 100644
index 0000000..fae2ab4
--- /dev/null
@@ -0,0 +1,78 @@
+From 0e203293194837d0583ef07a7ba6e942b08f163c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 12 Jun 2021 19:20:44 +0530
+Subject: cxgb4: fix wrong ethtool n-tuple rule lookup
+
+From: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
+
+[ Upstream commit 09427c1915f754ebe7d3d8e54e79bbee48afe916 ]
+
+The TID returned during successful filter creation is relative to
+the region in which the filter is created. Using it directly always
+returns Hi Prio/Normal filter region's entry for the first couple of
+entries, even though the rule is actually inserted in Hash region.
+Fix by analyzing in which region the filter has been inserted and
+save the absolute TID to be used for lookup later.
+
+Fixes: db43b30cd89c ("cxgb4: add ethtool n-tuple filter deletion")
+Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../ethernet/chelsio/cxgb4/cxgb4_ethtool.c    | 24 ++++++++++++-------
+ 1 file changed, 16 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+index df20485b5744..83ed10ac8660 100644
+--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
++++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+@@ -1624,16 +1624,14 @@ static struct filter_entry *cxgb4_get_filter_entry(struct adapter *adap,
+                                                  u32 ftid)
+ {
+       struct tid_info *t = &adap->tids;
+-      struct filter_entry *f;
+-      if (ftid < t->nhpftids)
+-              f = &adap->tids.hpftid_tab[ftid];
+-      else if (ftid < t->nftids)
+-              f = &adap->tids.ftid_tab[ftid - t->nhpftids];
+-      else
+-              f = lookup_tid(&adap->tids, ftid);
++      if (ftid >= t->hpftid_base && ftid < t->hpftid_base + t->nhpftids)
++              return &t->hpftid_tab[ftid - t->hpftid_base];
++
++      if (ftid >= t->ftid_base && ftid < t->ftid_base + t->nftids)
++              return &t->ftid_tab[ftid - t->ftid_base];
+-      return f;
++      return lookup_tid(t, ftid);
+ }
+ static void cxgb4_fill_filter_rule(struct ethtool_rx_flow_spec *fs,
+@@ -1840,6 +1838,11 @@ static int cxgb4_ntuple_del_filter(struct net_device *dev,
+       filter_id = filter_info->loc_array[cmd->fs.location];
+       f = cxgb4_get_filter_entry(adapter, filter_id);
++      if (f->fs.prio)
++              filter_id -= adapter->tids.hpftid_base;
++      else if (!f->fs.hash)
++              filter_id -= (adapter->tids.ftid_base - adapter->tids.nhpftids);
++
+       ret = cxgb4_flow_rule_destroy(dev, f->fs.tc_prio, &f->fs, filter_id);
+       if (ret)
+               goto err;
+@@ -1899,6 +1902,11 @@ static int cxgb4_ntuple_set_filter(struct net_device *netdev,
+       filter_info = &adapter->ethtool_filters->port[pi->port_id];
++      if (fs.prio)
++              tid += adapter->tids.hpftid_base;
++      else if (!fs.hash)
++              tid += (adapter->tids.ftid_base - adapter->tids.nhpftids);
++
+       filter_info->loc_array[cmd->fs.location] = tid;
+       set_bit(cmd->fs.location, filter_info->bmap);
+       filter_info->in_use++;
+-- 
+2.30.2
+
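
A small standalone sketch of the index translation the fix introduces: the ID handed back at rule-creation time is relative to the filter region the rule landed in, so it is rebased against that region before being stored, and the later lookup classifies an absolute TID by region. Field names are simplified stand-ins for the driver's tid_info, not its actual layout:

#include <stdio.h>

/* Illustrative stand-in for the relevant fields of struct tid_info. */
struct tid_regions {
    unsigned int hpftid_base, nhpftids;  /* high-priority filter region */
    unsigned int ftid_base,   nftids;    /* normal filter region        */
};

/* Rebase a region-relative filter index to an absolute TID, mirroring the
 * adjustment added in cxgb4_ntuple_set_filter(). Hash-region filters are
 * already absolute and need no adjustment. */
static unsigned int filter_id_to_tid(const struct tid_regions *t,
                                     unsigned int filter_id,
                                     int is_hi_prio, int is_hash)
{
    if (is_hi_prio)
        return filter_id + t->hpftid_base;
    if (!is_hash)
        return filter_id + (t->ftid_base - t->nhpftids);
    return filter_id;
}

/* Reverse mapping: given an absolute TID, find which region it belongs to,
 * as cxgb4_get_filter_entry() now does with the base/size checks. */
static const char *tid_region(const struct tid_regions *t, unsigned int tid)
{
    if (tid >= t->hpftid_base && tid < t->hpftid_base + t->nhpftids)
        return "hi-prio";
    if (tid >= t->ftid_base && tid < t->ftid_base + t->nftids)
        return "normal";
    return "hash";
}

int main(void)
{
    struct tid_regions t = { .hpftid_base = 8, .nhpftids = 8,
                             .ftid_base = 16,  .nftids = 64 };
    unsigned int tid = filter_id_to_tid(&t, 10, 0, 0);

    printf("filter 10 -> tid %u (%s region)\n", tid, tid_region(&t, tid));
    return 0;
}
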
diff --git a/queue-5.12/cxgb4-fix-wrong-shift.patch b/queue-5.12/cxgb4-fix-wrong-shift.patch
new file mode 100644
index 0000000..86b195b
--- /dev/null
@@ -0,0 +1,36 @@
+From f2be84d93f7968155528be67b1b659636559ed1f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 18 Jun 2021 11:29:48 +0200
+Subject: cxgb4: fix wrong shift.
+
+From: Pavel Machek <pavel@denx.de>
+
+[ Upstream commit 39eb028183bc7378bb6187067e20bf6d8c836407 ]
+
+While fixing a Coverity warning, commit dd2c79677375 introduced a typo
+in the shift value. Fix that.
+
+Signed-off-by: Pavel Machek (CIP) <pavel@denx.de>
+Fixes: dd2c79677375 ("cxgb4: Fix unintentional sign extension issues")
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
+index e664e05b9f02..5fbc087268db 100644
+--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
++++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
+@@ -198,7 +198,7 @@ static void set_nat_params(struct adapter *adap, struct filter_entry *f,
+                                     WORD_MASK, f->fs.nat_lip[3] |
+                                     f->fs.nat_lip[2] << 8 |
+                                     f->fs.nat_lip[1] << 16 |
+-                                    (u64)f->fs.nat_lip[0] << 25, 1);
++                                    (u64)f->fs.nat_lip[0] << 24, 1);
+               }
+       }
+-- 
+2.30.2
+
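
The bug is a one-bit typo in octet placement: the most significant octet of the NAT local IP belongs at bit 24, and shifting it by 25 pushes it one bit too far, corrupting the value programmed into hardware. A quick standalone check of the two shifts (the address is an arbitrary example):

#include <stdint.h>
#include <stdio.h>

/* Pack the four octets of an IPv4 address into one value, as
 * set_nat_params() does for f->fs.nat_lip[]. */
static uint64_t pack_ip(const uint8_t ip[4], int msb_shift)
{
    return ip[3] | ip[2] << 8 | ip[1] << 16 | (uint64_t)ip[0] << msb_shift;
}

int main(void)
{
    const uint8_t ip[4] = { 192, 168, 1, 2 };   /* nat_lip[0..3] */

    printf("shift 24: 0x%llx\n", (unsigned long long)pack_ip(ip, 24)); /* 0xc0a80102 */
    printf("shift 25: 0x%llx\n", (unsigned long long)pack_ip(ip, 25)); /* 0x180a80102, wrong */
    return 0;
}
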
diff --git a/queue-5.12/cxgb4-halt-chip-before-flashing-phy-firmware-image.patch b/queue-5.12/cxgb4-halt-chip-before-flashing-phy-firmware-image.patch
new file mode 100644
index 0000000..d1e20b0
--- /dev/null
@@ -0,0 +1,61 @@
+From e614a1f374eae5b2a3d6a77f8258ca046e802f01 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 11 Jun 2021 12:17:47 +0530
+Subject: cxgb4: halt chip before flashing PHY firmware image
+
+From: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
+
+[ Upstream commit 6d297540f75d759489054e8b07932208fc4db2cb ]
+
+When using firmware-assisted PHY firmware image write to flash,
+halt the chip before beginning the flash write operation to allow
+the running firmware to store the image persistently. Otherwise,
+the running firmware will only store the PHY image in local on-chip
+RAM, which will be lost after next reset.
+
+Fixes: 4ee339e1e92a ("cxgb4: add support to flash PHY image")
+Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../ethernet/chelsio/cxgb4/cxgb4_ethtool.c    | 22 ++++++++++++++++---
+ 1 file changed, 19 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+index bc2de01d0539..df20485b5744 100644
+--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
++++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+@@ -1337,11 +1337,27 @@ static int cxgb4_ethtool_flash_phy(struct net_device *netdev,
+               return ret;
+       }
++      /* We have to RESET the chip/firmware because we need the
++       * chip in uninitialized state for loading new PHY image.
++       * Otherwise, the running firmware will only store the PHY
++       * image in local RAM which will be lost after next reset.
++       */
++      ret = t4_fw_reset(adap, adap->mbox, PIORSTMODE_F | PIORST_F);
++      if (ret < 0) {
++              dev_err(adap->pdev_dev,
++                      "Set FW to RESET for flashing PHY FW failed. ret: %d\n",
++                      ret);
++              return ret;
++      }
++
+       ret = t4_load_phy_fw(adap, MEMWIN_NIC, NULL, data, size);
+-      if (ret)
+-              dev_err(adap->pdev_dev, "Failed to load PHY FW\n");
++      if (ret < 0) {
++              dev_err(adap->pdev_dev, "Failed to load PHY FW. ret: %d\n",
++                      ret);
++              return ret;
++      }
+-      return ret;
++      return 0;
+ }
+ static int cxgb4_ethtool_flash_fw(struct net_device *netdev,
+-- 
+2.30.2
+
diff --git a/queue-5.12/ethtool-strset-fix-message-length-calculation.patch b/queue-5.12/ethtool-strset-fix-message-length-calculation.patch
new file mode 100644
index 0000000..a38e10e
--- /dev/null
@@ -0,0 +1,51 @@
+From ec84e0e4f5cfdfb8270a1671db1f4d8d30ed56c1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 11 Jun 2021 18:49:48 -0700
+Subject: ethtool: strset: fix message length calculation
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit e175aef902697826d344ce3a12189329848fe898 ]
+
+Outer nest for ETHTOOL_A_STRSET_STRINGSETS is not accounted for.
+This may result in ETHTOOL_MSG_STRSET_GET producing a warning like:
+
+    calculated message payload length (684) not sufficient
+    WARNING: CPU: 0 PID: 30967 at net/ethtool/netlink.c:369 ethnl_default_doit+0x87a/0xa20
+
+and a splat.
+
+As usually with such warnings three conditions must be met for the warning
+to trigger:
+ - there must be no skb size rounding up (e.g. reply_size of 684);
+ - string set must be per-device (so that the header gets populated);
+ - the device name must be at least 12 characters long.
+
+all in all with current user space it looks like reading priv flags
+is the only place this could potentially happen. Or with syzbot :)
+
+Reported-by: syzbot+59aa77b92d06cd5a54f2@syzkaller.appspotmail.com
+Fixes: 71921690f974 ("ethtool: provide string sets with STRSET_GET request")
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ethtool/strset.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c
+index c3a5489964cd..9908b922cce8 100644
+--- a/net/ethtool/strset.c
++++ b/net/ethtool/strset.c
+@@ -328,6 +328,8 @@ static int strset_reply_size(const struct ethnl_req_info *req_base,
+       int len = 0;
+       int ret;
++      len += nla_total_size(0); /* ETHTOOL_A_STRSET_STRINGSETS */
++
+       for (i = 0; i < ETH_SS_COUNT; i++) {
+               const struct strset_info *set_info = &data->sets[i];
+-- 
+2.30.2
+
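
For context on why a single nla_total_size(0) is enough: every netlink attribute, including an empty nest, still costs one aligned attribute header in the reply, so omitting the outer ETHTOOL_A_STRSET_STRINGSETS nest undercounts the payload by exactly that header. A standalone arithmetic sketch using the standard netlink sizing rules, with the macros restated locally rather than taken from kernel headers:

#include <stdio.h>

/* Standard netlink sizing rules (simplified restatement, not kernel headers). */
#define NLA_ALIGNTO        4
#define NLA_ALIGN(len)     (((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
#define NLA_HDRLEN         NLA_ALIGN(4)            /* struct nlattr is 4 bytes */
#define nla_total_size(p)  NLA_ALIGN(NLA_HDRLEN + (p))

int main(void)
{
    /* An empty nest still needs one attribute header in the message. */
    printf("nla_total_size(0)  = %d bytes\n", nla_total_size(0));  /* 4 */

    /* e.g. a 13-byte string attribute rounds up to 4-byte alignment. */
    printf("nla_total_size(13) = %d bytes\n", nla_total_size(13)); /* 20 */
    return 0;
}

That 4-byte header is exactly what the reply-size estimate was missing.
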
diff --git a/queue-5.12/ice-add-ndo_bpf-callback-for-safe-mode-netdev-ops.patch b/queue-5.12/ice-add-ndo_bpf-callback-for-safe-mode-netdev-ops.patch
new file mode 100644
index 0000000..1fb1c8c
--- /dev/null
@@ -0,0 +1,70 @@
+From c95d4fc3d6bcc9bb0e065ea091f4767db8a9ea53 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 May 2021 08:34:59 +0200
+Subject: ice: add ndo_bpf callback for safe mode netdev ops
+
+From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+
+[ Upstream commit ebc5399ea1dfcddac31974091086a3379141899b ]
+
+The ice driver requires a programmable pipeline firmware package in order
+to support advanced features. Otherwise, the driver falls back to the
+so-called 'safe mode'. In that mode, the ndo_bpf callback is not exposed,
+and when the user tries to load an XDP program, the following happens:
+
+$ sudo ./xdp1 enp179s0f1
+libbpf: Kernel error message: Underlying driver does not support XDP in native mode
+link set xdp fd failed
+
+which is sort of confusing, as there is a native XDP support, but not in
+the current mode. Improve the user experience by providing the specific
+ndo_bpf callback dedicated for safe mode which will make use of extack
+to explicitly let the user know that the DDP package is missing and
+that's the reason that the XDP can't be loaded onto interface currently.
+
+Cc: Jamal Hadi Salim <jhs@mojatatu.com>
+Fixes: efc2214b6047 ("ice: Add support for XDP")
+Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+Tested-by: Kiran Bhandare <kiranx.bhandare@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/ice_main.c | 15 +++++++++++++++
+ 1 file changed, 15 insertions(+)
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
+index d821c687f239..b61cd84be97f 100644
+--- a/drivers/net/ethernet/intel/ice/ice_main.c
++++ b/drivers/net/ethernet/intel/ice/ice_main.c
+@@ -2554,6 +2554,20 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
+       return (ret || xdp_ring_err) ? -ENOMEM : 0;
+ }
++/**
++ * ice_xdp_safe_mode - XDP handler for safe mode
++ * @dev: netdevice
++ * @xdp: XDP command
++ */
++static int ice_xdp_safe_mode(struct net_device __always_unused *dev,
++                           struct netdev_bpf *xdp)
++{
++      NL_SET_ERR_MSG_MOD(xdp->extack,
++                         "Please provide working DDP firmware package in order to use XDP\n"
++                         "Refer to Documentation/networking/device_drivers/ethernet/intel/ice.rst");
++      return -EOPNOTSUPP;
++}
++
+ /**
+  * ice_xdp - implements XDP handler
+  * @dev: netdevice
+@@ -6805,6 +6819,7 @@ static const struct net_device_ops ice_netdev_safe_mode_ops = {
+       .ndo_change_mtu = ice_change_mtu,
+       .ndo_get_stats64 = ice_get_stats64,
+       .ndo_tx_timeout = ice_tx_timeout,
++      .ndo_bpf = ice_xdp_safe_mode,
+ };
+ static const struct net_device_ops ice_netdev_ops = {
+-- 
+2.30.2
+
diff --git a/queue-5.12/ice-parameterize-functions-responsible-for-tx-ring-m.patch b/queue-5.12/ice-parameterize-functions-responsible-for-tx-ring-m.patch
new file mode 100644
index 0000000..e4527b9
--- /dev/null
@@ -0,0 +1,120 @@
+From ddc3d0f739329e4ce0f127baeb337a421b6aeff5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 May 2021 08:35:00 +0200
+Subject: ice: parameterize functions responsible for Tx ring management
+
+From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+
+[ Upstream commit 2e84f6b3773f43263124c76499c0c4ec3f40aa9b ]
+
+Commit ae15e0ba1b33 ("ice: Change number of XDP Tx queues to match
+number of Rx queues") tried to address the incorrect setting of XDP
+queue count that was based on the Tx queue count, whereas in theory we
+should provide the XDP queue per Rx queue. However, the routines that
+setup and destroy the set of Tx resources are still based on the
+vsi->num_txq.
+
+Ice supports the asynchronous Tx/Rx queue count, so for a setup where
+vsi->num_txq > vsi->num_rxq, ice_vsi_stop_tx_rings and ice_vsi_cfg_txqs
+will access vsi->xdp_rings out of bounds.
+
+Parameterize two mentioned functions so they get the size of Tx resources
+array as the input.
+
+Fixes: ae15e0ba1b33 ("ice: Change number of XDP Tx queues to match number of Rx queues")
+Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+Tested-by: Kiran Bhandare <kiranx.bhandare@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/ice_lib.c | 18 ++++++++++--------
+ 1 file changed, 10 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
+index 27e439853c3b..55432ea360ad 100644
+--- a/drivers/net/ethernet/intel/ice/ice_lib.c
++++ b/drivers/net/ethernet/intel/ice/ice_lib.c
+@@ -1715,12 +1715,13 @@ setup_rings:
+  * ice_vsi_cfg_txqs - Configure the VSI for Tx
+  * @vsi: the VSI being configured
+  * @rings: Tx ring array to be configured
++ * @count: number of Tx ring array elements
+  *
+  * Return 0 on success and a negative value on error
+  * Configure the Tx VSI for operation.
+  */
+ static int
+-ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings)
++ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings, u16 count)
+ {
+       struct ice_aqc_add_tx_qgrp *qg_buf;
+       u16 q_idx = 0;
+@@ -1732,7 +1733,7 @@ ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings)
+       qg_buf->num_txqs = 1;
+-      for (q_idx = 0; q_idx < vsi->num_txq; q_idx++) {
++      for (q_idx = 0; q_idx < count; q_idx++) {
+               err = ice_vsi_cfg_txq(vsi, rings[q_idx], qg_buf);
+               if (err)
+                       goto err_cfg_txqs;
+@@ -1752,7 +1753,7 @@ err_cfg_txqs:
+  */
+ int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi)
+ {
+-      return ice_vsi_cfg_txqs(vsi, vsi->tx_rings);
++      return ice_vsi_cfg_txqs(vsi, vsi->tx_rings, vsi->num_txq);
+ }
+ /**
+@@ -1767,7 +1768,7 @@ int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi)
+       int ret;
+       int i;
+-      ret = ice_vsi_cfg_txqs(vsi, vsi->xdp_rings);
++      ret = ice_vsi_cfg_txqs(vsi, vsi->xdp_rings, vsi->num_xdp_txq);
+       if (ret)
+               return ret;
+@@ -1965,17 +1966,18 @@ int ice_vsi_stop_all_rx_rings(struct ice_vsi *vsi)
+  * @rst_src: reset source
+  * @rel_vmvf_num: Relative ID of VF/VM
+  * @rings: Tx ring array to be stopped
++ * @count: number of Tx ring array elements
+  */
+ static int
+ ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
+-                    u16 rel_vmvf_num, struct ice_ring **rings)
++                    u16 rel_vmvf_num, struct ice_ring **rings, u16 count)
+ {
+       u16 q_idx;
+       if (vsi->num_txq > ICE_LAN_TXQ_MAX_QDIS)
+               return -EINVAL;
+-      for (q_idx = 0; q_idx < vsi->num_txq; q_idx++) {
++      for (q_idx = 0; q_idx < count; q_idx++) {
+               struct ice_txq_meta txq_meta = { };
+               int status;
+@@ -2003,7 +2005,7 @@ int
+ ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
+                         u16 rel_vmvf_num)
+ {
+-      return ice_vsi_stop_tx_rings(vsi, rst_src, rel_vmvf_num, vsi->tx_rings);
++      return ice_vsi_stop_tx_rings(vsi, rst_src, rel_vmvf_num, vsi->tx_rings, vsi->num_txq);
+ }
+ /**
+@@ -2012,7 +2014,7 @@ ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
+  */
+ int ice_vsi_stop_xdp_tx_rings(struct ice_vsi *vsi)
+ {
+-      return ice_vsi_stop_tx_rings(vsi, ICE_NO_RESET, 0, vsi->xdp_rings);
++      return ice_vsi_stop_tx_rings(vsi, ICE_NO_RESET, 0, vsi->xdp_rings, vsi->num_xdp_txq);
+ }
+ /**
+-- 
+2.30.2
+
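
The shape of the fix is generic: when two ring arrays can legitimately differ in length (LAN Tx rings vs. XDP Tx rings), a helper that walks one of them must take that array's own element count rather than reusing another array's counter. A generic standalone sketch of the parameterization (simplified types, not the ice driver's structures):

#include <stdio.h>

struct ring { int id; };

/* The helper now takes the element count of the array it is actually given,
 * instead of a loop bound borrowed from an unrelated counter. */
static void cfg_rings(struct ring *rings[], unsigned int count)
{
    for (unsigned int i = 0; i < count; i++)
        printf("configure ring %d\n", rings[i]->id);
}

int main(void)
{
    struct ring tx[4]  = { {0}, {1}, {2}, {3} };
    struct ring xdp[2] = { {10}, {11} };
    struct ring *tx_rings[4]  = { &tx[0], &tx[1], &tx[2], &tx[3] };
    struct ring *xdp_rings[2] = { &xdp[0], &xdp[1] };

    /* Each caller passes the matching count, like ice_vsi_cfg_lan_txqs()
     * (num_txq) vs. ice_vsi_cfg_xdp_txqs() (num_xdp_txq) after the fix. */
    cfg_rings(tx_rings, 4);
    cfg_rings(xdp_rings, 2);
    return 0;
}
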
diff --git a/queue-5.12/icmp-don-t-send-out-icmp-messages-with-a-source-addr.patch b/queue-5.12/icmp-don-t-send-out-icmp-messages-with-a-source-addr.patch
new file mode 100644 (file)
index 0000000..f062c96
--- /dev/null
@@ -0,0 +1,100 @@
+From bdcc05527dc8a6c299615322fd78f63d6f210d9e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 18 Jun 2021 13:04:35 +0200
+Subject: icmp: don't send out ICMP messages with a source address of 0.0.0.0
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Toke Høiland-Jørgensen <toke@redhat.com>
+
+[ Upstream commit 321827477360934dc040e9d3c626bf1de6c3ab3c ]
+
+When constructing ICMP response messages, the kernel will try to pick a
+suitable source address for the outgoing packet. However, if no IPv4
+addresses are configured on the system at all, this will fail and we end up
+producing an ICMP message with a source address of 0.0.0.0. This can happen
+on a box routing IPv4 traffic via v6 nexthops, for instance.
+
+Since 0.0.0.0 is not generally routable on the internet, there's a good
+chance that such ICMP messages will never make it back to the sender of the
+original packet that the ICMP message was sent in response to. This, in
+turn, can create connectivity and PMTUd problems for senders. Fortunately,
+RFC7600 reserves a dummy address to be used as a source for ICMP
+messages (192.0.0.8/32), so let's teach the kernel to substitute that
+address as a last resort if the regular source address selection procedure
+fails.
+
+Below is a quick example reproducing this issue with network namespaces:
+
+ip netns add ns0
+ip l add type veth peer netns ns0
+ip l set dev veth0 up
+ip a add 10.0.0.1/24 dev veth0
+ip a add fc00:dead:cafe:42::1/64 dev veth0
+ip r add 10.1.0.0/24 via inet6 fc00:dead:cafe:42::2
+ip -n ns0 l set dev veth0 up
+ip -n ns0 a add fc00:dead:cafe:42::2/64 dev veth0
+ip -n ns0 r add 10.0.0.0/24 via inet6 fc00:dead:cafe:42::1
+ip netns exec ns0 sysctl -w net.ipv4.icmp_ratelimit=0
+ip netns exec ns0 sysctl -w net.ipv4.ip_forward=1
+tcpdump -tpni veth0 -c 2 icmp &
+ping -w 1 10.1.0.1 > /dev/null
+tcpdump: verbose output suppressed, use -v[v]... for full protocol decode
+listening on veth0, link-type EN10MB (Ethernet), snapshot length 262144 bytes
+IP 10.0.0.1 > 10.1.0.1: ICMP echo request, id 29, seq 1, length 64
+IP 0.0.0.0 > 10.0.0.1: ICMP net 10.1.0.1 unreachable, length 92
+2 packets captured
+2 packets received by filter
+0 packets dropped by kernel
+
+With this patch the above capture changes to:
+IP 10.0.0.1 > 10.1.0.1: ICMP echo request, id 31127, seq 1, length 64
+IP 192.0.0.8 > 10.0.0.1: ICMP net 10.1.0.1 unreachable, length 92
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Reported-by: Juliusz Chroboczek <jch@irif.fr>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/uapi/linux/in.h | 3 +++
+ net/ipv4/icmp.c         | 7 +++++++
+ 2 files changed, 10 insertions(+)
+
+diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
+index 7d6687618d80..d1b327036ae4 100644
+--- a/include/uapi/linux/in.h
++++ b/include/uapi/linux/in.h
+@@ -289,6 +289,9 @@ struct sockaddr_in {
+ /* Address indicating an error return. */
+ #define       INADDR_NONE             ((unsigned long int) 0xffffffff)
++/* Dummy address for src of ICMP replies if no real address is set (RFC7600). */
++#define       INADDR_DUMMY            ((unsigned long int) 0xc0000008)
++
+ /* Network number for local host loopback. */
+ #define       IN_LOOPBACKNET          127
+diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
+index 616e2dc1c8fa..cd65d3146c30 100644
+--- a/net/ipv4/icmp.c
++++ b/net/ipv4/icmp.c
+@@ -759,6 +759,13 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
+               icmp_param.data_len = room;
+       icmp_param.head_len = sizeof(struct icmphdr);
++      /* if we don't have a source address at this point, fall back to the
++       * dummy address instead of sending out a packet with a source address
++       * of 0.0.0.0
++       */
++      if (!fl4.saddr)
++              fl4.saddr = htonl(INADDR_DUMMY);
++
+       icmp_push_reply(&icmp_param, &fl4, &ipc, &rt);
+ ende:
+       ip_rt_put(rt);
+-- 
+2.30.2
+
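A side note on the constant introduced above: 0xc0000008 in host order is 192.0.0.8, the single address RFC 7600 reserves for exactly this purpose. A minimal userspace sketch (demo-only names, not kernel code) shows the mapping:

#include <arpa/inet.h>
#include <stdio.h>

/* Demo-only constant mirroring the INADDR_DUMMY value added by the patch;
 * 0xc0000008 in host order is 192.0.0.8, the RFC 7600 dummy address. */
#define DEMO_INADDR_DUMMY 0xc0000008UL

int main(void)
{
        struct in_addr saddr = { .s_addr = htonl(DEMO_INADDR_DUMMY) };

        /* Prints "fallback ICMP source: 192.0.0.8" */
        printf("fallback ICMP source: %s\n", inet_ntoa(saddr));
        return 0;
}
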
diff --git a/queue-5.12/ipv4-fix-device-used-for-dst_alloc-with-local-routes.patch b/queue-5.12/ipv4-fix-device-used-for-dst_alloc-with-local-routes.patch
new file mode 100644 (file)
index 0000000..aa418b2
--- /dev/null
@@ -0,0 +1,116 @@
+From 99b09b5a5dd93929aa3676e37fabe591630fb1cb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 12 Jun 2021 18:24:59 -0600
+Subject: ipv4: Fix device used for dst_alloc with local routes
+
+From: David Ahern <dsahern@kernel.org>
+
+[ Upstream commit b87b04f5019e821c8c6c7761f258402e43500a1f ]
+
+Oliver reported a use case where deleting a VRF device can hang
+waiting for the refcnt to drop to 0. The root cause is that the dst
+is allocated against the VRF device but cached on the loopback
+device.
+
+The use case (added to the selftests) has an implicit VRF crossing
+due to the ordering of the FIB rules (lookup local is before the
+l3mdev rule, but the problem occurs even if the FIB rules are
+re-ordered with local after l3mdev because the VRF table does not
+have a default route to terminate the lookup). The end result is
+that the FIB lookup returns the loopback device as the nexthop,
+but the ingress device is in a VRF. The mismatch causes the dst to be
+allocated against the VRF device but cached on the loopback device.
+
+The fix is to borrow the trick used for IPv6 (see ip6_rt_get_dev_rcu):
+pick the dst alloc device based on the FIB lookup result, but with checks
+that the result has a nexthop device (e.g., not an unreachable or
+prohibit entry).
+
+Fixes: f5a0aab84b74 ("net: ipv4: dst for local input routes should use l3mdev if relevant")
+Reported-by: Oliver Herms <oliver.peter.herms@gmail.com>
+Signed-off-by: David Ahern <dsahern@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/route.c                         | 15 +++++++++++++-
+ tools/testing/selftests/net/fib_tests.sh | 25 ++++++++++++++++++++++++
+ 2 files changed, 39 insertions(+), 1 deletion(-)
+
+diff --git a/net/ipv4/route.c b/net/ipv4/route.c
+index d635b4f32d34..09506203156d 100644
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -2081,6 +2081,19 @@ martian_source:
+       return err;
+ }
++/* get device for dst_alloc with local routes */
++static struct net_device *ip_rt_get_dev(struct net *net,
++                                      const struct fib_result *res)
++{
++      struct fib_nh_common *nhc = res->fi ? res->nhc : NULL;
++      struct net_device *dev = NULL;
++
++      if (nhc)
++              dev = l3mdev_master_dev_rcu(nhc->nhc_dev);
++
++      return dev ? : net->loopback_dev;
++}
++
+ /*
+  *    NOTE. We drop all the packets that has local source
+  *    addresses, because every properly looped back packet
+@@ -2237,7 +2250,7 @@ local_input:
+               }
+       }
+-      rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev,
++      rth = rt_dst_alloc(ip_rt_get_dev(net, res),
+                          flags | RTCF_LOCAL, res->type,
+                          IN_DEV_ORCONF(in_dev, NOPOLICY), false);
+       if (!rth)
+diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
+index 2b5707738609..6fad54c7ecb4 100755
+--- a/tools/testing/selftests/net/fib_tests.sh
++++ b/tools/testing/selftests/net/fib_tests.sh
+@@ -1384,12 +1384,37 @@ ipv4_rt_replace()
+       ipv4_rt_replace_mpath
+ }
++# checks that cached input route on VRF port is deleted
++# when VRF is deleted
++ipv4_local_rt_cache()
++{
++      run_cmd "ip addr add 10.0.0.1/32 dev lo"
++      run_cmd "ip netns add test-ns"
++      run_cmd "ip link add veth-outside type veth peer name veth-inside"
++      run_cmd "ip link add vrf-100 type vrf table 1100"
++      run_cmd "ip link set veth-outside master vrf-100"
++      run_cmd "ip link set veth-inside netns test-ns"
++      run_cmd "ip link set veth-outside up"
++      run_cmd "ip link set vrf-100 up"
++      run_cmd "ip route add 10.1.1.1/32 dev veth-outside table 1100"
++      run_cmd "ip netns exec test-ns ip link set veth-inside up"
++      run_cmd "ip netns exec test-ns ip addr add 10.1.1.1/32 dev veth-inside"
++      run_cmd "ip netns exec test-ns ip route add 10.0.0.1/32 dev veth-inside"
++      run_cmd "ip netns exec test-ns ip route add default via 10.0.0.1"
++      run_cmd "ip netns exec test-ns ping 10.0.0.1 -c 1 -i 1"
++      run_cmd "ip link delete vrf-100"
++
++      # if we do not hang test is a success
++      log_test $? 0 "Cached route removed from VRF port device"
++}
++
+ ipv4_route_test()
+ {
+       route_setup
+       ipv4_rt_add
+       ipv4_rt_replace
++      ipv4_local_rt_cache
+       route_cleanup
+ }
+-- 
+2.30.2
+
diff --git a/queue-5.12/lantiq-net-fix-duplicated-skb-in-rx-descriptor-ring.patch b/queue-5.12/lantiq-net-fix-duplicated-skb-in-rx-descriptor-ring.patch
new file mode 100644 (file)
index 0000000..41ae68a
--- /dev/null
@@ -0,0 +1,54 @@
+From 2f725f00968081401b77d178b83e6ed4f50e0dda Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 15 Jun 2021 22:42:57 +0200
+Subject: lantiq: net: fix duplicated skb in rx descriptor ring
+
+From: Aleksander Jan Bajkowski <olek2@wp.pl>
+
+[ Upstream commit 7ea6cd16f1599c1eac6018751eadbc5fc736b99a ]
+
+The previous commit didn't fix the bug properly. By mistake, it replaced
+the pointer of the next skb in the descriptor ring instead of the current
+one. As a result, two descriptors were assigned the same skb. The error
+is seen during the iperf test when skb_put tries to insert a second packet
+and exceeds the available buffer.
+
+Fixes: c7718ee96dbc ("net: lantiq: fix memory corruption in RX ring ")
+Signed-off-by: Aleksander Jan Bajkowski <olek2@wp.pl>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/lantiq_xrx200.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c
+index 3da494df72f3..072075bc60ee 100644
+--- a/drivers/net/ethernet/lantiq_xrx200.c
++++ b/drivers/net/ethernet/lantiq_xrx200.c
+@@ -154,6 +154,7 @@ static int xrx200_close(struct net_device *net_dev)
+ static int xrx200_alloc_skb(struct xrx200_chan *ch)
+ {
++      struct sk_buff *skb = ch->skb[ch->dma.desc];
+       dma_addr_t mapping;
+       int ret = 0;
+@@ -168,6 +169,7 @@ static int xrx200_alloc_skb(struct xrx200_chan *ch)
+                                XRX200_DMA_DATA_LEN, DMA_FROM_DEVICE);
+       if (unlikely(dma_mapping_error(ch->priv->dev, mapping))) {
+               dev_kfree_skb_any(ch->skb[ch->dma.desc]);
++              ch->skb[ch->dma.desc] = skb;
+               ret = -ENOMEM;
+               goto skip;
+       }
+@@ -198,7 +200,6 @@ static int xrx200_hw_receive(struct xrx200_chan *ch)
+       ch->dma.desc %= LTQ_DESC_NUM;
+       if (ret) {
+-              ch->skb[ch->dma.desc] = skb;
+               net_dev->stats.rx_dropped++;
+               netdev_err(net_dev, "failed to allocate new rx buffer\n");
+               return ret;
+-- 
+2.30.2
+
diff --git a/queue-5.12/libbpf-fixes-incorrect-rx_ring_setup_done.patch b/queue-5.12/libbpf-fixes-incorrect-rx_ring_setup_done.patch
new file mode 100644 (file)
index 0000000..2e18a82
--- /dev/null
@@ -0,0 +1,44 @@
+From b4b0a25d7992e13e0786ad8602b42f7ca654bc67 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 7 Jun 2021 14:08:35 +0100
+Subject: libbpf: Fixes incorrect rx_ring_setup_done
+
+From: Kev Jackson <foamdino@gmail.com>
+
+[ Upstream commit 11fc79fc9f2e395aa39fa5baccae62767c5d8280 ]
+
+When calling xsk_socket__create_shared(), the logic at line 1097 marks a
+boolean flag true within the xsk_umem structure to track setup progress
+in order to support multiple calls to the function.  However, instead of
+marking umem->tx_ring_setup_done, the code incorrectly sets
+umem->rx_ring_setup_done.  This leads to improper behaviour when
+creating and destroying xsk and umem structures.
+
+Multiple calls to this function are documented as supported.
+
+Fixes: ca7a83e2487a ("libbpf: Only create rx and tx XDP rings when necessary")
+Signed-off-by: Kev Jackson <foamdino@gmail.com>
+Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
+Acked-by: Yonghong Song <yhs@fb.com>
+Link: https://lore.kernel.org/bpf/YL4aU4f3Aaik7CN0@linux-dev
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/lib/bpf/xsk.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
+index 007fe5d59438..fe2bec500bf6 100644
+--- a/tools/lib/bpf/xsk.c
++++ b/tools/lib/bpf/xsk.c
+@@ -928,7 +928,7 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
+                       goto out_put_ctx;
+               }
+               if (xsk->fd == umem->fd)
+-                      umem->rx_ring_setup_done = true;
++                      umem->tx_ring_setup_done = true;
+       }
+       err = xsk_get_mmap_offsets(xsk->fd, &off);
+-- 
+2.30.2
+
diff --git a/queue-5.12/mac80211-fix-skb-length-check-in-ieee80211_scan_rx.patch b/queue-5.12/mac80211-fix-skb-length-check-in-ieee80211_scan_rx.patch
new file mode 100644 (file)
index 0000000..a20ec57
--- /dev/null
@@ -0,0 +1,61 @@
+From 967dadfa726c529e2a84fa73398a9864400fc27b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 10 May 2021 12:16:49 +0800
+Subject: mac80211: fix skb length check in ieee80211_scan_rx()
+
+From: Du Cheng <ducheng2@gmail.com>
+
+[ Upstream commit e298aa358f0ca658406d524b6639fe389cb6e11e ]
+
+Replace the hard-coded compile-time constants for the header length check
+with dynamic determination based on the frame type. Otherwise, we
+hit a validation WARN_ON in cfg80211 later.
+
+Fixes: cd418ba63f0c ("mac80211: convert S1G beacon to scan results")
+Reported-by: syzbot+405843667e93b9790fc1@syzkaller.appspotmail.com
+Signed-off-by: Du Cheng <ducheng2@gmail.com>
+Link: https://lore.kernel.org/r/20210510041649.589754-1-ducheng2@gmail.com
+[style fixes, reword commit message]
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mac80211/scan.c | 21 ++++++++++++++++-----
+ 1 file changed, 16 insertions(+), 5 deletions(-)
+
+diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
+index d4cc9ac2d703..6b50cb5e0e3c 100644
+--- a/net/mac80211/scan.c
++++ b/net/mac80211/scan.c
+@@ -251,13 +251,24 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
+       struct ieee80211_mgmt *mgmt = (void *)skb->data;
+       struct ieee80211_bss *bss;
+       struct ieee80211_channel *channel;
++      size_t min_hdr_len = offsetof(struct ieee80211_mgmt,
++                                    u.probe_resp.variable);
++
++      if (!ieee80211_is_probe_resp(mgmt->frame_control) &&
++          !ieee80211_is_beacon(mgmt->frame_control) &&
++          !ieee80211_is_s1g_beacon(mgmt->frame_control))
++              return;
+       if (ieee80211_is_s1g_beacon(mgmt->frame_control)) {
+-              if (skb->len < 15)
+-                      return;
+-      } else if (skb->len < 24 ||
+-               (!ieee80211_is_probe_resp(mgmt->frame_control) &&
+-                !ieee80211_is_beacon(mgmt->frame_control)))
++              if (ieee80211_is_s1g_short_beacon(mgmt->frame_control))
++                      min_hdr_len = offsetof(struct ieee80211_ext,
++                                             u.s1g_short_beacon.variable);
++              else
++                      min_hdr_len = offsetof(struct ieee80211_ext,
++                                             u.s1g_beacon);
++      }
++
++      if (skb->len < min_hdr_len)
+               return;
+       sdata1 = rcu_dereference(local->scan_sdata);
+-- 
+2.30.2
+
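The fix above derives the minimum frame length per frame type from offsetof() of the variable-length tail instead of hard-coding 15 or 24. A standalone illustration of that pattern follows; the struct is invented for the example and is not the mac80211 layout:

#include <stddef.h>
#include <stdio.h>

/* Invented management-header layout; the real structs are
 * struct ieee80211_mgmt / struct ieee80211_ext in the kernel. */
struct demo_mgmt_hdr {
        unsigned short frame_control;
        unsigned short duration;
        unsigned char  addr1[6];
        unsigned char  addr2[6];
        unsigned char  addr3[6];
        unsigned char  variable[];      /* information elements start here */
};

int main(void)
{
        size_t min_hdr_len = offsetof(struct demo_mgmt_hdr, variable);

        /* Any frame shorter than this cannot contain a full fixed header,
         * so it is dropped before the IEs are parsed. */
        printf("minimum header length: %zu bytes\n", min_hdr_len);
        return 0;
}
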
diff --git a/queue-5.12/mlxsw-core-set-thermal-zone-polling-delay-argument-t.patch b/queue-5.12/mlxsw-core-set-thermal-zone-polling-delay-argument-t.patch
new file mode 100644 (file)
index 0000000..7a733f9
--- /dev/null
@@ -0,0 +1,60 @@
+From e93617ddfd3ca6507c5b20118982cac8f4325bc2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 6 Jun 2021 11:24:32 +0300
+Subject: mlxsw: core: Set thermal zone polling delay argument to real value at
+ init
+
+From: Mykola Kostenok <c_mykolak@nvidia.com>
+
+[ Upstream commit 2fd8d84ce3095e8a7b5fe96532c91b1b9e07339c ]
+
+The thermal polling delay argument for the module and gearbox thermal
+zones used to be initialized to zero, while the actual delay was set by
+mlxsw_thermal_set_mode() through the thermal operation callback
+set_mode(). After the set_mode()/get_mode() operations were removed by
+the cited commits, the module and gearbox thermal zones always have a
+polling time of zero and do not perform temperature monitoring.
+
+Set a non-zero "polling_delay" in the thermal_zone_device_register()
+routine so that the relevant thermal zones perform thermal monitoring.
+
+Cc: Andrzej Pietrasiewicz <andrzej.p@collabora.com>
+Fixes: 5d7bd8aa7c35 ("thermal: Simplify or eliminate unnecessary set_mode() methods")
+Fixes: 1ee14820fd8e ("thermal: remove get_mode() operation of drivers")
+Signed-off-by: Mykola Kostenok <c_mykolak@nvidia.com>
+Acked-by: Vadim Pasternak <vadimp@nvidia.com>
+Reviewed-by: Jiri Pirko <jiri@nvidia.com>
+Signed-off-by: Ido Schimmel <idosch@nvidia.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlxsw/core_thermal.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
+index bf85ce9835d7..42e4437ac3c1 100644
+--- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
++++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
+@@ -708,7 +708,8 @@ mlxsw_thermal_module_tz_init(struct mlxsw_thermal_module *module_tz)
+                                                       MLXSW_THERMAL_TRIP_MASK,
+                                                       module_tz,
+                                                       &mlxsw_thermal_module_ops,
+-                                                      NULL, 0, 0);
++                                                      NULL, 0,
++                                                      module_tz->parent->polling_delay);
+       if (IS_ERR(module_tz->tzdev)) {
+               err = PTR_ERR(module_tz->tzdev);
+               return err;
+@@ -830,7 +831,8 @@ mlxsw_thermal_gearbox_tz_init(struct mlxsw_thermal_module *gearbox_tz)
+                                               MLXSW_THERMAL_TRIP_MASK,
+                                               gearbox_tz,
+                                               &mlxsw_thermal_gearbox_ops,
+-                                              NULL, 0, 0);
++                                              NULL, 0,
++                                              gearbox_tz->parent->polling_delay);
+       if (IS_ERR(gearbox_tz->tzdev))
+               return PTR_ERR(gearbox_tz->tzdev);
+-- 
+2.30.2
+
diff --git a/queue-5.12/mlxsw-reg-spectrum-3-enforce-lowest-max-shaper-burst.patch b/queue-5.12/mlxsw-reg-spectrum-3-enforce-lowest-max-shaper-burst.patch
new file mode 100644 (file)
index 0000000..2b71a2e
--- /dev/null
@@ -0,0 +1,47 @@
+From 18cd184b19485619b40af475182474ec5e3a42f6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 6 Jun 2021 11:24:30 +0300
+Subject: mlxsw: reg: Spectrum-3: Enforce lowest max-shaper burst size of 11
+
+From: Petr Machata <petrm@nvidia.com>
+
+[ Upstream commit 306b9228c097b4101c150ccd262372ded8348644 ]
+
+A max-shaper is the HW component responsible for delaying egress traffic
+above a configured transmission rate. Burst size is the amount of traffic
+that is allowed to pass without accounting. The burst size value needs to
+be such that it can be expressed as 2^BS * 512 bits, where BS lies in a
+certain ASIC-dependent range. mlxsw enforces that this holds before
+attempting to configure the shaper.
+
+The assumption for Spectrum-3 was that the lower limit of BS would be 5,
+like for Spectrum-1. But as of now, the limit is still 11. Therefore fix
+the driver accordingly, so that incorrect values are rejected early with a
+proper message.
+
+Fixes: 23effa2479ba ("mlxsw: reg: Add max_shaper_bs to QoS ETS Element Configuration")
+Reported-by: Maksym Yaremchuk <maksymy@nvidia.com>
+Signed-off-by: Petr Machata <petrm@nvidia.com>
+Signed-off-by: Ido Schimmel <idosch@nvidia.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlxsw/reg.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
+index c4adc7f740d3..769386971ac3 100644
+--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
++++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
+@@ -3863,7 +3863,7 @@ MLXSW_ITEM32(reg, qeec, max_shaper_bs, 0x1C, 0, 6);
+ #define MLXSW_REG_QEEC_HIGHEST_SHAPER_BS      25
+ #define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP1   5
+ #define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP2   11
+-#define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP3   5
++#define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP3   11
+ static inline void mlxsw_reg_qeec_pack(char *payload, u8 local_port,
+                                      enum mlxsw_reg_qeec_hr hr, u8 index,
+-- 
+2.30.2
+
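To make the burst-size rule above concrete: a value is accepted only if it equals 2^BS * 512 bits for some BS within the ASIC's range, so raising the lowest BS from 5 to 11 raises the smallest configurable burst from 16,384 bits (2 KiB) to 1,048,576 bits (128 KiB). A rough standalone check of that constraint, for illustration only and not the mlxsw validation code:

#include <stdbool.h>
#include <stdio.h>

/* Returns true if 'bits' can be written as 2^bs * 512 bits with
 * lowest_bs <= bs <= highest_bs.  The bounds mirror the values quoted in
 * the patch (11..25 for Spectrum-2/3); this helper is only a sketch. */
static bool burst_bits_valid(unsigned long long bits, int lowest_bs, int highest_bs)
{
        for (int bs = lowest_bs; bs <= highest_bs; bs++)
                if ((512ULL << bs) == bits)
                        return true;
        return false;
}

int main(void)
{
        unsigned long long smallest = 512ULL << 11;   /* 1,048,576 bits = 128 KiB */

        printf("2^11 * 512 = %llu bits, valid: %d\n",
               smallest, burst_bits_valid(smallest, 11, 25));
        printf("2^5  * 512 = %llu bits, valid: %d\n",
               512ULL << 5, burst_bits_valid(512ULL << 5, 11, 25));
        return 0;
}
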
diff --git a/queue-5.12/mptcp-do-not-warn-on-bad-input-from-the-network.patch b/queue-5.12/mptcp-do-not-warn-on-bad-input-from-the-network.patch
new file mode 100644 (file)
index 0000000..33fcb0c
--- /dev/null
@@ -0,0 +1,63 @@
+From 429f59e778f66516b574411ca0eff7c2757aae67 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 10 Jun 2021 15:59:42 -0700
+Subject: mptcp: do not warn on bad input from the network
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit 61e710227e97172355d5f150d5c78c64175d9fb2 ]
+
+warn_bad_map() produces a kernel WARN on bad input coming
+from the network. Use pr_debug() to avoid spamming the system
+log.
+
+Additionally, when the right bound check fails, warn_bad_map() reports
+the wrong ssn value; let's fix it.
+
+Fixes: 648ef4b88673 ("mptcp: Implement MPTCP receive path")
+Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/107
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mptcp/subflow.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
+index 1ee4d106ce1c..98a5a68ec15d 100644
+--- a/net/mptcp/subflow.c
++++ b/net/mptcp/subflow.c
+@@ -754,10 +754,10 @@ static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq)
+       return seq | ((old_seq + old_data_len + 1) & GENMASK_ULL(63, 32));
+ }
+-static void warn_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
++static void dbg_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
+ {
+-      WARN_ONCE(1, "Bad mapping: ssn=%d map_seq=%d map_data_len=%d",
+-                ssn, subflow->map_subflow_seq, subflow->map_data_len);
++      pr_debug("Bad mapping: ssn=%d map_seq=%d map_data_len=%d",
++               ssn, subflow->map_subflow_seq, subflow->map_data_len);
+ }
+ static bool skb_is_fully_mapped(struct sock *ssk, struct sk_buff *skb)
+@@ -782,13 +782,13 @@ static bool validate_mapping(struct sock *ssk, struct sk_buff *skb)
+               /* Mapping covers data later in the subflow stream,
+                * currently unsupported.
+                */
+-              warn_bad_map(subflow, ssn);
++              dbg_bad_map(subflow, ssn);
+               return false;
+       }
+       if (unlikely(!before(ssn, subflow->map_subflow_seq +
+                                 subflow->map_data_len))) {
+               /* Mapping does covers past subflow data, invalid */
+-              warn_bad_map(subflow, ssn + skb->len);
++              dbg_bad_map(subflow, ssn);
+               return false;
+       }
+       return true;
+-- 
+2.30.2
+
diff --git a/queue-5.12/mptcp-fix-out-of-bounds-when-parsing-tcp-options.patch b/queue-5.12/mptcp-fix-out-of-bounds-when-parsing-tcp-options.patch
new file mode 100644 (file)
index 0000000..a1f383b
--- /dev/null
@@ -0,0 +1,44 @@
+From be2b06a38721f9a75fcf5f39788e3deef7b04693 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 10 Jun 2021 19:40:30 +0300
+Subject: mptcp: Fix out of bounds when parsing TCP options
+
+From: Maxim Mikityanskiy <maximmi@nvidia.com>
+
+[ Upstream commit 07718be265680dcf496347d475ce1a5442f55ad7 ]
+
+The TCP option parser in mptcp (mptcp_get_options) could read one byte
+out of bounds. When the length is 1, the execution flow gets into the
+loop, reads one byte of the opcode, and if the opcode is neither
+TCPOPT_EOL nor TCPOPT_NOP, it reads one more byte, which exceeds the
+length of 1.
+
+This fix is inspired by commit 9609dad263f8 ("ipv4: tcp_input: fix stack
+out of bounds when parsing TCP options.").
+
+Cc: Young Xiao <92siuyang@gmail.com>
+Fixes: cec37a6e41aa ("mptcp: Handle MP_CAPABLE options for outgoing connections")
+Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
+Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mptcp/options.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/net/mptcp/options.c b/net/mptcp/options.c
+index 8848a9e2a95b..47d90cf31f12 100644
+--- a/net/mptcp/options.c
++++ b/net/mptcp/options.c
+@@ -337,6 +337,8 @@ void mptcp_get_options(const struct sk_buff *skb,
+                       length--;
+                       continue;
+               default:
++                      if (length < 2)
++                              return;
+                       opsize = *ptr++;
+                       if (opsize < 2) /* "silly options" */
+                               return;
+-- 
+2.30.2
+
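The off-by-one described above is easiest to see in a standalone TLV walk over a TCP option block. The sketch below uses the standard TCP option constants and an invented helper, not the kernel parser, and includes the "length < 2" guard the patch adds before dereferencing the length octet:

#include <stdint.h>
#include <stdio.h>

#define TCPOPT_EOL 0
#define TCPOPT_NOP 1

/* Walk a TCP option block of 'length' bytes starting at 'ptr'. */
static void parse_opts(const uint8_t *ptr, int length)
{
        while (length > 0) {
                int opcode = *ptr++;
                int opsize;

                if (opcode == TCPOPT_EOL)
                        return;
                if (opcode == TCPOPT_NOP) {
                        length--;
                        continue;
                }
                /* The check added by the patch: only the kind byte is left,
                 * so reading a length octet would run past the end. */
                if (length < 2)
                        return;
                opsize = ptr[0];
                if (opsize < 2 || opsize > length)
                        return;
                printf("option kind=%d len=%d\n", opcode, opsize);
                ptr += opsize - 1;      /* skip the length byte and payload */
                length -= opsize;
        }
}

int main(void)
{
        /* NOP, NOP, then a 10-byte timestamp option. */
        const uint8_t sane[] = { 1, 1, 8, 10, 0, 0, 0, 1, 0, 0, 0, 2 };
        /* A lone option kind byte, as in the report: remaining length is 1. */
        const uint8_t truncated[] = { 30 };

        parse_opts(sane, sizeof(sane));
        parse_opts(truncated, sizeof(truncated));
        return 0;
}
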
diff --git a/queue-5.12/mptcp-fix-soft-lookup-in-subflow_error_report.patch b/queue-5.12/mptcp-fix-soft-lookup-in-subflow_error_report.patch
new file mode 100644 (file)
index 0000000..55eb945
--- /dev/null
@@ -0,0 +1,207 @@
+From 1f7da5e67391141e8c8fea8ea920cbd4d41be9fb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 10 Jun 2021 15:59:44 -0700
+Subject: mptcp: fix soft lockup in subflow_error_report()
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit 499ada5073361c631f2a3c4a8aed44d53b6f82ec ]
+
+Maxim reported a soft lockup in subflow_error_report():
+
+ watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [swapper/0:0]
+ RIP: 0010:native_queued_spin_lock_slowpath
+ RSP: 0018:ffffa859c0003bc0 EFLAGS: 00000202
+ RAX: 0000000000000101 RBX: 0000000000000001 RCX: 0000000000000000
+ RDX: ffff9195c2772d88 RSI: 0000000000000000 RDI: ffff9195c2772d88
+ RBP: ffff9195c2772d00 R08: 00000000000067b0 R09: c6e31da9eb1e44f4
+ R10: ffff9195ef379700 R11: ffff9195edb50710 R12: ffff9195c2772d88
+ R13: ffff9195f500e3d0 R14: ffff9195ef379700 R15: ffff9195ef379700
+ FS:  0000000000000000(0000) GS:ffff91961f400000(0000) knlGS:0000000000000000
+ CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 000000c000407000 CR3: 0000000002988000 CR4: 00000000000006f0
+ Call Trace:
+  <IRQ>
+ _raw_spin_lock_bh
+ subflow_error_report
+ mptcp_subflow_data_available
+ __mptcp_move_skbs_from_subflow
+ mptcp_data_ready
+ tcp_data_queue
+ tcp_rcv_established
+ tcp_v4_do_rcv
+ tcp_v4_rcv
+ ip_protocol_deliver_rcu
+ ip_local_deliver_finish
+ __netif_receive_skb_one_core
+ netif_receive_skb
+ rtl8139_poll 8139too
+ __napi_poll
+ net_rx_action
+ __do_softirq
+ __irq_exit_rcu
+ common_interrupt
+  </IRQ>
+
+The calling function - mptcp_subflow_data_available() - can be invoked
+from different contexts:
+- plain ssk socket lock
+- ssk socket lock + mptcp_data_lock
+- ssk socket lock + mptcp_data_lock + msk socket lock.
+
+Since subflow_error_report() tries to acquire the mptcp_data_lock, the
+latter two call chains will cause a soft lockup.
+
+This change addresses the issue by moving the error reporting call to
+outer functions, where the list of held locks is known and we can
+acquire only the needed one.
+
+Reported-by: Maxim Galaganov <max@internet.ru>
+Fixes: 15cc10453398 ("mptcp: deliver ssk errors to msk")
+Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/199
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mptcp/protocol.c |  9 ++++++
+ net/mptcp/subflow.c  | 75 +++++++++++++++++++++++---------------------
+ 2 files changed, 48 insertions(+), 36 deletions(-)
+
+diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
+index 78152b0820ce..d8187ac06539 100644
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -699,6 +699,12 @@ static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
+       __mptcp_move_skbs_from_subflow(msk, ssk, &moved);
+       __mptcp_ofo_queue(msk);
++      if (unlikely(ssk->sk_err)) {
++              if (!sock_owned_by_user(sk))
++                      __mptcp_error_report(sk);
++              else
++                      set_bit(MPTCP_ERROR_REPORT,  &msk->flags);
++      }
+       /* If the moves have caught up with the DATA_FIN sequence number
+        * it's time to ack the DATA_FIN and change socket state, but
+@@ -1932,6 +1938,9 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk)
+               done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved);
+               mptcp_data_unlock(sk);
+               tcp_cleanup_rbuf(ssk, moved);
++
++              if (unlikely(ssk->sk_err))
++                      __mptcp_error_report(sk);
+               unlock_sock_fast(ssk, slowpath);
+       } while (!done);
+diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
+index 98a5a68ec15d..d6d8ad4f918e 100644
+--- a/net/mptcp/subflow.c
++++ b/net/mptcp/subflow.c
+@@ -1033,7 +1033,6 @@ fallback:
+                * subflow_error_report() will introduce the appropriate barriers
+                */
+               ssk->sk_err = EBADMSG;
+-              ssk->sk_error_report(ssk);
+               tcp_set_state(ssk, TCP_CLOSE);
+               tcp_send_active_reset(ssk, GFP_ATOMIC);
+               WRITE_ONCE(subflow->data_avail, 0);
+@@ -1086,41 +1085,6 @@ void mptcp_space(const struct sock *ssk, int *space, int *full_space)
+       *full_space = tcp_full_space(sk);
+ }
+-static void subflow_data_ready(struct sock *sk)
+-{
+-      struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+-      u16 state = 1 << inet_sk_state_load(sk);
+-      struct sock *parent = subflow->conn;
+-      struct mptcp_sock *msk;
+-
+-      msk = mptcp_sk(parent);
+-      if (state & TCPF_LISTEN) {
+-              /* MPJ subflow are removed from accept queue before reaching here,
+-               * avoid stray wakeups
+-               */
+-              if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue))
+-                      return;
+-
+-              set_bit(MPTCP_DATA_READY, &msk->flags);
+-              parent->sk_data_ready(parent);
+-              return;
+-      }
+-
+-      WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable &&
+-                   !subflow->mp_join && !(state & TCPF_CLOSE));
+-
+-      if (mptcp_subflow_data_available(sk))
+-              mptcp_data_ready(parent, sk);
+-}
+-
+-static void subflow_write_space(struct sock *ssk)
+-{
+-      struct sock *sk = mptcp_subflow_ctx(ssk)->conn;
+-
+-      mptcp_propagate_sndbuf(sk, ssk);
+-      mptcp_write_space(sk);
+-}
+-
+ void __mptcp_error_report(struct sock *sk)
+ {
+       struct mptcp_subflow_context *subflow;
+@@ -1161,6 +1125,43 @@ static void subflow_error_report(struct sock *ssk)
+       mptcp_data_unlock(sk);
+ }
++static void subflow_data_ready(struct sock *sk)
++{
++      struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
++      u16 state = 1 << inet_sk_state_load(sk);
++      struct sock *parent = subflow->conn;
++      struct mptcp_sock *msk;
++
++      msk = mptcp_sk(parent);
++      if (state & TCPF_LISTEN) {
++              /* MPJ subflow are removed from accept queue before reaching here,
++               * avoid stray wakeups
++               */
++              if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue))
++                      return;
++
++              set_bit(MPTCP_DATA_READY, &msk->flags);
++              parent->sk_data_ready(parent);
++              return;
++      }
++
++      WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable &&
++                   !subflow->mp_join && !(state & TCPF_CLOSE));
++
++      if (mptcp_subflow_data_available(sk))
++              mptcp_data_ready(parent, sk);
++      else if (unlikely(sk->sk_err))
++              subflow_error_report(sk);
++}
++
++static void subflow_write_space(struct sock *ssk)
++{
++      struct sock *sk = mptcp_subflow_ctx(ssk)->conn;
++
++      mptcp_propagate_sndbuf(sk, ssk);
++      mptcp_write_space(sk);
++}
++
+ static struct inet_connection_sock_af_ops *
+ subflow_default_af_ops(struct sock *sk)
+ {
+@@ -1469,6 +1470,8 @@ static void subflow_state_change(struct sock *sk)
+        */
+       if (mptcp_subflow_data_available(sk))
+               mptcp_data_ready(parent, sk);
++      else if (unlikely(sk->sk_err))
++              subflow_error_report(sk);
+       subflow_sched_work_if_closed(mptcp_sk(parent), sk);
+-- 
+2.30.2
+
diff --git a/queue-5.12/mptcp-try-harder-to-borrow-memory-from-subflow-under.patch b/queue-5.12/mptcp-try-harder-to-borrow-memory-from-subflow-under.patch
new file mode 100644 (file)
index 0000000..32aa4b9
--- /dev/null
@@ -0,0 +1,54 @@
+From 718e9386f0ebbb4ea31224c97db0076eeee4bede Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 10 Jun 2021 15:59:40 -0700
+Subject: mptcp: try harder to borrow memory from subflow under pressure
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit 72f961320d5d15bfcb26dbe3edaa3f7d25fd2c8a ]
+
+If the host is under severe memory pressure and RX forward
+memory allocation for the msk fails, we try to borrow the
+required memory from the ingress subflow.
+
+The current attempt is a bit flaky: if skb->truesize is less
+than SK_MEM_QUANTUM, the ssk will not release any memory, and
+the next schedule will fail again.
+
+Instead, directly move the required number of pages from the
+ssk to the msk, if available.
+
+Fixes: 9c3f94e1681b ("mptcp: add missing memory scheduling in the rx path")
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mptcp/protocol.c | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
+index 225b98821517..1d981babbcfe 100644
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -287,11 +287,13 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
+       /* try to fetch required memory from subflow */
+       if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
+-              if (ssk->sk_forward_alloc < skb->truesize)
+-                      goto drop;
+-              __sk_mem_reclaim(ssk, skb->truesize);
+-              if (!sk_rmem_schedule(sk, skb, skb->truesize))
++              int amount = sk_mem_pages(skb->truesize) << SK_MEM_QUANTUM_SHIFT;
++
++              if (ssk->sk_forward_alloc < amount)
+                       goto drop;
++
++              ssk->sk_forward_alloc -= amount;
++              sk->sk_forward_alloc += amount;
+       }
+       /* the skb map_seq accounts for the skb offset:
+-- 
+2.30.2
+
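For the accounting above, sk_mem_pages() rounds the skb truesize up to whole pages and the transfer happens in those page-sized quanta (SK_MEM_QUANTUM). A small worked example of the rounding, assuming 4 KiB pages (the page size is an assumption of this sketch):

#include <stdio.h>

#define DEMO_QUANTUM 4096U      /* one page on most x86-64 systems */

/* Round a truesize up to whole quanta, like sk_mem_pages() does. */
static unsigned int demo_pages(unsigned int truesize)
{
        return (truesize + DEMO_QUANTUM - 1) / DEMO_QUANTUM;
}

int main(void)
{
        unsigned int truesize = 768;    /* smaller than one quantum */
        unsigned int amount = demo_pages(truesize) * DEMO_QUANTUM;

        /* The msk borrows a full 4096 bytes even for a 768-byte skb, so
         * the donor subflow always releases at least one whole quantum. */
        printf("truesize=%u -> borrow %u bytes (%u page(s))\n",
               truesize, amount, demo_pages(truesize));
        return 0;
}
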
diff --git a/queue-5.12/mptcp-wake-up-readers-only-for-in-sequence-data.patch b/queue-5.12/mptcp-wake-up-readers-only-for-in-sequence-data.patch
new file mode 100644 (file)
index 0000000..487cda1
--- /dev/null
@@ -0,0 +1,207 @@
+From 54fea02c17b8ff0e624c79dbeb56b54362b4f452 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 10 Jun 2021 15:59:41 -0700
+Subject: mptcp: wake-up readers only for in sequence data
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit 99d1055ce2469dca3dd14be0991ff8133e25e3d0 ]
+
+Currently we rely on the subflow->data_avail field, which is subject to
+races:
+
+       ssk1
+               skb len = 500 DSS(seq=1, len=1000, off=0)
+               # data_avail == MPTCP_SUBFLOW_DATA_AVAIL
+
+       ssk2
+               skb len = 500 DSS(seq = 501, len=1000)
+               # data_avail == MPTCP_SUBFLOW_DATA_AVAIL
+
+       ssk1
+               skb len = 500 DSS(seq = 1, len=1000, off =500)
+               # still data_avail == MPTCP_SUBFLOW_DATA_AVAIL,
+               # as the skb is covered by a pre-existing map,
+               # which was in-sequence at reception time.
+
+Instead we can explicitly check if some data has been received in-sequence,
+propagating the info from __mptcp_move_skbs_from_subflow().
+
+Additionally add the 'ONCE' annotation to the 'data_avail' memory
+access, as msk will read it outside the subflow socket lock.
+
+Fixes: 648ef4b88673 ("mptcp: Implement MPTCP receive path")
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mptcp/protocol.c | 33 ++++++++++++---------------------
+ net/mptcp/protocol.h |  1 -
+ net/mptcp/subflow.c  | 23 +++++++++--------------
+ 3 files changed, 21 insertions(+), 36 deletions(-)
+
+diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
+index 1d981babbcfe..78152b0820ce 100644
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -689,15 +689,13 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
+ /* In most cases we will be able to lock the mptcp socket.  If its already
+  * owned, we need to defer to the work queue to avoid ABBA deadlock.
+  */
+-static void move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
++static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
+ {
+       struct sock *sk = (struct sock *)msk;
+       unsigned int moved = 0;
+       if (inet_sk_state_load(sk) == TCP_CLOSE)
+-              return;
+-
+-      mptcp_data_lock(sk);
++              return false;
+       __mptcp_move_skbs_from_subflow(msk, ssk, &moved);
+       __mptcp_ofo_queue(msk);
+@@ -709,7 +707,7 @@ static void move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
+        */
+       if (mptcp_pending_data_fin(sk, NULL))
+               mptcp_schedule_work(sk);
+-      mptcp_data_unlock(sk);
++      return moved > 0;
+ }
+ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
+@@ -717,7 +715,6 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
+       struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+       struct mptcp_sock *msk = mptcp_sk(sk);
+       int sk_rbuf, ssk_rbuf;
+-      bool wake;
+       /* The peer can send data while we are shutting down this
+        * subflow at msk destruction time, but we must avoid enqueuing
+@@ -726,28 +723,22 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
+       if (unlikely(subflow->disposable))
+               return;
+-      /* move_skbs_to_msk below can legitly clear the data_avail flag,
+-       * but we will need later to properly woke the reader, cache its
+-       * value
+-       */
+-      wake = subflow->data_avail == MPTCP_SUBFLOW_DATA_AVAIL;
+-      if (wake)
+-              set_bit(MPTCP_DATA_READY, &msk->flags);
+-
+       ssk_rbuf = READ_ONCE(ssk->sk_rcvbuf);
+       sk_rbuf = READ_ONCE(sk->sk_rcvbuf);
+       if (unlikely(ssk_rbuf > sk_rbuf))
+               sk_rbuf = ssk_rbuf;
+-      /* over limit? can't append more skbs to msk */
++      /* over limit? can't append more skbs to msk, Also, no need to wake-up*/
+       if (atomic_read(&sk->sk_rmem_alloc) > sk_rbuf)
+-              goto wake;
+-
+-      move_skbs_to_msk(msk, ssk);
++              return;
+-wake:
+-      if (wake)
++      /* Wake-up the reader only for in-sequence data */
++      mptcp_data_lock(sk);
++      if (move_skbs_to_msk(msk, ssk)) {
++              set_bit(MPTCP_DATA_READY, &msk->flags);
+               sk->sk_data_ready(sk);
++      }
++      mptcp_data_unlock(sk);
+ }
+ void __mptcp_flush_join_list(struct mptcp_sock *msk)
+@@ -850,7 +841,7 @@ static struct sock *mptcp_subflow_recv_lookup(const struct mptcp_sock *msk)
+       sock_owned_by_me(sk);
+       mptcp_for_each_subflow(msk, subflow) {
+-              if (subflow->data_avail)
++              if (READ_ONCE(subflow->data_avail))
+                       return mptcp_subflow_tcp_sock(subflow);
+       }
+diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
+index e21a5bc36cf0..14e89e4bd4a8 100644
+--- a/net/mptcp/protocol.h
++++ b/net/mptcp/protocol.h
+@@ -372,7 +372,6 @@ mptcp_subflow_rsk(const struct request_sock *rsk)
+ enum mptcp_data_avail {
+       MPTCP_SUBFLOW_NODATA,
+       MPTCP_SUBFLOW_DATA_AVAIL,
+-      MPTCP_SUBFLOW_OOO_DATA
+ };
+ struct mptcp_delegated_action {
+diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
+index 8425cd393bf3..1ee4d106ce1c 100644
+--- a/net/mptcp/subflow.c
++++ b/net/mptcp/subflow.c
+@@ -974,7 +974,7 @@ static bool subflow_check_data_avail(struct sock *ssk)
+       pr_debug("msk=%p ssk=%p data_avail=%d skb=%p", subflow->conn, ssk,
+                subflow->data_avail, skb_peek(&ssk->sk_receive_queue));
+       if (!skb_peek(&ssk->sk_receive_queue))
+-              subflow->data_avail = 0;
++              WRITE_ONCE(subflow->data_avail, 0);
+       if (subflow->data_avail)
+               return true;
+@@ -1012,18 +1012,13 @@ static bool subflow_check_data_avail(struct sock *ssk)
+               ack_seq = mptcp_subflow_get_mapped_dsn(subflow);
+               pr_debug("msk ack_seq=%llx subflow ack_seq=%llx", old_ack,
+                        ack_seq);
+-              if (ack_seq == old_ack) {
+-                      subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
+-                      break;
+-              } else if (after64(ack_seq, old_ack)) {
+-                      subflow->data_avail = MPTCP_SUBFLOW_OOO_DATA;
+-                      break;
++              if (unlikely(before64(ack_seq, old_ack))) {
++                      mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq);
++                      continue;
+               }
+-              /* only accept in-sequence mapping. Old values are spurious
+-               * retransmission
+-               */
+-              mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq);
++              WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL);
++              break;
+       }
+       return true;
+@@ -1041,7 +1036,7 @@ fallback:
+               ssk->sk_error_report(ssk);
+               tcp_set_state(ssk, TCP_CLOSE);
+               tcp_send_active_reset(ssk, GFP_ATOMIC);
+-              subflow->data_avail = 0;
++              WRITE_ONCE(subflow->data_avail, 0);
+               return false;
+       }
+@@ -1051,7 +1046,7 @@ fallback:
+       subflow->map_seq = READ_ONCE(msk->ack_seq);
+       subflow->map_data_len = skb->len;
+       subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - subflow->ssn_offset;
+-      subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
++      WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL);
+       return true;
+ }
+@@ -1063,7 +1058,7 @@ bool mptcp_subflow_data_available(struct sock *sk)
+       if (subflow->map_valid &&
+           mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) {
+               subflow->map_valid = 0;
+-              subflow->data_avail = 0;
++              WRITE_ONCE(subflow->data_avail, 0);
+               pr_debug("Done with mapping: seq=%u data_len=%u",
+                        subflow->map_subflow_seq,
+-- 
+2.30.2
+
diff --git a/queue-5.12/net-af_unix-fix-a-data-race-in-unix_dgram_sendmsg-un.patch b/queue-5.12/net-af_unix-fix-a-data-race-in-unix_dgram_sendmsg-un.patch
new file mode 100644 (file)
index 0000000..ba35b0e
--- /dev/null
@@ -0,0 +1,96 @@
+From b7c00b482b46f791ce5ccc61fa6a12a82da8c6a0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Jun 2021 07:47:15 -0700
+Subject: net/af_unix: fix a data-race in unix_dgram_sendmsg /
+ unix_release_sock
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit a494bd642d9120648b06bb7d28ce6d05f55a7819 ]
+
+Although unix_may_send(sk, osk) is called while osk is locked, it appears
+unix_release_sock() can overwrite unix_peer() after this lock has been
+released, making KCSAN unhappy.
+
+Changing unix_release_sock() to access/change unix_peer()
+before the lock is released should fix this issue.
+
+BUG: KCSAN: data-race in unix_dgram_sendmsg / unix_release_sock
+
+write to 0xffff88810465a338 of 8 bytes by task 20852 on cpu 1:
+ unix_release_sock+0x4ed/0x6e0 net/unix/af_unix.c:558
+ unix_release+0x2f/0x50 net/unix/af_unix.c:859
+ __sock_release net/socket.c:599 [inline]
+ sock_close+0x6c/0x150 net/socket.c:1258
+ __fput+0x25b/0x4e0 fs/file_table.c:280
+ ____fput+0x11/0x20 fs/file_table.c:313
+ task_work_run+0xae/0x130 kernel/task_work.c:164
+ tracehook_notify_resume include/linux/tracehook.h:189 [inline]
+ exit_to_user_mode_loop kernel/entry/common.c:175 [inline]
+ exit_to_user_mode_prepare+0x156/0x190 kernel/entry/common.c:209
+ __syscall_exit_to_user_mode_work kernel/entry/common.c:291 [inline]
+ syscall_exit_to_user_mode+0x20/0x40 kernel/entry/common.c:302
+ do_syscall_64+0x56/0x90 arch/x86/entry/common.c:57
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+read to 0xffff88810465a338 of 8 bytes by task 20888 on cpu 0:
+ unix_may_send net/unix/af_unix.c:189 [inline]
+ unix_dgram_sendmsg+0x923/0x1610 net/unix/af_unix.c:1712
+ sock_sendmsg_nosec net/socket.c:654 [inline]
+ sock_sendmsg net/socket.c:674 [inline]
+ ____sys_sendmsg+0x360/0x4d0 net/socket.c:2350
+ ___sys_sendmsg net/socket.c:2404 [inline]
+ __sys_sendmmsg+0x315/0x4b0 net/socket.c:2490
+ __do_sys_sendmmsg net/socket.c:2519 [inline]
+ __se_sys_sendmmsg net/socket.c:2516 [inline]
+ __x64_sys_sendmmsg+0x53/0x60 net/socket.c:2516
+ do_syscall_64+0x4a/0x90 arch/x86/entry/common.c:47
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+value changed: 0xffff888167905400 -> 0x0000000000000000
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 0 PID: 20888 Comm: syz-executor.0 Not tainted 5.13.0-rc5-syzkaller #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/unix/af_unix.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
+index 5a31307ceb76..5d1192ceb139 100644
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -535,12 +535,14 @@ static void unix_release_sock(struct sock *sk, int embrion)
+       u->path.mnt = NULL;
+       state = sk->sk_state;
+       sk->sk_state = TCP_CLOSE;
++
++      skpair = unix_peer(sk);
++      unix_peer(sk) = NULL;
++
+       unix_state_unlock(sk);
+       wake_up_interruptible_all(&u->peer_wait);
+-      skpair = unix_peer(sk);
+-
+       if (skpair != NULL) {
+               if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
+                       unix_state_lock(skpair);
+@@ -555,7 +557,6 @@ static void unix_release_sock(struct sock *sk, int embrion)
+               unix_dgram_peer_wake_disconnect(sk, skpair);
+               sock_put(skpair); /* It may now die */
+-              unix_peer(sk) = NULL;
+       }
+       /* Try to flush out this socket. Throw out buffers at least */
+-- 
+2.30.2
+
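The core of the fix above is a publication rule: a pointer that readers sample under a lock must also be cleared while that lock is still held, with teardown of the old value deferred until after the unlock. A minimal pthread sketch of that rule, with invented names and not the af_unix code:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t state_lock = PTHREAD_MUTEX_INITIALIZER;
static int peer_storage = 42;
static int *peer = &peer_storage;       /* read by senders under state_lock */

static int may_send(void)
{
        pthread_mutex_lock(&state_lock);
        int ok = (peer != NULL);        /* reader only sees a consistent value */
        pthread_mutex_unlock(&state_lock);
        return ok;
}

static void release_peer(void)
{
        pthread_mutex_lock(&state_lock);
        int *old = peer;                /* take over the reference ... */
        peer = NULL;                    /* ... and clear it before unlocking */
        pthread_mutex_unlock(&state_lock);

        /* Teardown of 'old' (dropping its refcount in the real code) can
         * safely happen here, outside the lock. */
        (void)old;
}

int main(void)
{
        printf("may_send before release: %d\n", may_send());
        release_peer();
        printf("may_send after release:  %d\n", may_send());
        return 0;
}
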
diff --git a/queue-5.12/net-cdc_eem-fix-tx-fixup-skb-leak.patch b/queue-5.12/net-cdc_eem-fix-tx-fixup-skb-leak.patch
new file mode 100644 (file)
index 0000000..0e7460c
--- /dev/null
@@ -0,0 +1,44 @@
+From 903b702ae55cf85e6aea0fc6ac5b94319a4904e9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 17 Jun 2021 07:32:32 +0800
+Subject: net: cdc_eem: fix tx fixup skb leak
+
+From: Linyu Yuan <linyyuan@codeaurora.org>
+
+[ Upstream commit c3b26fdf1b32f91c7a3bc743384b4a298ab53ad7 ]
+
+When usbnet transmits an skb, the EEM driver fixes it up in
+eem_tx_fixup(). If skb_copy_expand() fails there, eem_tx_fixup() returns
+NULL and usbnet_start_xmit() has no chance to free the original skb.
+
+Fix it by freeing the original skb in eem_tx_fixup() first, and only
+then checking the result of the copy; if the copy failed, return NULL
+to usbnet.
+
+Fixes: 9f722c0978b0 ("usbnet: CDC EEM support (v5)")
+Signed-off-by: Linyu Yuan <linyyuan@codeaurora.org>
+Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/usb/cdc_eem.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/usb/cdc_eem.c b/drivers/net/usb/cdc_eem.c
+index 0eeec80bec31..e4a570366646 100644
+--- a/drivers/net/usb/cdc_eem.c
++++ b/drivers/net/usb/cdc_eem.c
+@@ -123,10 +123,10 @@ static struct sk_buff *eem_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
+       }
+       skb2 = skb_copy_expand(skb, EEM_HEAD, ETH_FCS_LEN + padlen, flags);
++      dev_kfree_skb_any(skb);
+       if (!skb2)
+               return NULL;
+-      dev_kfree_skb_any(skb);
+       skb = skb2;
+ done:
+-- 
+2.30.2
+
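The fix above settles an ownership convention: the fixup helper always consumes the buffer it is handed, so the caller never has to guess whether to free it when NULL comes back. A generic sketch of that convention, with invented names rather than the usbnet API:

#include <stdlib.h>
#include <string.h>

/* Return a grown copy of 'buf', or NULL on failure.  Either way 'buf' is
 * consumed here, mirroring the dev_kfree_skb_any() move in the patch. */
static char *grow_copy(char *buf, size_t len, size_t extra)
{
        char *copy = malloc(len + extra);

        if (copy)
                memcpy(copy, buf, len);
        free(buf);              /* always free the original */
        return copy;            /* NULL means "dropped, nothing left to free" */
}

int main(void)
{
        char *pkt = calloc(1, 64);

        if (!pkt)
                return 1;
        pkt = grow_copy(pkt, 64, 16);
        free(pkt);              /* free(NULL) is a no-op, so this is safe */
        return 0;
}
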
diff --git a/queue-5.12/net-cdc_ncm-switch-to-eth-d-interface-naming.patch b/queue-5.12/net-cdc_ncm-switch-to-eth-d-interface-naming.patch
new file mode 100644 (file)
index 0000000..0fd2c66
--- /dev/null
@@ -0,0 +1,77 @@
+From 14b26416c1ccba70332dc4494568247712b21af7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 15 Jun 2021 01:05:49 -0700
+Subject: net: cdc_ncm: switch to eth%d interface naming
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Maciej Żenczykowski <maze@google.com>
+
+[ Upstream commit c1a3d4067309451e68c33dbd356032549cc0bd8e ]
+
+This is meant to make the host side cdc_ncm interface consistently
+named just like the older CDC protocols: cdc_ether & cdc_ecm
+(and even rndis_host), which all use 'FLAG_ETHER | FLAG_POINTTOPOINT'.
+
+include/linux/usb/usbnet.h:
+  #define FLAG_ETHER   0x0020          /* maybe use "eth%d" names */
+  #define FLAG_WLAN    0x0080          /* use "wlan%d" names */
+  #define FLAG_WWAN    0x0400          /* use "wwan%d" names */
+  #define FLAG_POINTTOPOINT 0x1000     /* possibly use "usb%d" names */
+
+drivers/net/usb/usbnet.c @ line 1711:
+  strcpy (net->name, "usb%d");
+  ...
+  // heuristic:  "usb%d" for links we know are two-host,
+  // else "eth%d" when there's reasonable doubt.  userspace
+  // can rename the link if it knows better.
+  if ((dev->driver_info->flags & FLAG_ETHER) != 0 &&
+      ((dev->driver_info->flags & FLAG_POINTTOPOINT) == 0 ||
+       (net->dev_addr [0] & 0x02) == 0))
+          strcpy (net->name, "eth%d");
+  /* WLAN devices should always be named "wlan%d" */
+  if ((dev->driver_info->flags & FLAG_WLAN) != 0)
+          strcpy(net->name, "wlan%d");
+  /* WWAN devices should always be named "wwan%d" */
+  if ((dev->driver_info->flags & FLAG_WWAN) != 0)
+          strcpy(net->name, "wwan%d");
+
+So by using ETHER | POINTTOPOINT the interface naming is
+either usb%d or eth%d based on the global uniqueness of the
+mac address of the device.
+
+Without this, 2.5 Gbps Ethernet dongles, which all seem to use the cdc_ncm
+driver, end up being called usb%d instead of eth%d even though they're
+definitely not two-host.  (All 1 Gbps & 5 Gbps Ethernet USB dongles I've
+tested don't hit this problem because they use different drivers, primarily
+r8152 and aqc111.)
+
+Fixes tag is based purely on git blame, and is really just here to make
+sure this hits LTS branches newer than v4.5.
+
+Cc: Lorenzo Colitti <lorenzo@google.com>
+Fixes: 4d06dd537f95 ("cdc_ncm: do not call usbnet_link_change from cdc_ncm_bind")
+Signed-off-by: Maciej Żenczykowski <maze@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/usb/cdc_ncm.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
+index 8acf30115428..dc3d84b43e4e 100644
+--- a/drivers/net/usb/cdc_ncm.c
++++ b/drivers/net/usb/cdc_ncm.c
+@@ -1902,7 +1902,7 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb)
+ static const struct driver_info cdc_ncm_info = {
+       .description = "CDC NCM",
+       .flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET
+-                      | FLAG_LINK_INTR,
++                      | FLAG_LINK_INTR | FLAG_ETHER,
+       .bind = cdc_ncm_bind,
+       .unbind = cdc_ncm_unbind,
+       .manage_power = usbnet_manage_power,
+-- 
+2.30.2
+
diff --git a/queue-5.12/net-dsa-felix-re-enable-tx-flow-control-in-ocelot_po.patch b/queue-5.12/net-dsa-felix-re-enable-tx-flow-control-in-ocelot_po.patch
new file mode 100644 (file)
index 0000000..2442dc2
--- /dev/null
@@ -0,0 +1,54 @@
+From 9c7484f893504483e836e7157825dc4b94943831 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Jun 2021 14:15:35 +0300
+Subject: net: dsa: felix: re-enable TX flow control in ocelot_port_flush()
+
+From: Vladimir Oltean <vladimir.oltean@nxp.com>
+
+[ Upstream commit 1650bdb1c516c248fb06f6d076559ff6437a5853 ]
+
+Because flow control is set up statically in ocelot_init_port() and not
+in phylink_mac_link_up(), after the blamed commit flow control remains
+disabled once the port flushing procedure has run.
+
+Fixes: eb4733d7cffc ("net: dsa: felix: implement port flushing on .phylink_mac_link_down")
+Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mscc/ocelot.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
+index 46e5c9136bac..0c4c976548c8 100644
+--- a/drivers/net/ethernet/mscc/ocelot.c
++++ b/drivers/net/ethernet/mscc/ocelot.c
+@@ -378,6 +378,7 @@ static u32 ocelot_read_eq_avail(struct ocelot *ocelot, int port)
+ int ocelot_port_flush(struct ocelot *ocelot, int port)
+ {
++      unsigned int pause_ena;
+       int err, val;
+       /* Disable dequeuing from the egress queues */
+@@ -386,6 +387,7 @@ int ocelot_port_flush(struct ocelot *ocelot, int port)
+                      QSYS_PORT_MODE, port);
+       /* Disable flow control */
++      ocelot_fields_read(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, &pause_ena);
+       ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, 0);
+       /* Disable priority flow control */
+@@ -421,6 +423,9 @@ int ocelot_port_flush(struct ocelot *ocelot, int port)
+       /* Clear flushing again. */
+       ocelot_rmw_gix(ocelot, 0, REW_PORT_CFG_FLUSH_ENA, REW_PORT_CFG, port);
++      /* Re-enable flow control */
++      ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, pause_ena);
++
+       return err;
+ }
+ EXPORT_SYMBOL(ocelot_port_flush);
+-- 
+2.30.2
+
diff --git a/queue-5.12/net-ena-fix-dma-mapping-function-issues-in-xdp.patch b/queue-5.12/net-ena-fix-dma-mapping-function-issues-in-xdp.patch
new file mode 100644 (file)
index 0000000..fc19758
--- /dev/null
@@ -0,0 +1,150 @@
+From 3c85b12e59de653bc5da0d73b850e4017b67312e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Jun 2021 19:42:54 +0300
+Subject: net: ena: fix DMA mapping function issues in XDP
+
+From: Shay Agroskin <shayagr@amazon.com>
+
+[ Upstream commit 504fd6a5390c30b1b7670768e314dd5d473da06a ]
+
+This patch fixes several bugs found when (DMA/LLQ) mapping a packet for
+transmission. The mapping procedure makes the transmitted packet
+accessible to the device.
+When using LLQ, this requires copying the packet's header to the push
+header (which is passed to the LLQ) and creating a DMA mapping for the
+payload (if the packet doesn't fit within the maximum push length).
+When not using LLQ, we map the whole packet with DMA.
+
+The following bugs are fixed in the code:
+    1. Add support for non-LLQ machines:
+       The ena_xdp_tx_map_frame() function assumed that LLQ is
+       supported, and never mapped the whole packet using DMA. On some
+       instances, which don't support LLQ, this causes loss of traffic.
+
+    2. Wrong DMA buffer length passed to device:
+       When using LLQ, the first 'tx_max_header_size' bytes of the
+       packet are copied to the push header and the rest of the packet
+       is copied to a DMA'd buffer, but the buffer length passed to the
+       device was the full packet length rather than the length of the
+       DMA'd part only.
+
+    3. Freeing the XDP buffer twice in case of a mapping error:
+       In case a buffer DMA mapping fails, the function uses
+       xdp_return_frame_rx_napi() to free the RX buffer and returns from
+       the function with an error. XDP frames that fail to xmit get
+       freed by the kernel and so there is no need for this call.
+
+Fixes: 548c4940b9f1 ("net: ena: Implement XDP_TX action")
+Signed-off-by: Shay Agroskin <shayagr@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/amazon/ena/ena_netdev.c | 54 ++++++++++----------
+ 1 file changed, 28 insertions(+), 26 deletions(-)
+
+diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
+index 102f2c91fdb8..20f8012bbe04 100644
+--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
+@@ -236,36 +236,48 @@ static int ena_xdp_io_poll(struct napi_struct *napi, int budget)
+ static int ena_xdp_tx_map_frame(struct ena_ring *xdp_ring,
+                               struct ena_tx_buffer *tx_info,
+                               struct xdp_frame *xdpf,
+-                              void **push_hdr,
+-                              u32 *push_len)
++                              struct ena_com_tx_ctx *ena_tx_ctx)
+ {
+       struct ena_adapter *adapter = xdp_ring->adapter;
+       struct ena_com_buf *ena_buf;
+-      dma_addr_t dma = 0;
++      int push_len = 0;
++      dma_addr_t dma;
++      void *data;
+       u32 size;
+       tx_info->xdpf = xdpf;
++      data = tx_info->xdpf->data;
+       size = tx_info->xdpf->len;
+-      ena_buf = tx_info->bufs;
+-      /* llq push buffer */
+-      *push_len = min_t(u32, size, xdp_ring->tx_max_header_size);
+-      *push_hdr = tx_info->xdpf->data;
++      if (xdp_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
++              /* Designate part of the packet for LLQ */
++              push_len = min_t(u32, size, xdp_ring->tx_max_header_size);
++
++              ena_tx_ctx->push_header = data;
++
++              size -= push_len;
++              data += push_len;
++      }
++
++      ena_tx_ctx->header_len = push_len;
+-      if (size - *push_len > 0) {
++      if (size > 0) {
+               dma = dma_map_single(xdp_ring->dev,
+-                                   *push_hdr + *push_len,
+-                                   size - *push_len,
++                                   data,
++                                   size,
+                                    DMA_TO_DEVICE);
+               if (unlikely(dma_mapping_error(xdp_ring->dev, dma)))
+                       goto error_report_dma_error;
+-              tx_info->map_linear_data = 1;
+-              tx_info->num_of_bufs = 1;
+-      }
++              tx_info->map_linear_data = 0;
+-      ena_buf->paddr = dma;
+-      ena_buf->len = size;
++              ena_buf = tx_info->bufs;
++              ena_buf->paddr = dma;
++              ena_buf->len = size;
++
++              ena_tx_ctx->ena_bufs = ena_buf;
++              ena_tx_ctx->num_bufs = tx_info->num_of_bufs = 1;
++      }
+       return 0;
+@@ -274,10 +286,6 @@ error_report_dma_error:
+                         &xdp_ring->syncp);
+       netif_warn(adapter, tx_queued, adapter->netdev, "Failed to map xdp buff\n");
+-      xdp_return_frame_rx_napi(tx_info->xdpf);
+-      tx_info->xdpf = NULL;
+-      tx_info->num_of_bufs = 0;
+-
+       return -EINVAL;
+ }
+@@ -289,8 +297,6 @@ static int ena_xdp_xmit_frame(struct ena_ring *xdp_ring,
+       struct ena_com_tx_ctx ena_tx_ctx = {};
+       struct ena_tx_buffer *tx_info;
+       u16 next_to_use, req_id;
+-      void *push_hdr;
+-      u32 push_len;
+       int rc;
+       next_to_use = xdp_ring->next_to_use;
+@@ -298,15 +304,11 @@ static int ena_xdp_xmit_frame(struct ena_ring *xdp_ring,
+       tx_info = &xdp_ring->tx_buffer_info[req_id];
+       tx_info->num_of_bufs = 0;
+-      rc = ena_xdp_tx_map_frame(xdp_ring, tx_info, xdpf, &push_hdr, &push_len);
++      rc = ena_xdp_tx_map_frame(xdp_ring, tx_info, xdpf, &ena_tx_ctx);
+       if (unlikely(rc))
+               goto error_drop_packet;
+-      ena_tx_ctx.ena_bufs = tx_info->bufs;
+-      ena_tx_ctx.push_header = push_hdr;
+-      ena_tx_ctx.num_bufs = tx_info->num_of_bufs;
+       ena_tx_ctx.req_id = req_id;
+-      ena_tx_ctx.header_len = push_len;
+       rc = ena_xmit_common(dev,
+                            xdp_ring,
+-- 
+2.30.2
+
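To make the fixes above easier to follow, here is a hedged, self-contained C model of the corrected mapping split: the push header is carved off only when LLQ-style device placement is in use, and the DMA length is whatever remains (the whole packet on non-LLQ instances). All names and the MAX_PUSH_LEN value are hypothetical; this sketches the logic, not the ena driver code.

#include <stddef.h>
#include <stdio.h>

#define MAX_PUSH_LEN 96                /* stands in for tx_max_header_size */

struct tx_ctx {
        const void *push_header;       /* bytes sent inline via LLQ, if any */
        size_t header_len;
        const void *dma_data;          /* start of the DMA-mapped region */
        size_t dma_len;                /* length reported for that region */
};

static size_t min_sz(size_t a, size_t b) { return a < b ? a : b; }

/* Split a packet into an optional push header plus a DMA region. */
static void map_frame(const void *data, size_t len, int has_llq,
                      struct tx_ctx *ctx)
{
        size_t push_len = 0;

        if (has_llq) {
                push_len = min_sz(len, MAX_PUSH_LEN);
                ctx->push_header = data;
        }
        ctx->header_len = push_len;

        /* Only the part not pushed inline is mapped, and the reported
         * length excludes the pushed header bytes. */
        ctx->dma_data = (const char *)data + push_len;
        ctx->dma_len = len - push_len;
}

int main(void)
{
        char pkt[300] = { 0 };
        struct tx_ctx ctx = { 0 };

        map_frame(pkt, sizeof(pkt), 1, &ctx);
        printf("LLQ:    header %zu, dma %zu\n", ctx.header_len, ctx.dma_len);

        map_frame(pkt, sizeof(pkt), 0, &ctx);
        printf("no LLQ: header %zu, dma %zu\n", ctx.header_len, ctx.dma_len);
        return 0;
}
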
diff --git a/queue-5.12/net-ethernet-fix-potential-use-after-free-in-ec_bhf_.patch b/queue-5.12/net-ethernet-fix-potential-use-after-free-in-ec_bhf_.patch
new file mode 100644 (file)
index 0000000..59a5930
--- /dev/null
@@ -0,0 +1,56 @@
+From 194a62ff94f47f93dd7b572b8298d4634be2e5e0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 18 Jun 2021 16:49:02 +0300
+Subject: net: ethernet: fix potential use-after-free in ec_bhf_remove
+
+From: Pavel Skripkin <paskripkin@gmail.com>
+
+[ Upstream commit 9cca0c2d70149160407bda9a9446ce0c29b6e6c6 ]
+
+static void ec_bhf_remove(struct pci_dev *dev)
+{
+...
+       struct ec_bhf_priv *priv = netdev_priv(net_dev);
+
+       unregister_netdev(net_dev);
+       free_netdev(net_dev);
+
+       pci_iounmap(dev, priv->dma_io);
+       pci_iounmap(dev, priv->io);
+...
+}
+
+priv is the netdev private data, but it is used
+after free_netdev(), which can cause a use-after-free when the priv
+pointer is accessed. Fix it by moving free_netdev() after the
+pci_iounmap() calls.
+
+Fixes: 6af55ff52b02 ("Driver for Beckhoff CX5020 EtherCAT master module.")
+Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/ec_bhf.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/ec_bhf.c b/drivers/net/ethernet/ec_bhf.c
+index 46b0dbab8aad..7c992172933b 100644
+--- a/drivers/net/ethernet/ec_bhf.c
++++ b/drivers/net/ethernet/ec_bhf.c
+@@ -576,10 +576,12 @@ static void ec_bhf_remove(struct pci_dev *dev)
+       struct ec_bhf_priv *priv = netdev_priv(net_dev);
+       unregister_netdev(net_dev);
+-      free_netdev(net_dev);
+       pci_iounmap(dev, priv->dma_io);
+       pci_iounmap(dev, priv->io);
++
++      free_netdev(net_dev);
++
+       pci_release_regions(dev);
+       pci_clear_master(dev);
+       pci_disable_device(dev);
+-- 
+2.30.2
+
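The rule behind this reordering is that the area returned by netdev_priv() lives inside the net_device allocation, so every access through priv must happen before free_netdev(). A small self-contained C model of that ownership relation, with hypothetical names, is shown below.

#include <stdio.h>
#include <stdlib.h>

/* Models a net_device whose private data is embedded in the same
 * allocation, like the area returned by netdev_priv(). */
struct fake_netdev {
        char name[16];
        struct {
                void *io;
                void *dma_io;
        } priv;
};

static void unmap(void *p)
{
        printf("unmapping %p\n", p);
}

int main(void)
{
        struct fake_netdev *dev = calloc(1, sizeof(*dev));

        if (!dev)
                return 1;

        /* Correct teardown order: finish every use of dev->priv first... */
        unmap(dev->priv.dma_io);
        unmap(dev->priv.io);

        /* ...and only then release the allocation that contains it.
         * Calling free(dev) before the unmap calls would be the
         * use-after-free the patch removes. */
        free(dev);
        return 0;
}
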
diff --git a/queue-5.12/net-fec_ptp-fix-issue-caused-by-refactor-the-fec_dev.patch b/queue-5.12/net-fec_ptp-fix-issue-caused-by-refactor-the-fec_dev.patch
new file mode 100644 (file)
index 0000000..b48d873
--- /dev/null
@@ -0,0 +1,44 @@
+From f70ccd9f48488e65ef6cdf910d0176e3513ff135 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Jun 2021 17:14:26 +0800
+Subject: net: fec_ptp: fix issue caused by refactor the fec_devtype
+
+From: Joakim Zhang <qiangqing.zhang@nxp.com>
+
+[ Upstream commit d23765646e71b43ed2b809930411ba5c0aadee7b ]
+
+Commit da722186f654 ("net: fec: set GPR bit on suspend by DT configuration.")
+refactored the fec_devtype handling, so the PTP driver needs to be adjusted
+accordingly: read the quirk flags from fep->quirks instead of the platform
+device id_entry.
+
+Fixes: da722186f654 ("net: fec: set GPR bit on suspend by DT configuration.")
+Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/freescale/fec_ptp.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c
+index 1753807cbf97..ce8e5555f3e0 100644
+--- a/drivers/net/ethernet/freescale/fec_ptp.c
++++ b/drivers/net/ethernet/freescale/fec_ptp.c
+@@ -215,15 +215,13 @@ static u64 fec_ptp_read(const struct cyclecounter *cc)
+ {
+       struct fec_enet_private *fep =
+               container_of(cc, struct fec_enet_private, cc);
+-      const struct platform_device_id *id_entry =
+-              platform_get_device_id(fep->pdev);
+       u32 tempval;
+       tempval = readl(fep->hwp + FEC_ATIME_CTRL);
+       tempval |= FEC_T_CTRL_CAPTURE;
+       writel(tempval, fep->hwp + FEC_ATIME_CTRL);
+-      if (id_entry->driver_data & FEC_QUIRK_BUG_CAPTURE)
++      if (fep->quirks & FEC_QUIRK_BUG_CAPTURE)
+               udelay(1);
+       return readl(fep->hwp + FEC_ATIME);
+-- 
+2.30.2
+
diff --git a/queue-5.12/net-hamradio-fix-memory-leak-in-mkiss_close.patch b/queue-5.12/net-hamradio-fix-memory-leak-in-mkiss_close.patch
new file mode 100644 (file)
index 0000000..c8bfebd
--- /dev/null
@@ -0,0 +1,112 @@
+From 210c99dd0893e373b60d4ad69bdaae475eb48685 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Jun 2021 22:09:06 +0300
+Subject: net: hamradio: fix memory leak in mkiss_close
+
+From: Pavel Skripkin <paskripkin@gmail.com>
+
+[ Upstream commit 7edcc682301492380fbdd604b4516af5ae667a13 ]
+
+My local syzbot instance hit a memory leak in
+mkiss_open() [1]. The problem was a missing
+free_netdev() in mkiss_close().
+
+In mkiss_open() the netdevice is allocated and then
+registered, but in mkiss_close() the netdevice was
+only unregistered, not freed.
+
+Fail log:
+
+BUG: memory leak
+unreferenced object 0xffff8880281ba000 (size 4096):
+  comm "syz-executor.1", pid 11443, jiffies 4295046091 (age 17.660s)
+  hex dump (first 32 bytes):
+    61 78 30 00 00 00 00 00 00 00 00 00 00 00 00 00  ax0.............
+    00 27 fa 2a 80 88 ff ff 00 00 00 00 00 00 00 00  .'.*............
+  backtrace:
+    [<ffffffff81a27201>] kvmalloc_node+0x61/0xf0
+    [<ffffffff8706e7e8>] alloc_netdev_mqs+0x98/0xe80
+    [<ffffffff84e64192>] mkiss_open+0xb2/0x6f0 [1]
+    [<ffffffff842355db>] tty_ldisc_open+0x9b/0x110
+    [<ffffffff84236488>] tty_set_ldisc+0x2e8/0x670
+    [<ffffffff8421f7f3>] tty_ioctl+0xda3/0x1440
+    [<ffffffff81c9f273>] __x64_sys_ioctl+0x193/0x200
+    [<ffffffff8911263a>] do_syscall_64+0x3a/0xb0
+    [<ffffffff89200068>] entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+BUG: memory leak
+unreferenced object 0xffff8880141a9a00 (size 96):
+  comm "syz-executor.1", pid 11443, jiffies 4295046091 (age 17.660s)
+  hex dump (first 32 bytes):
+    e8 a2 1b 28 80 88 ff ff e8 a2 1b 28 80 88 ff ff  ...(.......(....
+    98 92 9c aa b0 40 02 00 00 00 00 00 00 00 00 00  .....@..........
+  backtrace:
+    [<ffffffff8709f68b>] __hw_addr_create_ex+0x5b/0x310
+    [<ffffffff8709fb38>] __hw_addr_add_ex+0x1f8/0x2b0
+    [<ffffffff870a0c7b>] dev_addr_init+0x10b/0x1f0
+    [<ffffffff8706e88b>] alloc_netdev_mqs+0x13b/0xe80
+    [<ffffffff84e64192>] mkiss_open+0xb2/0x6f0 [1]
+    [<ffffffff842355db>] tty_ldisc_open+0x9b/0x110
+    [<ffffffff84236488>] tty_set_ldisc+0x2e8/0x670
+    [<ffffffff8421f7f3>] tty_ioctl+0xda3/0x1440
+    [<ffffffff81c9f273>] __x64_sys_ioctl+0x193/0x200
+    [<ffffffff8911263a>] do_syscall_64+0x3a/0xb0
+    [<ffffffff89200068>] entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+BUG: memory leak
+unreferenced object 0xffff8880219bfc00 (size 512):
+  comm "syz-executor.1", pid 11443, jiffies 4295046091 (age 17.660s)
+  hex dump (first 32 bytes):
+    00 a0 1b 28 80 88 ff ff 80 8f b1 8d ff ff ff ff  ...(............
+    80 8f b1 8d ff ff ff ff 00 00 00 00 00 00 00 00  ................
+  backtrace:
+    [<ffffffff81a27201>] kvmalloc_node+0x61/0xf0
+    [<ffffffff8706eec7>] alloc_netdev_mqs+0x777/0xe80
+    [<ffffffff84e64192>] mkiss_open+0xb2/0x6f0 [1]
+    [<ffffffff842355db>] tty_ldisc_open+0x9b/0x110
+    [<ffffffff84236488>] tty_set_ldisc+0x2e8/0x670
+    [<ffffffff8421f7f3>] tty_ioctl+0xda3/0x1440
+    [<ffffffff81c9f273>] __x64_sys_ioctl+0x193/0x200
+    [<ffffffff8911263a>] do_syscall_64+0x3a/0xb0
+    [<ffffffff89200068>] entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+BUG: memory leak
+unreferenced object 0xffff888029b2b200 (size 256):
+  comm "syz-executor.1", pid 11443, jiffies 4295046091 (age 17.660s)
+  hex dump (first 32 bytes):
+    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+  backtrace:
+    [<ffffffff81a27201>] kvmalloc_node+0x61/0xf0
+    [<ffffffff8706f062>] alloc_netdev_mqs+0x912/0xe80
+    [<ffffffff84e64192>] mkiss_open+0xb2/0x6f0 [1]
+    [<ffffffff842355db>] tty_ldisc_open+0x9b/0x110
+    [<ffffffff84236488>] tty_set_ldisc+0x2e8/0x670
+    [<ffffffff8421f7f3>] tty_ioctl+0xda3/0x1440
+    [<ffffffff81c9f273>] __x64_sys_ioctl+0x193/0x200
+    [<ffffffff8911263a>] do_syscall_64+0x3a/0xb0
+    [<ffffffff89200068>] entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+Fixes: 815f62bf7427 ("[PATCH] SMP rewrite of mkiss")
+Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/hamradio/mkiss.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c
+index 17be2bb2985c..920e9f888cc3 100644
+--- a/drivers/net/hamradio/mkiss.c
++++ b/drivers/net/hamradio/mkiss.c
+@@ -799,6 +799,7 @@ static void mkiss_close(struct tty_struct *tty)
+       ax->tty = NULL;
+       unregister_netdev(ax->dev);
++      free_netdev(ax->dev);
+ }
+ /* Perform I/O control on an active ax25 channel. */
+-- 
+2.30.2
+
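The leak comes down to a missing pairing: whatever the line-discipline open path allocates, the close path must both unregister and free. A tiny self-contained C model of that pairing follows; the helper names are illustrative, not the mkiss API.

#include <stdlib.h>

struct fake_netdev {
        int registered;
};

static struct fake_netdev *open_channel(void)
{
        struct fake_netdev *dev = calloc(1, sizeof(*dev)); /* alloc_netdev_mqs() */

        if (dev)
                dev->registered = 1;                       /* register_netdev() */
        return dev;
}

static void close_channel(struct fake_netdev *dev)
{
        dev->registered = 0;   /* unregister_netdev(): stops the device... */
        free(dev);             /* ...but only free_netdev() releases the memory */
}

int main(void)
{
        struct fake_netdev *dev = open_channel();

        if (dev)
                close_channel(dev);   /* without the free, every open/close leaks */
        return 0;
}
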
diff --git a/queue-5.12/net-ipv4-fix-memory-leak-in-ip_mc_add1_src.patch b/queue-5.12/net-ipv4-fix-memory-leak-in-ip_mc_add1_src.patch
new file mode 100644 (file)
index 0000000..3cf365c
--- /dev/null
@@ -0,0 +1,86 @@
+From de862b0cf70b3c24107b4dec5973963e62338dd6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Jun 2021 17:59:25 +0800
+Subject: net: ipv4: fix memory leak in ip_mc_add1_src
+
+From: Chengyang Fan <cy.fan@huawei.com>
+
+[ Upstream commit d8e2973029b8b2ce477b564824431f3385c77083 ]
+
+BUG: memory leak
+unreferenced object 0xffff888101bc4c00 (size 32):
+  comm "syz-executor527", pid 360, jiffies 4294807421 (age 19.329s)
+  hex dump (first 32 bytes):
+    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
+    01 00 00 00 00 00 00 00 ac 14 14 bb 00 00 02 00 ................
+  backtrace:
+    [<00000000f17c5244>] kmalloc include/linux/slab.h:558 [inline]
+    [<00000000f17c5244>] kzalloc include/linux/slab.h:688 [inline]
+    [<00000000f17c5244>] ip_mc_add1_src net/ipv4/igmp.c:1971 [inline]
+    [<00000000f17c5244>] ip_mc_add_src+0x95f/0xdb0 net/ipv4/igmp.c:2095
+    [<000000001cb99709>] ip_mc_source+0x84c/0xea0 net/ipv4/igmp.c:2416
+    [<0000000052cf19ed>] do_ip_setsockopt net/ipv4/ip_sockglue.c:1294 [inline]
+    [<0000000052cf19ed>] ip_setsockopt+0x114b/0x30c0 net/ipv4/ip_sockglue.c:1423
+    [<00000000477edfbc>] raw_setsockopt+0x13d/0x170 net/ipv4/raw.c:857
+    [<00000000e75ca9bb>] __sys_setsockopt+0x158/0x270 net/socket.c:2117
+    [<00000000bdb993a8>] __do_sys_setsockopt net/socket.c:2128 [inline]
+    [<00000000bdb993a8>] __se_sys_setsockopt net/socket.c:2125 [inline]
+    [<00000000bdb993a8>] __x64_sys_setsockopt+0xba/0x150 net/socket.c:2125
+    [<000000006a1ffdbd>] do_syscall_64+0x40/0x80 arch/x86/entry/common.c:47
+    [<00000000b11467c4>] entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+In commit 24803f38a5c0 ("igmp: do not remove igmp souce list info when set
+link down"), the ip_mc_clear_src() in ip_mc_destroy_dev() was removed,
+because it was also called in igmpv3_clear_delrec().
+
+Rough callgraph:
+
+inetdev_destroy
+-> ip_mc_destroy_dev
+     -> igmpv3_clear_delrec
+        -> ip_mc_clear_src
+-> RCU_INIT_POINTER(dev->ip_ptr, NULL)
+
+However, the ip_mc_clear_src() called from igmpv3_clear_delrec() doesn't
+release in_dev->mc_list->sources, and RCU_INIT_POINTER() assigns
+NULL to dev->ip_ptr. As a result, in_dev can no longer be obtained through
+inetdev_by_index(), so in_dev->mc_list->sources cannot be released
+by ip_mc_del1_src() on socket close. The rough call sequence is:
+
+sock_close
+-> __sock_release
+   -> inet_release
+      -> ip_mc_drop_socket
+         -> inetdev_by_index
+         -> ip_mc_leave_src
+            -> ip_mc_del_src
+               -> ip_mc_del1_src
+
+So we still need to call ip_mc_clear_src() in ip_mc_destroy_dev() to free
+in_dev->mc_list->sources.
+
+Fixes: 24803f38a5c0 ("igmp: do not remove igmp souce list info ...")
+Reported-by: Hulk Robot <hulkci@huawei.com>
+Signed-off-by: Chengyang Fan <cy.fan@huawei.com>
+Acked-by: Hangbin Liu <liuhangbin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/igmp.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
+index 7b272bbed2b4..6b3c558a4f23 100644
+--- a/net/ipv4/igmp.c
++++ b/net/ipv4/igmp.c
+@@ -1801,6 +1801,7 @@ void ip_mc_destroy_dev(struct in_device *in_dev)
+       while ((i = rtnl_dereference(in_dev->mc_list)) != NULL) {
+               in_dev->mc_list = i->next_rcu;
+               in_dev->mc_count--;
++              ip_mc_clear_src(i);
+               ip_ma_put(i);
+       }
+ }
+-- 
+2.30.2
+
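The fix restores a per-entry cleanup inside the device teardown loop: while the device's multicast list is being destroyed, each entry's source list must be released there, because nothing can reach it afterwards. Below is a hedged, self-contained C model of that teardown shape with hypothetical types.

#include <stdlib.h>

struct source {
        struct source *next;
};

struct mc_entry {
        struct mc_entry *next;
        struct source *sources;        /* models pmc->sources */
};

static void clear_sources(struct mc_entry *e)
{
        while (e->sources) {
                struct source *s = e->sources;

                e->sources = s->next;
                free(s);
        }
}

/* Models ip_mc_destroy_dev(): each entry's source list is cleared here,
 * which is the call the patch restores. */
static void destroy_list(struct mc_entry *head)
{
        while (head) {
                struct mc_entry *e = head;

                head = e->next;
                clear_sources(e);
                free(e);
        }
}

int main(void)
{
        struct mc_entry *e = calloc(1, sizeof(*e));

        if (!e)
                return 1;
        e->sources = calloc(1, sizeof(*e->sources));
        destroy_list(e);
        return 0;
}
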
diff --git a/queue-5.12/net-ipv4-fix-memory-leak-in-netlbl_cipsov4_add_std.patch b/queue-5.12/net-ipv4-fix-memory-leak-in-netlbl_cipsov4_add_std.patch
new file mode 100644 (file)
index 0000000..482d613
--- /dev/null
@@ -0,0 +1,66 @@
+From f84dfb967850facd132caee98ec1709932ffbb87 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Jun 2021 09:51:58 +0800
+Subject: net: ipv4: fix memory leak in netlbl_cipsov4_add_std
+
+From: Nanyong Sun <sunnanyong@huawei.com>
+
+[ Upstream commit d612c3f3fae221e7ea736d196581c2217304bbbc ]
+
+Reported by syzkaller:
+BUG: memory leak
+unreferenced object 0xffff888105df7000 (size 64):
+comm "syz-executor842", pid 360, jiffies 4294824824 (age 22.546s)
+hex dump (first 32 bytes):
+00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
+00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
+backtrace:
+[<00000000e67ed558>] kmalloc include/linux/slab.h:590 [inline]
+[<00000000e67ed558>] kzalloc include/linux/slab.h:720 [inline]
+[<00000000e67ed558>] netlbl_cipsov4_add_std net/netlabel/netlabel_cipso_v4.c:145 [inline]
+[<00000000e67ed558>] netlbl_cipsov4_add+0x390/0x2340 net/netlabel/netlabel_cipso_v4.c:416
+[<0000000006040154>] genl_family_rcv_msg_doit.isra.0+0x20e/0x320 net/netlink/genetlink.c:739
+[<00000000204d7a1c>] genl_family_rcv_msg net/netlink/genetlink.c:783 [inline]
+[<00000000204d7a1c>] genl_rcv_msg+0x2bf/0x4f0 net/netlink/genetlink.c:800
+[<00000000c0d6a995>] netlink_rcv_skb+0x134/0x3d0 net/netlink/af_netlink.c:2504
+[<00000000d78b9d2c>] genl_rcv+0x24/0x40 net/netlink/genetlink.c:811
+[<000000009733081b>] netlink_unicast_kernel net/netlink/af_netlink.c:1314 [inline]
+[<000000009733081b>] netlink_unicast+0x4a0/0x6a0 net/netlink/af_netlink.c:1340
+[<00000000d5fd43b8>] netlink_sendmsg+0x789/0xc70 net/netlink/af_netlink.c:1929
+[<000000000a2d1e40>] sock_sendmsg_nosec net/socket.c:654 [inline]
+[<000000000a2d1e40>] sock_sendmsg+0x139/0x170 net/socket.c:674
+[<00000000321d1969>] ____sys_sendmsg+0x658/0x7d0 net/socket.c:2350
+[<00000000964e16bc>] ___sys_sendmsg+0xf8/0x170 net/socket.c:2404
+[<000000001615e288>] __sys_sendmsg+0xd3/0x190 net/socket.c:2433
+[<000000004ee8b6a5>] do_syscall_64+0x37/0x90 arch/x86/entry/common.c:47
+[<00000000171c7cee>] entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+The memory that doi_def->map.std points to is allocated in
+netlbl_cipsov4_add_std(), but nothing ever frees it. It should be
+freed in cipso_v4_doi_free(), which frees the CIPSO DOI resources.
+
+Fixes: 96cb8e3313c7a ("[NetLabel]: CIPSOv4 and Unlabeled packet integration")
+Reported-by: Hulk Robot <hulkci@huawei.com>
+Signed-off-by: Nanyong Sun <sunnanyong@huawei.com>
+Acked-by: Paul Moore <paul@paul-moore.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/cipso_ipv4.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
+index bfaf327e9d12..e0480c6cebaa 100644
+--- a/net/ipv4/cipso_ipv4.c
++++ b/net/ipv4/cipso_ipv4.c
+@@ -472,6 +472,7 @@ void cipso_v4_doi_free(struct cipso_v4_doi *doi_def)
+               kfree(doi_def->map.std->lvl.local);
+               kfree(doi_def->map.std->cat.cipso);
+               kfree(doi_def->map.std->cat.local);
++              kfree(doi_def->map.std);
+               break;
+       }
+       kfree(doi_def);
+-- 
+2.30.2
+
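The pattern is the usual nested-ownership one: free everything a member pointer owns, then free the member itself, before releasing the parent object. A minimal self-contained C model is sketched here; the names are illustrative, not the CIPSO structures.

#include <stdlib.h>

struct std_map {
        int *lvl_local;
        int *cat_cipso;
};

struct doi_def {
        struct std_map *std;           /* models doi_def->map.std */
};

static void doi_free(struct doi_def *doi)
{
        if (doi->std) {
                /* Free the members owned by the map... */
                free(doi->std->lvl_local);
                free(doi->std->cat_cipso);
                /* ...and the map itself, which the buggy code forgot. */
                free(doi->std);
        }
        free(doi);
}

int main(void)
{
        struct doi_def *doi = calloc(1, sizeof(*doi));

        if (!doi)
                return 1;
        doi->std = calloc(1, sizeof(*doi->std));
        doi_free(doi);
        return 0;
}
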
diff --git a/queue-5.12/net-lantiq-disable-interrupt-before-sheduling-napi.patch b/queue-5.12/net-lantiq-disable-interrupt-before-sheduling-napi.patch
new file mode 100644 (file)
index 0000000..0403415
--- /dev/null
@@ -0,0 +1,45 @@
+From 55d00539184fc2000fa31e2a0f4a045e2c4bc4e7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Jun 2021 23:21:07 +0200
+Subject: net: lantiq: disable interrupt before sheduling NAPI
+
+From: Aleksander Jan Bajkowski <olek2@wp.pl>
+
+[ Upstream commit f2386cf7c5f4ff5d7b584f5d92014edd7df6c676 ]
+
+This patch fixes TX hangs with threaded NAPI enabled. The scheduled
+NAPI seems to be executed in parallel with the interrupt on a second
+thread, so it can happen that ltq_dma_disable_irq() is executed
+after xrx200_tx_housekeeping(). The symptom is that TX interrupts
+stay disabled in the DMA controller and, as a result, TX hangs after
+a few seconds of an iperf test. Scheduling NAPI only after disabling
+interrupts fixes this issue.
+
+Tested on Lantiq xRX200 (BT Home Hub 5A).
+
+Fixes: 9423361da523 ("net: lantiq: Disable IRQs only if NAPI gets scheduled ")
+Signed-off-by: Aleksander Jan Bajkowski <olek2@wp.pl>
+Acked-by: Hauke Mehrtens <hauke@hauke-m.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/lantiq_xrx200.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c
+index 135ba5b6ae98..3da494df72f3 100644
+--- a/drivers/net/ethernet/lantiq_xrx200.c
++++ b/drivers/net/ethernet/lantiq_xrx200.c
+@@ -352,8 +352,8 @@ static irqreturn_t xrx200_dma_irq(int irq, void *ptr)
+       struct xrx200_chan *ch = ptr;
+       if (napi_schedule_prep(&ch->napi)) {
+-              __napi_schedule(&ch->napi);
+               ltq_dma_disable_irq(&ch->dma);
++              __napi_schedule(&ch->napi);
+       }
+       ltq_dma_ack_irq(&ch->dma);
+-- 
+2.30.2
+
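The ordering the patch enforces can be modeled in a few lines: the handler masks the device interrupt first and only then schedules NAPI, so the (possibly threaded) poll routine cannot finish and re-enable interrupts before the handler has masked them. The sketch below is a self-contained C model with hypothetical fields, not the xrx200 driver code.

#include <stdio.h>

struct chan {
        int irq_enabled;
        int napi_scheduled;
};

/* Models the fixed xrx200_dma_irq(): mask the interrupt BEFORE scheduling
 * NAPI, so the poll routine running elsewhere cannot re-enable interrupts
 * only to have this handler mask them again and leave TX stuck. */
static void dma_irq(struct chan *ch)
{
        ch->irq_enabled = 0;           /* ltq_dma_disable_irq() */
        ch->napi_scheduled = 1;        /* __napi_schedule()     */
}

/* Models the end of NAPI polling: work done, interrupts re-enabled. */
static void napi_poll_done(struct chan *ch)
{
        ch->napi_scheduled = 0;
        ch->irq_enabled = 1;           /* ltq_dma_enable_irq() */
}

int main(void)
{
        struct chan ch = { .irq_enabled = 1 };

        dma_irq(&ch);
        napi_poll_done(&ch);
        printf("irq_enabled=%d napi_scheduled=%d\n",
               ch.irq_enabled, ch.napi_scheduled);
        return 0;
}
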
diff --git a/queue-5.12/net-make-get_net_ns-return-error-if-net_ns-is-disabl.patch b/queue-5.12/net-make-get_net_ns-return-error-if-net_ns-is-disabl.patch
new file mode 100644 (file)
index 0000000..92bb5be
--- /dev/null
@@ -0,0 +1,131 @@
+From 6181162d9d7675fb4a83fc28fee66ab5629be730 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 11 Jun 2021 22:29:59 +0800
+Subject: net: make get_net_ns return error if NET_NS is disabled
+
+From: Changbin Du <changbin.du@gmail.com>
+
+[ Upstream commit ea6932d70e223e02fea3ae20a4feff05d7c1ea9a ]
+
+There is a panic in the socket ioctl cmd SIOCGSKNS when NET_NS is not enabled.
+The reason is that nsfs tries to access ns->ops, but proc_ns_operations
+is not implemented in this case.
+
+[7.670023] Unable to handle kernel NULL pointer dereference at virtual address 00000010
+[7.670268] pgd = 32b54000
+[7.670544] [00000010] *pgd=00000000
+[7.671861] Internal error: Oops: 5 [#1] SMP ARM
+[7.672315] Modules linked in:
+[7.672918] CPU: 0 PID: 1 Comm: systemd Not tainted 5.13.0-rc3-00375-g6799d4f2da49 #16
+[7.673309] Hardware name: Generic DT based system
+[7.673642] PC is at nsfs_evict+0x24/0x30
+[7.674486] LR is at clear_inode+0x20/0x9c
+
+The same to tun SIOCGSKNS command.
+
+To fix this problem, make get_net_ns() return -EINVAL when NET_NS is
+disabled, and move it to its proper place, net/core/net_namespace.c.
+
+Signed-off-by: Changbin Du <changbin.du@gmail.com>
+Fixes: c62cce2caee5 ("net: add an ioctl to get a socket network namespace")
+Cc: Cong Wang <xiyou.wangcong@gmail.com>
+Cc: Jakub Kicinski <kuba@kernel.org>
+Cc: David Laight <David.Laight@ACULAB.COM>
+Cc: Christian Brauner <christian.brauner@ubuntu.com>
+Suggested-by: Jakub Kicinski <kuba@kernel.org>
+Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/socket.h      |  2 --
+ include/net/net_namespace.h |  7 +++++++
+ net/core/net_namespace.c    | 12 ++++++++++++
+ net/socket.c                | 13 -------------
+ 4 files changed, 19 insertions(+), 15 deletions(-)
+
+diff --git a/include/linux/socket.h b/include/linux/socket.h
+index 385894b4a8bb..42222a84167f 100644
+--- a/include/linux/socket.h
++++ b/include/linux/socket.h
+@@ -438,6 +438,4 @@ extern int __sys_socketpair(int family, int type, int protocol,
+                           int __user *usockvec);
+ extern int __sys_shutdown_sock(struct socket *sock, int how);
+ extern int __sys_shutdown(int fd, int how);
+-
+-extern struct ns_common *get_net_ns(struct ns_common *ns);
+ #endif /* _LINUX_SOCKET_H */
+diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
+index dcaee24a4d87..14b6f7f44532 100644
+--- a/include/net/net_namespace.h
++++ b/include/net/net_namespace.h
+@@ -197,6 +197,8 @@ struct net *copy_net_ns(unsigned long flags, struct user_namespace *user_ns,
+ void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid);
+ void net_ns_barrier(void);
++
++struct ns_common *get_net_ns(struct ns_common *ns);
+ #else /* CONFIG_NET_NS */
+ #include <linux/sched.h>
+ #include <linux/nsproxy.h>
+@@ -216,6 +218,11 @@ static inline void net_ns_get_ownership(const struct net *net,
+ }
+ static inline void net_ns_barrier(void) {}
++
++static inline struct ns_common *get_net_ns(struct ns_common *ns)
++{
++      return ERR_PTR(-EINVAL);
++}
+ #endif /* CONFIG_NET_NS */
+diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
+index 43b6ac4c4439..cc8dafb25d61 100644
+--- a/net/core/net_namespace.c
++++ b/net/core/net_namespace.c
+@@ -641,6 +641,18 @@ void __put_net(struct net *net)
+ }
+ EXPORT_SYMBOL_GPL(__put_net);
++/**
++ * get_net_ns - increment the refcount of the network namespace
++ * @ns: common namespace (net)
++ *
++ * Returns the net's common namespace.
++ */
++struct ns_common *get_net_ns(struct ns_common *ns)
++{
++      return &get_net(container_of(ns, struct net, ns))->ns;
++}
++EXPORT_SYMBOL_GPL(get_net_ns);
++
+ struct net *get_net_ns_by_fd(int fd)
+ {
+       struct file *file;
+diff --git a/net/socket.c b/net/socket.c
+index 84a8049c2b09..03259cb919f7 100644
+--- a/net/socket.c
++++ b/net/socket.c
+@@ -1072,19 +1072,6 @@ static long sock_do_ioctl(struct net *net, struct socket *sock,
+  *    what to do with it - that's up to the protocol still.
+  */
+-/**
+- *    get_net_ns - increment the refcount of the network namespace
+- *    @ns: common namespace (net)
+- *
+- *    Returns the net's common namespace.
+- */
+-
+-struct ns_common *get_net_ns(struct ns_common *ns)
+-{
+-      return &get_net(container_of(ns, struct net, ns))->ns;
+-}
+-EXPORT_SYMBOL_GPL(get_net_ns);
+-
+ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
+ {
+       struct socket *sock;
+-- 
+2.30.2
+
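The shape of the fix is a config-gated stub: when the feature is compiled out, the helper returns an error pointer instead of leaving callers to crash on a half-implemented namespace. Below is a hedged userspace C model of that pattern with simplified stand-ins for the kernel's ERR_PTR()/IS_ERR(); it is not the kernel implementation.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

/* Userspace stand-ins for ERR_PTR()/IS_ERR(). */
static inline void *err_ptr(long err)
{
        return (void *)(intptr_t)err;
}

static inline int is_err(const void *p)
{
        return (uintptr_t)p >= (uintptr_t)-4095;
}

struct ns_common {
        int count;
};

#ifdef CONFIG_NET_NS
/* Real implementation: take a reference and return the namespace. */
struct ns_common *get_net_ns(struct ns_common *ns)
{
        ns->count++;
        return ns;
}
#else
/* Stub used when the feature is compiled out: callers get a clean error
 * instead of an object they would later dereference and crash on. */
static inline struct ns_common *get_net_ns(struct ns_common *ns)
{
        (void)ns;
        return err_ptr(-EINVAL);
}
#endif

int main(void)
{
        struct ns_common ns = { 0 };
        struct ns_common *ret = get_net_ns(&ns);

        printf("get_net_ns %s\n",
               is_err(ret) ? "returned an error" : "succeeded");
        return 0;
}
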
diff --git a/queue-5.12/net-mhi_net-update-the-transmit-handler-prototype.patch b/queue-5.12/net-mhi_net-update-the-transmit-handler-prototype.patch
new file mode 100644 (file)
index 0000000..792be07
--- /dev/null
@@ -0,0 +1,37 @@
+From ab0b25b074b2bf74b793daa4be4ca25c6124fa6c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 14 Jun 2021 15:03:25 -0600
+Subject: net: mhi_net: Update the transmit handler prototype
+
+From: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
+
+[ Upstream commit 2214fb53006e6cfa6371b706070cb99794c68c3b ]
+
+Update the function prototype of mhi_ndo_xmit to match
+ndo_start_xmit. The mismatch otherwise leads to run-time failures when
+CFI is enabled in the kernel.
+
+Fixes: 3ffec6a14f24 ("net: Add mhi-net driver")
+Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/mhi/net.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/mhi/net.c b/drivers/net/mhi/net.c
+index f59960876083..8e7f8728998f 100644
+--- a/drivers/net/mhi/net.c
++++ b/drivers/net/mhi/net.c
+@@ -49,7 +49,7 @@ static int mhi_ndo_stop(struct net_device *ndev)
+       return 0;
+ }
+-static int mhi_ndo_xmit(struct sk_buff *skb, struct net_device *ndev)
++static netdev_tx_t mhi_ndo_xmit(struct sk_buff *skb, struct net_device *ndev)
+ {
+       struct mhi_net_dev *mhi_netdev = netdev_priv(ndev);
+       const struct mhi_net_proto *proto = mhi_netdev->proto;
+-- 
+2.30.2
+
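The underlying requirement is that a function stored in a callback pointer must match the pointer's type exactly, because CFI checks the type at every indirect call. The self-contained C sketch below models this with hypothetical types; declaring the handler as returning plain int would be the kind of mismatch the patch removes.

#include <stdio.h>
#include <stddef.h>

/* Models netdev_tx_t: a dedicated return type for the xmit callback. */
typedef enum {
        TX_OK = 0,
        TX_BUSY = 16
} tx_result_t;

struct ops {
        /* The callback type the core expects (models ndo_start_xmit). */
        tx_result_t (*start_xmit)(void *skb, void *dev);
};

/* Correct: the handler's signature matches the function pointer exactly,
 * so an indirect-call type check accepts it. */
static tx_result_t my_xmit(void *skb, void *dev)
{
        (void)skb;
        (void)dev;
        return TX_OK;
}

static const struct ops my_ops = { .start_xmit = my_xmit };

int main(void)
{
        tx_result_t ret = my_ops.start_xmit(NULL, NULL);

        printf("xmit returned %d\n", ret);
        return 0;
}
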
diff --git a/queue-5.12/net-mlx5-check-that-driver-was-probed-prior-attachin.patch b/queue-5.12/net-mlx5-check-that-driver-was-probed-prior-attachin.patch
new file mode 100644 (file)
index 0000000..e4654c8
--- /dev/null
@@ -0,0 +1,139 @@
+From 492739f83893a62f03f06005b716310b7436d424 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 21 Mar 2021 19:57:14 +0200
+Subject: net/mlx5: Check that driver was probed prior attaching the device
+
+From: Leon Romanovsky <leonro@nvidia.com>
+
+[ Upstream commit 2058cc9c8041fde9c0bdd8e868c72b137cff8563 ]
+
+The device can be requested to attach even though it was never probed.
+This situation is possible if devlink reload races with module removal,
+and the following kernel panic is an outcome of such a race.
+
+ mlx5_core 0000:00:09.0: firmware version: 4.7.9999
+ mlx5_core 0000:00:09.0: 0.000 Gb/s available PCIe bandwidth (8.0 GT/s PCIe x255 link)
+ BUG: unable to handle page fault for address: fffffffffffffff0
+ #PF: supervisor read access in kernel mode
+ #PF: error_code(0x0000) - not-present page
+ PGD 3218067 P4D 3218067 PUD 321a067 PMD 0
+ Oops: 0000 [#1] SMP KASAN NOPTI
+ CPU: 7 PID: 250 Comm: devlink Not tainted 5.12.0-rc2+ #2836
+ Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
+ RIP: 0010:mlx5_attach_device+0x80/0x280 [mlx5_core]
+ Code: f8 48 c1 e8 03 42 80 3c 38 00 0f 85 80 01 00 00 48 8b 45 68 48 8d 78 f0 48 89 fe 48 c1 ee 03 42 80 3c 3e 00 0f 85 70 01 00 00 <48> 8b 40 f0 48 85 c0 74 0d 48 89 ef ff d0 85 c0 0f 85 84 05 0e 00
+ RSP: 0018:ffff8880129675f0 EFLAGS: 00010246
+ RAX: 0000000000000000 RBX: 0000000000000001 RCX: ffffffff827407f1
+ RDX: 1ffff110011336cf RSI: 1ffffffffffffffe RDI: fffffffffffffff0
+ RBP: ffff888008e0c000 R08: 0000000000000008 R09: ffffffffa0662ee7
+ R10: fffffbfff40cc5dc R11: 0000000000000000 R12: ffff88800ea002e0
+ R13: ffffed1001d459f7 R14: ffffffffa05ef4f8 R15: dffffc0000000000
+ FS:  00007f51dfeaf740(0000) GS:ffff88806d5c0000(0000) knlGS:0000000000000000
+ CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: fffffffffffffff0 CR3: 000000000bc82006 CR4: 0000000000370ea0
+ DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+ DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+ Call Trace:
+  mlx5_load_one+0x117/0x1d0 [mlx5_core]
+  devlink_reload+0x2d5/0x520
+  ? devlink_remote_reload_actions_performed+0x30/0x30
+  ? mutex_trylock+0x24b/0x2d0
+  ? devlink_nl_cmd_reload+0x62b/0x1070
+  devlink_nl_cmd_reload+0x66d/0x1070
+  ? devlink_reload+0x520/0x520
+  ? devlink_nl_pre_doit+0x64/0x4d0
+  genl_family_rcv_msg_doit+0x1e9/0x2f0
+  ? mutex_lock_io_nested+0x1130/0x1130
+  ? genl_family_rcv_msg_attrs_parse.constprop.0+0x240/0x240
+  ? security_capable+0x51/0x90
+  genl_rcv_msg+0x27f/0x4a0
+  ? genl_get_cmd+0x3c0/0x3c0
+  ? lock_acquire+0x1a9/0x6d0
+  ? devlink_reload+0x520/0x520
+  ? lock_release+0x6c0/0x6c0
+  netlink_rcv_skb+0x11d/0x340
+  ? genl_get_cmd+0x3c0/0x3c0
+  ? netlink_ack+0x9f0/0x9f0
+  ? lock_release+0x1f9/0x6c0
+  genl_rcv+0x24/0x40
+  netlink_unicast+0x433/0x700
+  ? netlink_attachskb+0x730/0x730
+  ? _copy_from_iter_full+0x178/0x650
+  ? __alloc_skb+0x113/0x2b0
+  netlink_sendmsg+0x6f1/0xbd0
+  ? netlink_unicast+0x700/0x700
+  ? netlink_unicast+0x700/0x700
+  sock_sendmsg+0xb0/0xe0
+  __sys_sendto+0x193/0x240
+  ? __x64_sys_getpeername+0xb0/0xb0
+  ? copy_page_range+0x2300/0x2300
+  ? __up_read+0x1a1/0x7b0
+  ? do_user_addr_fault+0x219/0xdc0
+  __x64_sys_sendto+0xdd/0x1b0
+  ? syscall_enter_from_user_mode+0x1d/0x50
+  do_syscall_64+0x2d/0x40
+  entry_SYSCALL_64_after_hwframe+0x44/0xae
+ RIP: 0033:0x7f51dffb514a
+ Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 76 c3 0f 1f 44 00 00 55 48 83 ec 30 44 89 4c
+ RSP: 002b:00007ffcaef22e78 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
+ RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007f51dffb514a
+ RDX: 0000000000000030 RSI: 000055750daf2440 RDI: 0000000000000003
+ RBP: 000055750daf2410 R08: 00007f51e0081200 R09: 000000000000000c
+ R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
+ R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
+ Modules linked in: mlx5_core(-) ptp pps_core ib_ipoib rdma_ucm rdma_cm iw_cm ib_cm ib_umad ib_uverbs ib_core [last unloaded: mlx5_ib]
+ CR2: fffffffffffffff0
+ ---[ end trace 7789831bfe74fa42 ]---
+
+Fixes: a925b5e309c9 ("net/mlx5: Register mlx5 devices to auxiliary virtual bus")
+Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
+Reviewed-by: Parav Pandit <parav@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/dev.c | 15 +++++++++++++++
+ 1 file changed, 15 insertions(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+index 9153c9bda96f..f0623e94716b 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+@@ -323,6 +323,16 @@ int mlx5_attach_device(struct mlx5_core_dev *dev)
+                       }
+               } else {
+                       adev = &priv->adev[i]->adev;
++
++                      /* Pay attention that this is not PCI driver that
++                       * mlx5_core_dev is connected, but auxiliary driver.
++                       *
++                       * Here we can race of module unload with devlink
++                       * reload, but we don't need to take extra lock because
++                       * we are holding global mlx5_intf_mutex.
++                       */
++                      if (!adev->dev.driver)
++                              continue;
+                       adrv = to_auxiliary_drv(adev->dev.driver);
+                       if (adrv->resume)
+@@ -353,6 +363,10 @@ void mlx5_detach_device(struct mlx5_core_dev *dev)
+                       continue;
+               adev = &priv->adev[i]->adev;
++              /* Auxiliary driver was unbind manually through sysfs */
++              if (!adev->dev.driver)
++                      goto skip_suspend;
++
+               adrv = to_auxiliary_drv(adev->dev.driver);
+               if (adrv->suspend) {
+@@ -360,6 +374,7 @@ void mlx5_detach_device(struct mlx5_core_dev *dev)
+                       continue;
+               }
++skip_suspend:
+               del_adev(&priv->adev[i]->adev);
+               priv->adev[i] = NULL;
+       }
+-- 
+2.30.2
+
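The defensive pattern here is to check that a device actually has a driver bound before following that pointer, since attach can race with module unload or a manual unbind. A minimal self-contained C model of the skip-if-unbound loop (hypothetical types, not the auxiliary bus API) is shown below.

#include <stddef.h>
#include <stdio.h>

struct driver {
        const char *name;
};

struct device {
        struct driver *driver;         /* NULL when nothing is bound */
};

/* Models the attach loop: skip devices with no driver bound instead of
 * dereferencing a NULL driver pointer (the crash in the oops above). */
static void attach_all(struct device **devs, int n)
{
        for (int i = 0; i < n; i++) {
                if (!devs[i]->driver)
                        continue;      /* raced with unload: nothing to resume */
                printf("resuming driver %s\n", devs[i]->driver->name);
        }
}

int main(void)
{
        struct driver drv = { .name = "aux-eth" };
        struct device bound = { .driver = &drv };
        struct device unbound = { .driver = NULL };
        struct device *devs[] = { &bound, &unbound };

        attach_all(devs, 2);
        return 0;
}
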
diff --git a/queue-5.12/net-mlx5-consider-roce-cap-before-init-rdma-resource.patch b/queue-5.12/net-mlx5-consider-roce-cap-before-init-rdma-resource.patch
new file mode 100644 (file)
index 0000000..d9ea611
--- /dev/null
@@ -0,0 +1,37 @@
+From ede1b45ddc017b77698fc92bd5a6abb4cbdb336a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 6 Jun 2021 11:20:46 +0300
+Subject: net/mlx5: Consider RoCE cap before init RDMA resources
+
+From: Maor Gottlieb <maorg@nvidia.com>
+
+[ Upstream commit c189716b2a7c1d2d8658e269735273caa1c38b54 ]
+
+Check whether RoCE is supported by the device before enabling it in
+the vport context and creating all the RDMA steering objects.
+
+Fixes: 80f09dfc237f ("net/mlx5: Eswitch, enable RoCE loopback traffic")
+Signed-off-by: Maor Gottlieb <maorg@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/rdma.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
+index 8e0dddc6383f..2389239acadc 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
+@@ -156,6 +156,9 @@ void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev)
+ {
+       int err;
++      if (!MLX5_CAP_GEN(dev, roce))
++              return;
++
+       err = mlx5_nic_vport_enable_roce(dev);
+       if (err) {
+               mlx5_core_err(dev, "Failed to enable RoCE: %d\n", err);
+-- 
+2.30.2
+
diff --git a/queue-5.12/net-mlx5-dr-don-t-use-sw-steering-when-roce-is-not-s.patch b/queue-5.12/net-mlx5-dr-don-t-use-sw-steering-when-roce-is-not-s.patch
new file mode 100644 (file)
index 0000000..5b2987a
--- /dev/null
@@ -0,0 +1,45 @@
+From 8cacf28e7420cc0a53c793edff81201f66dffdc6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 6 Jun 2021 11:23:41 +0300
+Subject: net/mlx5: DR, Don't use SW steering when RoCE is not supported
+
+From: Maor Gottlieb <maorg@nvidia.com>
+
+[ Upstream commit 4aaf96ac8b45d8e2e019b6b53cce65a73c4ace2c ]
+
+SW steering uses an RC QP to write to and read from ICM, hence it is not
+supported when RoCE is not supported either.
+
+Fixes: 70605ea545e8 ("net/mlx5: DR, Expose APIs for direct rule managing")
+Signed-off-by: Maor Gottlieb <maorg@nvidia.com>
+Reviewed-by: Alex Vesker <valex@nvidia.com>
+Reviewed-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h    | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
+index 612b0ac31db2..9737565cd8d4 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
+@@ -124,10 +124,11 @@ int mlx5dr_action_destroy(struct mlx5dr_action *action);
+ static inline bool
+ mlx5dr_is_supported(struct mlx5_core_dev *dev)
+ {
+-      return MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner) ||
+-             (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner_v2) &&
+-              (MLX5_CAP_GEN(dev, steering_format_version) <=
+-               MLX5_STEERING_FORMAT_CONNECTX_6DX));
++      return MLX5_CAP_GEN(dev, roce) &&
++             (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner) ||
++              (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner_v2) &&
++               (MLX5_CAP_GEN(dev, steering_format_version) <=
++                MLX5_STEERING_FORMAT_CONNECTX_6DX)));
+ }
+ /* buddy functions & structure */
+-- 
+2.30.2
+
diff --git a/queue-5.12/net-mlx5-dr-fix-stev1-incorrect-l3-decapsulation-pad.patch b/queue-5.12/net-mlx5-dr-fix-stev1-incorrect-l3-decapsulation-pad.patch
new file mode 100644 (file)
index 0000000..f224d46
--- /dev/null
@@ -0,0 +1,93 @@
+From 246ec048ae11602715ca8eb9a3d5ae11cdec493c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 1 Jun 2021 18:10:06 +0300
+Subject: net/mlx5: DR, Fix STEv1 incorrect L3 decapsulation padding
+
+From: Alex Vesker <valex@nvidia.com>
+
+[ Upstream commit 65fb7d109abe3a1a9f1c2d3ba7e1249bc978d5f0 ]
+
+L3 decapsulation of small inner packets (less than 64 bytes) was
+done incorrectly. Small packets carry extra L2 padding which should
+not be included in the L3 length. The issue was that after L3
+decapsulation the extra L2 padding caused the L3 length to be
+updated.
+
+To avoid this issue the new header is pushed to the beginning
+of the packet (offset 0), which should not cause a HW reparse
+and an L3 length update.
+
+Fixes: c349b4137cfd ("net/mlx5: DR, Add STEv1 modify header logic")
+Reviewed-by: Erez Shitrit <erezsh@nvidia.com>
+Reviewed-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
+Signed-off-by: Alex Vesker <valex@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../mellanox/mlx5/core/steering/dr_ste_v1.c   | 26 ++++++++++++-------
+ 1 file changed, 16 insertions(+), 10 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
+index f146c618a78e..46ef45fa9167 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
+@@ -712,7 +712,11 @@ static int dr_ste_v1_set_action_decap_l3_list(void *data,
+       if (hw_action_sz / DR_STE_ACTION_DOUBLE_SZ < DR_STE_DECAP_L3_ACTION_NUM)
+               return -EINVAL;
+-      memcpy(padded_data, data, data_sz);
++      inline_data_sz =
++              MLX5_FLD_SZ_BYTES(ste_double_action_insert_with_inline_v1, inline_data);
++
++      /* Add an alignment padding  */
++      memcpy(padded_data + data_sz % inline_data_sz, data, data_sz);
+       /* Remove L2L3 outer headers */
+       MLX5_SET(ste_single_action_remove_header_v1, hw_action, action_id,
+@@ -724,32 +728,34 @@ static int dr_ste_v1_set_action_decap_l3_list(void *data,
+       hw_action += DR_STE_ACTION_DOUBLE_SZ;
+       used_actions++; /* Remove and NOP are a single double action */
+-      inline_data_sz =
+-              MLX5_FLD_SZ_BYTES(ste_double_action_insert_with_inline_v1, inline_data);
++      /* Point to the last dword of the header */
++      data_ptr += (data_sz / inline_data_sz) * inline_data_sz;
+-      /* Add the new header inline + 2 extra bytes */
++      /* Add the new header using inline action 4Byte at a time, the header
++       * is added in reversed order to the beginning of the packet to avoid
++       * incorrect parsing by the HW. Since header is 14B or 18B an extra
++       * two bytes are padded and later removed.
++       */
+       for (i = 0; i < data_sz / inline_data_sz + 1; i++) {
+               void *addr_inline;
+               MLX5_SET(ste_double_action_insert_with_inline_v1, hw_action, action_id,
+                        DR_STE_V1_ACTION_ID_INSERT_INLINE);
+               /* The hardware expects here offset to words (2 bytes) */
+-              MLX5_SET(ste_double_action_insert_with_inline_v1, hw_action, start_offset,
+-                       i * 2);
++              MLX5_SET(ste_double_action_insert_with_inline_v1, hw_action, start_offset, 0);
+               /* Copy bytes one by one to avoid endianness problem */
+               addr_inline = MLX5_ADDR_OF(ste_double_action_insert_with_inline_v1,
+                                          hw_action, inline_data);
+-              memcpy(addr_inline, data_ptr, inline_data_sz);
++              memcpy(addr_inline, data_ptr - i * inline_data_sz, inline_data_sz);
+               hw_action += DR_STE_ACTION_DOUBLE_SZ;
+-              data_ptr += inline_data_sz;
+               used_actions++;
+       }
+-      /* Remove 2 extra bytes */
++      /* Remove first 2 extra bytes */
+       MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, action_id,
+                DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE);
+-      MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, start_offset, data_sz / 2);
++      MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, start_offset, 0);
+       /* The hardware expects here size in words (2 bytes) */
+       MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, remove_size, 1);
+       used_actions++;
+-- 
+2.30.2
+
diff --git a/queue-5.12/net-mlx5-e-switch-allow-setting-guid-for-host-pf-vpo.patch b/queue-5.12/net-mlx5-e-switch-allow-setting-guid-for-host-pf-vpo.patch
new file mode 100644 (file)
index 0000000..b3c9f07
--- /dev/null
@@ -0,0 +1,50 @@
+From 763a191a1223f866fba3da4a9ddd47eb5730c48d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Jun 2021 19:03:24 +0300
+Subject: net/mlx5: E-Switch, Allow setting GUID for host PF vport
+
+From: Parav Pandit <parav@nvidia.com>
+
+[ Upstream commit ca36fc4d77b35b8d142cf1ed0eae5ec2e071dc3c ]
+
+E-switch should be able to set the GUID of host PF vport.
+Currently it returns an error. This results in below error
+when user attempts to configure MAC address of the PF of an
+external controller.
+
+$ devlink port function set pci/0000:03:00.0/196608 \
+   hw_addr 00:00:00:11:22:33
+
+mlx5_core 0000:03:00.0: mlx5_esw_set_vport_mac_locked:1876:(pid 6715):\
+"Failed to set vport 0 node guid, err = -22.
+RDMA_CM will not function properly for this VF."
+
+Check for zero vport is no longer needed.
+
+Fixes: 330077d14de1 ("net/mlx5: E-switch, Supporting setting devlink port function mac address")
+Signed-off-by: Yuval Avnery <yuvalav@nvidia.com>
+Signed-off-by: Parav Pandit <parav@nvidia.com>
+Reviewed-by: Bodong Wang <bodong@nvidia.com>
+Reviewed-by: Alaa Hleihel <alaa@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/vport.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+index e05c5c0f3ae1..7d21fbb9192f 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+@@ -465,8 +465,6 @@ int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev,
+       void *in;
+       int err;
+-      if (!vport)
+-              return -EINVAL;
+       if (!MLX5_CAP_GEN(mdev, vport_group_manager))
+               return -EACCES;
+-- 
+2.30.2
+
diff --git a/queue-5.12/net-mlx5-e-switch-read-pf-mac-address.patch b/queue-5.12/net-mlx5-e-switch-read-pf-mac-address.patch
new file mode 100644 (file)
index 0000000..47a7519
--- /dev/null
@@ -0,0 +1,81 @@
+From 0fb9bbae616b9c8137a77cbde1d00a1a68104d7f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Jun 2021 19:14:08 +0300
+Subject: net/mlx5: E-Switch, Read PF mac address
+
+From: Parav Pandit <parav@nvidia.com>
+
+[ Upstream commit bbc8222dc49db8d49add0f27bcac33f4b92193dc ]
+
+The external controller PF's MAC address is not read from the device during
+vport setup. Failing to read it results in all zeros being shown to the user
+even though the factory-programmed MAC is a valid value.
+
+$ devlink port show eth1 -jp
+{
+    "port": {
+        "pci/0000:03:00.0/196608": {
+            "type": "eth",
+            "netdev": "eth1",
+            "flavour": "pcipf",
+            "controller": 1,
+            "pfnum": 0,
+            "splittable": false,
+            "function": {
+                "hw_addr": "00:00:00:00:00:00"
+            }
+        }
+    }
+}
+
+Hence, read it when enabling a vport.
+
+After the fix,
+
+$ devlink port show eth1 -jp
+{
+    "port": {
+        "pci/0000:03:00.0/196608": {
+            "type": "eth",
+            "netdev": "eth1",
+            "flavour": "pcipf",
+            "controller": 1,
+            "pfnum": 0,
+            "splittable": false,
+            "function": {
+                "hw_addr": "98:03:9b:a0:60:11"
+            }
+        }
+    }
+}
+
+Fixes: f099fde16db3 ("net/mlx5: E-switch, Support querying port function mac address")
+Signed-off-by: Bodong Wang <bodong@nvidia.com>
+Signed-off-by: Parav Pandit <parav@nvidia.com>
+Reviewed-by: Alaa Hleihel <alaa@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+index 2c6d95900e3c..a3edeea4ddd7 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+@@ -1308,6 +1308,12 @@ int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, u16 vport_num,
+                       goto err_vhca_mapping;
+       }
++      /* External controller host PF has factory programmed MAC.
++       * Read it from the device.
++       */
++      if (mlx5_core_is_ecpf(esw->dev) && vport_num == MLX5_VPORT_PF)
++              mlx5_query_nic_vport_mac_address(esw->dev, vport_num, true, vport->info.mac);
++
+       esw_vport_change_handle_locked(vport);
+       esw->enabled_vports++;
+-- 
+2.30.2
+
diff --git a/queue-5.12/net-mlx5-fix-error-path-for-set-hca-defaults.patch b/queue-5.12/net-mlx5-fix-error-path-for-set-hca-defaults.patch
new file mode 100644 (file)
index 0000000..e2f397c
--- /dev/null
@@ -0,0 +1,47 @@
+From 0ef3c0c99483c5e35b7f8cafa71b92f4cf903f61 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 8 Mar 2021 15:41:55 +0200
+Subject: net/mlx5: Fix error path for set HCA defaults
+
+From: Leon Romanovsky <leonro@nvidia.com>
+
+[ Upstream commit 94a4b8414d3e91104873007b659252f855ee344a ]
+
+If mlx5_core_set_hca_defaults() fails, the wrong goto label was used
+for the error unwind flow.
+
+Fixes: 5bef709d76a2 ("net/mlx5: Enable host PF HCA after eswitch is initialized")
+Reviewed-by: Saeed Mahameed <saeedm@nvidia.com>
+Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
+Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
+Reviewed-by: Parav Pandit <parav@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/main.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+index efb93d63e54c..58b8f75d7a01 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+@@ -1157,7 +1157,7 @@ static int mlx5_load(struct mlx5_core_dev *dev)
+       err = mlx5_core_set_hca_defaults(dev);
+       if (err) {
+               mlx5_core_err(dev, "Failed to set hca defaults\n");
+-              goto err_sriov;
++              goto err_set_hca;
+       }
+       mlx5_vhca_event_start(dev);
+@@ -1190,6 +1190,7 @@ err_ec:
+       mlx5_sf_hw_table_destroy(dev);
+ err_vhca:
+       mlx5_vhca_event_stop(dev);
++err_set_hca:
+       mlx5_cleanup_fs(dev);
+ err_fs:
+       mlx5_accel_tls_cleanup(dev);
+-- 
+2.30.2
+
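The one-line fix targets the classic goto unwind ladder: on failure, jump to the label that undoes exactly what has already succeeded, and fall through the remaining undo steps in reverse order. Here is a self-contained C sketch of that ladder with made-up step names.

#include <stdio.h>

static int step_a_done, step_b_done;

static int do_step_a(void) { step_a_done = 1; return 0; }
static void undo_step_a(void) { step_a_done = 0; }
static int do_step_b(void) { step_b_done = 1; return 0; }
static void undo_step_b(void) { step_b_done = 0; }
static int do_step_c(int fail) { return fail ? -1 : 0; }

/* A failure at step C must jump to the label that undoes B first and then
 * fall through to undo A. Jumping to the wrong label either leaks B or
 * undoes things that were never set up. */
static int load(int fail_c)
{
        int err;

        err = do_step_a();
        if (err)
                goto err_a;
        err = do_step_b();
        if (err)
                goto err_b;
        err = do_step_c(fail_c);
        if (err)
                goto err_c;
        return 0;

err_c:
        undo_step_b();
err_b:
        undo_step_a();
err_a:
        return err;
}

int main(void)
{
        load(1);
        printf("after failed load: a=%d b=%d\n", step_a_done, step_b_done);
        return 0;
}
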
diff --git a/queue-5.12/net-mlx5-reset-mkey-index-on-creation.patch b/queue-5.12/net-mlx5-reset-mkey-index-on-creation.patch
new file mode 100644 (file)
index 0000000..2b6f3f8
--- /dev/null
@@ -0,0 +1,41 @@
+From 03f5565e2579dae159d0c34ca485998323acaed2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 10 Jun 2021 14:20:28 +0300
+Subject: net/mlx5: Reset mkey index on creation
+
+From: Aya Levin <ayal@nvidia.com>
+
+[ Upstream commit 0232fc2ddcf4ffe01069fd1aa07922652120f44a ]
+
+Reset only the index part of the mkey and keep the variant part. On
+devlink reload, the driver recreates mkeys, so the mkey index may change.
+While trying to preserve the variant part of the mkey, the driver mistakenly
+OR-ed the new mkey index into the current value. On a devlink reload the
+current index bits are stale, so the index may be corrupted.
+
+Fixes: 54c62e13ad76 ("{IB,net}/mlx5: Setup mkey variant before mr create command invocation")
+Signed-off-by: Aya Levin <ayal@nvidia.com>
+Signed-off-by: Amir Tzin <amirtz@nvidia.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/mr.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
+index 50af84e76fb6..174f71ed5280 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
+@@ -54,7 +54,7 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
+       mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index);
+       mkey->iova = MLX5_GET64(mkc, mkc, start_addr);
+       mkey->size = MLX5_GET64(mkc, mkc, len);
+-      mkey->key |= mlx5_idx_to_mkey(mkey_index);
++      mkey->key = (u32)mlx5_mkey_variant(mkey->key) | mlx5_idx_to_mkey(mkey_index);
+       mkey->pd = MLX5_GET(mkc, mkc, pd);
+       init_waitqueue_head(&mkey->wait);
+-- 
+2.30.2
+
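The corrected expression rebuilds the key from its two fields instead of OR-ing a new index onto possibly stale bits. The small self-contained C example below shows why |= corrupts the value once the index changes while mask-and-merge does not; the 8/24-bit split is illustrative, not necessarily the real mkey layout.

#include <stdint.h>
#include <stdio.h>

/* Models an mkey: low 8 bits hold a "variant", the upper bits an index. */
#define VARIANT_MASK 0xffu

static uint32_t variant_of(uint32_t key) { return key & VARIANT_MASK; }
static uint32_t idx_to_key(uint32_t idx) { return idx << 8; }

int main(void)
{
        uint32_t key = idx_to_key(0x111) | 0xab;   /* old index + variant */
        uint32_t new_idx = 0x222;                  /* index after reload  */

        /* Buggy merge: OR keeps the stale index bits and corrupts the key. */
        uint32_t bad = key | idx_to_key(new_idx);

        /* Correct merge: keep only the variant, then install the new index. */
        uint32_t good = variant_of(key) | idx_to_key(new_idx);

        printf("bad  = 0x%08x\n", (unsigned int)bad);   /* 0x000333ab */
        printf("good = 0x%08x\n", (unsigned int)good);  /* 0x000222ab */
        return 0;
}
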
diff --git a/queue-5.12/net-mlx5-sf_dev-remove-sf-device-on-invalid-state.patch b/queue-5.12/net-mlx5-sf_dev-remove-sf-device-on-invalid-state.patch
new file mode 100644 (file)
index 0000000..96f21d9
--- /dev/null
@@ -0,0 +1,43 @@
+From 3a2693b1bd2bd969d87f65e7392eeb96ca809f94 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 10 Jun 2021 18:39:53 +0300
+Subject: net/mlx5: SF_DEV, remove SF device on invalid state
+
+From: Parav Pandit <parav@nvidia.com>
+
+[ Upstream commit c7d6c19b3bde66d7aebbe93e0f9e6d9ff57fc3fa ]
+
+When auxiliary bus autoprobe is disabled and the SF is in the ACTIVE state,
+on SF port deletion it transitions from ACTIVE->ALLOCATED->INVALID.
+
+By the time the VHCA event handler queries the state, it has already
+transitioned to the INVALID state.
+
+In this scenario, the event handler missed deleting the SF device.
+
+Fix it by deleting the SF when the SF state is INVALID.
+
+Fixes: 90d010b8634b ("net/mlx5: SF, Add auxiliary device support")
+Signed-off-by: Parav Pandit <parav@nvidia.com>
+Reviewed-by: Vu Pham <vuhuong@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
+index 90b524c59f3c..c4139f4648bf 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
+@@ -153,6 +153,7 @@ mlx5_sf_dev_state_change_handler(struct notifier_block *nb, unsigned long event_
+       sf_index = event->function_id - MLX5_CAP_GEN(table->dev, sf_base_id);
+       sf_dev = xa_load(&table->devices, sf_index);
+       switch (event->new_vhca_state) {
++      case MLX5_VHCA_STATE_INVALID:
+       case MLX5_VHCA_STATE_ALLOCATED:
+               if (sf_dev)
+                       mlx5_sf_dev_del(table->dev, sf_dev, sf_index);
+-- 
+2.30.2
+
diff --git a/queue-5.12/net-mlx5e-block-offload-of-outer-header-csum-for-gre.patch b/queue-5.12/net-mlx5e-block-offload-of-outer-header-csum-for-gre.patch
new file mode 100644 (file)
index 0000000..7f4e4c9
--- /dev/null
@@ -0,0 +1,45 @@
+From 4d863dc2a18bad335b9b880410dbbe3a7e018bfe Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 26 May 2021 10:40:36 +0300
+Subject: net/mlx5e: Block offload of outer header csum for GRE tunnel
+
+From: Aya Levin <ayal@nvidia.com>
+
+[ Upstream commit 54e1217b90486c94b26f24dcee1ee5ef5372f832 ]
+
+The device is able to offload either the outer header csum or the inner
+header csum, and the driver utilizes the inner csum offload. So, prohibit
+setting tx-gre-csum-segmentation and leave it as off [fixed].
+
+Fixes: 2729984149e6 ("net/mlx5e: Support TSO and TX checksum offloads for GRE tunnels")
+Signed-off-by: Aya Levin <ayal@nvidia.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 9 +++------
+ 1 file changed, 3 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+index 2a3da167f248..16b8f5245032 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+@@ -5174,12 +5174,9 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
+       }
+       if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_GRE)) {
+-              netdev->hw_features     |= NETIF_F_GSO_GRE |
+-                                         NETIF_F_GSO_GRE_CSUM;
+-              netdev->hw_enc_features |= NETIF_F_GSO_GRE |
+-                                         NETIF_F_GSO_GRE_CSUM;
+-              netdev->gso_partial_features |= NETIF_F_GSO_GRE |
+-                                              NETIF_F_GSO_GRE_CSUM;
++              netdev->hw_features     |= NETIF_F_GSO_GRE;
++              netdev->hw_enc_features |= NETIF_F_GSO_GRE;
++              netdev->gso_partial_features |= NETIF_F_GSO_GRE;
+       }
+       if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_IPIP)) {
+-- 
+2.30.2
+
diff --git a/queue-5.12/net-mlx5e-block-offload-of-outer-header-csum-for-udp.patch b/queue-5.12/net-mlx5e-block-offload-of-outer-header-csum-for-udp.patch
new file mode 100644 (file)
index 0000000..ff1a525
--- /dev/null
@@ -0,0 +1,46 @@
+From 1e7409821a98d50ca5a050231605875101173296 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 10 May 2021 14:34:58 +0300
+Subject: net/mlx5e: Block offload of outer header csum for UDP tunnels
+
+From: Aya Levin <ayal@nvidia.com>
+
+[ Upstream commit 6d6727dddc7f93fcc155cb8d0c49c29ae0e71122 ]
+
+The device is able to offload either the outer header csum or the inner
+header csum. The driver utilizes the inner csum offload. Hence, block
+setting of tx-udp_tnl-csum-segmentation and force it to off [fixed].
+
+Fixes: b49663c8fb49 ("net/mlx5e: Add support for UDP tunnel segmentation with outer checksum offload")
+Signed-off-by: Aya Levin <ayal@nvidia.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 10 +++-------
+ 1 file changed, 3 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+index 99dc9f2beed5..2a3da167f248 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+@@ -5168,13 +5168,9 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
+       }
+       if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev)) {
+-              netdev->hw_features     |= NETIF_F_GSO_UDP_TUNNEL |
+-                                         NETIF_F_GSO_UDP_TUNNEL_CSUM;
+-              netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL |
+-                                         NETIF_F_GSO_UDP_TUNNEL_CSUM;
+-              netdev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM;
+-              netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL |
+-                                       NETIF_F_GSO_UDP_TUNNEL_CSUM;
++              netdev->hw_features     |= NETIF_F_GSO_UDP_TUNNEL;
++              netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL;
++              netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL;
+       }
+       if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_GRE)) {
+-- 
+2.30.2
+
diff --git a/queue-5.12/net-mlx5e-don-t-create-devices-during-unload-flow.patch b/queue-5.12/net-mlx5e-don-t-create-devices-during-unload-flow.patch
new file mode 100644 (file)
index 0000000..075a74d
--- /dev/null
@@ -0,0 +1,84 @@
+From d2bb0740fbb3cdaafa24d95069cdb6a9f3ff246d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 14 May 2021 11:14:19 +0300
+Subject: net/mlx5e: Don't create devices during unload flow
+
+From: Dmytro Linkin <dlinkin@nvidia.com>
+
+[ Upstream commit a5ae8fc9058e37437c8c1f82b3d412b4abd1b9e6 ]
+
+Running the devlink reload command for a port in switchdev mode causes
+resource corruption: the driver can't release the allocated EQ and reclaim
+memory pages, because the "rdma" auxiliary device has added CQs which block
+the EQ from being deleted.
+The erroneous sequence happens during the reload-down phase and is as follows:
+
+1. detach device - suspends auxiliary devices which support it, destroys
+   the others. During this step "eth-rep" and "rdma-rep" are destroyed
+   and "eth" is suspended.
+2. disable SRIOV - moves the device to legacy mode; as part of the
+   disablement, drivers are rescanned. This step adds the "rdma" auxiliary device.
+3. destroy EQ table - <failure>.
+
+The driver shouldn't create any devices during unload flows. To handle that,
+implement the MLX5_PRIV_FLAGS_DETACH flag: set it on device detach and clear
+it on device attach. If the flag is set, make the drivers rescan a no-op.
+
+Fixes: a925b5e309c9 ("net/mlx5: Register mlx5 devices to auxiliary virtual bus")
+Signed-off-by: Dmytro Linkin <dlinkin@nvidia.com>
+Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
+Reviewed-by: Roi Dayan <roid@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/dev.c | 4 ++++
+ include/linux/mlx5/driver.h                   | 4 ++++
+ 2 files changed, 8 insertions(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+index f0623e94716b..897853a68cd0 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+@@ -306,6 +306,7 @@ int mlx5_attach_device(struct mlx5_core_dev *dev)
+       int ret = 0, i;
+       mutex_lock(&mlx5_intf_mutex);
++      priv->flags &= ~MLX5_PRIV_FLAGS_DETACH;
+       for (i = 0; i < ARRAY_SIZE(mlx5_adev_devices); i++) {
+               if (!priv->adev[i]) {
+                       bool is_supported = false;
+@@ -378,6 +379,7 @@ skip_suspend:
+               del_adev(&priv->adev[i]->adev);
+               priv->adev[i] = NULL;
+       }
++      priv->flags |= MLX5_PRIV_FLAGS_DETACH;
+       mutex_unlock(&mlx5_intf_mutex);
+ }
+@@ -466,6 +468,8 @@ int mlx5_rescan_drivers_locked(struct mlx5_core_dev *dev)
+       struct mlx5_priv *priv = &dev->priv;
+       lockdep_assert_held(&mlx5_intf_mutex);
++      if (priv->flags & MLX5_PRIV_FLAGS_DETACH)
++              return 0;
+       delete_drivers(dev);
+       if (priv->flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
+diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
+index 133967c40214..6a31bbba1b6f 100644
+--- a/include/linux/mlx5/driver.h
++++ b/include/linux/mlx5/driver.h
+@@ -541,6 +541,10 @@ struct mlx5_core_roce {
+ enum {
+       MLX5_PRIV_FLAGS_DISABLE_IB_ADEV = 1 << 0,
+       MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV = 1 << 1,
++      /* Set during device detach to block any further devices
++       * creation/deletion on drivers rescan. Unset during device attach.
++       */
++      MLX5_PRIV_FLAGS_DETACH = 1 << 2,
+ };
+ struct mlx5_adev {
+-- 
+2.30.2
+
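As context for the MLX5_PRIV_FLAGS_DETACH change above, here is a minimal,
hypothetical C sketch of the same guard-flag pattern (the demo_* names are
made up and are not part of the mlx5 driver): the detach path sets a flag
under the mutex that also protects driver rescan, so a rescan triggered
mid-unload becomes a no-op.

#include <linux/bits.h>
#include <linux/mutex.h>
#include <linux/lockdep.h>

#define DEMO_FLAG_DETACH	BIT(0)

struct demo_dev {
	struct mutex intf_mutex;	/* serializes attach/detach/rescan */
	unsigned long flags;
};

static void demo_detach(struct demo_dev *dev)
{
	mutex_lock(&dev->intf_mutex);
	/* ... suspend or delete auxiliary devices here ... */
	dev->flags |= DEMO_FLAG_DETACH;		/* block later rescans */
	mutex_unlock(&dev->intf_mutex);
}

static void demo_attach(struct demo_dev *dev)
{
	mutex_lock(&dev->intf_mutex);
	dev->flags &= ~DEMO_FLAG_DETACH;	/* rescans are allowed again */
	/* ... re-add auxiliary devices here ... */
	mutex_unlock(&dev->intf_mutex);
}

static int demo_rescan_locked(struct demo_dev *dev)
{
	lockdep_assert_held(&dev->intf_mutex);
	if (dev->flags & DEMO_FLAG_DETACH)
		return 0;	/* device is being torn down: do nothing */
	/* ... delete and re-create auxiliary devices here ... */
	return 0;
}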
diff --git a/queue-5.12/net-mlx5e-fix-page-reclaim-for-dead-peer-hairpin.patch b/queue-5.12/net-mlx5e-fix-page-reclaim-for-dead-peer-hairpin.patch
new file mode 100644 (file)
index 0000000..e317af6
--- /dev/null
@@ -0,0 +1,169 @@
+From 3b06ca6032085a5604094032907be282297b1d96 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 26 May 2021 13:45:10 +0300
+Subject: net/mlx5e: Fix page reclaim for dead peer hairpin
+
+From: Dima Chumak <dchumak@nvidia.com>
+
+[ Upstream commit a3e5fd9314dfc4314a9567cde96e1aef83a7458a ]
+
+When adding a hairpin flow, a firmware-side send queue is created for
+the peer net device, which claims some host memory pages for its
+internal ring buffer. If the peer net device is removed/unbound before
+the hairpin flow is deleted, then the send queue is not destroyed, which
+leads to a stack trace on PCI device remove:
+
+[ 748.005230] mlx5_core 0000:08:00.2: wait_func:1094:(pid 12985): MANAGE_PAGES(0x108) timeout. Will cause a leak of a command resource
+[ 748.005231] mlx5_core 0000:08:00.2: reclaim_pages:514:(pid 12985): failed reclaiming pages: err -110
+[ 748.001835] mlx5_core 0000:08:00.2: mlx5_reclaim_root_pages:653:(pid 12985): failed reclaiming pages (-110) for func id 0x0
+[ 748.002171] ------------[ cut here ]------------
+[ 748.001177] FW pages counter is 4 after reclaiming all pages
+[ 748.001186] WARNING: CPU: 1 PID: 12985 at drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c:685 mlx5_reclaim_startup_pages+0x34b/0x460 [mlx5_core]                      [  +0.002771] Modules linked in: cls_flower mlx5_ib mlx5_core ptp pps_core act_mirred sch_ingress openvswitch nsh xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 br_netfilter rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_iscsi rdma_cm ib_umad ib_ipoib iw_cm ib_cm ib_uverbs ib_core overlay fuse [last unloaded: pps_core]
+[ 748.007225] CPU: 1 PID: 12985 Comm: tee Not tainted 5.12.0+ #1
+[ 748.001376] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
+[ 748.002315] RIP: 0010:mlx5_reclaim_startup_pages+0x34b/0x460 [mlx5_core]
+[ 748.001679] Code: 28 00 00 00 0f 85 22 01 00 00 48 81 c4 b0 00 00 00 31 c0 5b 5d 41 5c 41 5d 41 5e 41 5f c3 48 c7 c7 40 cc 19 a1 e8 9f 71 0e e2 <0f> 0b e9 30 ff ff ff 48 c7 c7 a0 cc 19 a1 e8 8c 71 0e e2 0f 0b e9
+[ 748.003781] RSP: 0018:ffff88815220faf8 EFLAGS: 00010286
+[ 748.001149] RAX: 0000000000000000 RBX: ffff8881b4900280 RCX: 0000000000000000
+[ 748.001445] RDX: 0000000000000027 RSI: 0000000000000004 RDI: ffffed102a441f51
+[ 748.001614] RBP: 00000000000032b9 R08: 0000000000000001 R09: ffffed1054a15ee8
+[ 748.001446] R10: ffff8882a50af73b R11: ffffed1054a15ee7 R12: fffffbfff07c1e30
+[ 748.001447] R13: dffffc0000000000 R14: ffff8881b492cba8 R15: 0000000000000000
+[ 748.001429] FS:  00007f58bd08b580(0000) GS:ffff8882a5080000(0000) knlGS:0000000000000000
+[ 748.001695] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[ 748.001309] CR2: 000055a026351740 CR3: 00000001d3b48006 CR4: 0000000000370ea0
+[ 748.001506] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+[ 748.001483] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+[ 748.001654] Call Trace:
+[ 748.000576]  ? mlx5_satisfy_startup_pages+0x290/0x290 [mlx5_core]
+[ 748.001416]  ? mlx5_cmd_teardown_hca+0xa2/0xd0 [mlx5_core]
+[ 748.001354]  ? mlx5_cmd_init_hca+0x280/0x280 [mlx5_core]
+[ 748.001203]  mlx5_function_teardown+0x30/0x60 [mlx5_core]
+[ 748.001275]  mlx5_uninit_one+0xa7/0xc0 [mlx5_core]
+[ 748.001200]  remove_one+0x5f/0xc0 [mlx5_core]
+[ 748.001075]  pci_device_remove+0x9f/0x1d0
+[ 748.000833]  device_release_driver_internal+0x1e0/0x490
+[ 748.001207]  unbind_store+0x19f/0x200
+[ 748.000942]  ? sysfs_file_ops+0x170/0x170
+[ 748.001000]  kernfs_fop_write_iter+0x2bc/0x450
+[ 748.000970]  new_sync_write+0x373/0x610
+[ 748.001124]  ? new_sync_read+0x600/0x600
+[ 748.001057]  ? lock_acquire+0x4d6/0x700
+[ 748.000908]  ? lockdep_hardirqs_on_prepare+0x400/0x400
+[ 748.001126]  ? fd_install+0x1c9/0x4d0
+[ 748.000951]  vfs_write+0x4d0/0x800
+[ 748.000804]  ksys_write+0xf9/0x1d0
+[ 748.000868]  ? __x64_sys_read+0xb0/0xb0
+[ 748.000811]  ? filp_open+0x50/0x50
+[ 748.000919]  ? syscall_enter_from_user_mode+0x1d/0x50
+[ 748.001223]  do_syscall_64+0x3f/0x80
+[ 748.000892]  entry_SYSCALL_64_after_hwframe+0x44/0xae
+[ 748.001026] RIP: 0033:0x7f58bcfb22f7
+[ 748.000944] Code: 0d 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 24
+[ 748.003925] RSP: 002b:00007fffd7f2aaa8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
+[ 748.001732] RAX: ffffffffffffffda RBX: 000000000000000d RCX: 00007f58bcfb22f7
+[ 748.001426] RDX: 000000000000000d RSI: 00007fffd7f2abc0 RDI: 0000000000000003
+[ 748.001746] RBP: 00007fffd7f2abc0 R08: 0000000000000000 R09: 0000000000000001
+[ 748.001631] R10: 00000000000001b6 R11: 0000000000000246 R12: 000000000000000d
+[ 748.001537] R13: 00005597ac2c24a0 R14: 000000000000000d R15: 00007f58bd084700
+[ 748.001564] irq event stamp: 0
+[ 748.000787] hardirqs last  enabled at (0): [<0000000000000000>] 0x0
+[ 748.001399] hardirqs last disabled at (0): [<ffffffff813132cf>] copy_process+0x146f/0x5eb0
+[ 748.001854] softirqs last  enabled at (0): [<ffffffff8131330e>] copy_process+0x14ae/0x5eb0
+[ 748.013431] softirqs last disabled at (0): [<0000000000000000>] 0x0
+[ 748.001492] ---[ end trace a6fabd773d1c51ae ]---
+
+Fix by destroying the send queue of a hairpin peer net device that is
+being removed/unbound, which returns the allocated ring buffer pages to
+the host.
+
+Fixes: 4d8fcf216c90 ("net/mlx5e: Avoid unbounded peer devices when unpairing TC hairpin rules")
+Signed-off-by: Dima Chumak <dchumak@nvidia.com>
+Reviewed-by: Roi Dayan <roid@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/mellanox/mlx5/core/en_tc.c   |  2 +-
+ .../ethernet/mellanox/mlx5/core/transobj.c    | 30 +++++++++++++++----
+ include/linux/mlx5/transobj.h                 |  1 +
+ 3 files changed, 26 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+index b633f669ea57..b3b8e44540a5 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+@@ -4622,7 +4622,7 @@ static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
+       list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) {
+               wait_for_completion(&hpe->res_ready);
+               if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id)
+-                      hpe->hp->pair->peer_gone = true;
++                      mlx5_core_hairpin_clear_dead_peer(hpe->hp->pair);
+               mlx5e_hairpin_put(priv, hpe);
+       }
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
+index 01cc00ad8acf..b6931bbe52d2 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
+@@ -424,6 +424,15 @@ err_modify_sq:
+       return err;
+ }
++static void mlx5_hairpin_unpair_peer_sq(struct mlx5_hairpin *hp)
++{
++      int i;
++
++      for (i = 0; i < hp->num_channels; i++)
++              mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[i], MLX5_SQC_STATE_RDY,
++                                     MLX5_SQC_STATE_RST, 0, 0);
++}
++
+ static void mlx5_hairpin_unpair_queues(struct mlx5_hairpin *hp)
+ {
+       int i;
+@@ -432,13 +441,9 @@ static void mlx5_hairpin_unpair_queues(struct mlx5_hairpin *hp)
+       for (i = 0; i < hp->num_channels; i++)
+               mlx5_hairpin_modify_rq(hp->func_mdev, hp->rqn[i], MLX5_RQC_STATE_RDY,
+                                      MLX5_RQC_STATE_RST, 0, 0);
+-
+       /* unset peer SQs */
+-      if (hp->peer_gone)
+-              return;
+-      for (i = 0; i < hp->num_channels; i++)
+-              mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[i], MLX5_SQC_STATE_RDY,
+-                                     MLX5_SQC_STATE_RST, 0, 0);
++      if (!hp->peer_gone)
++              mlx5_hairpin_unpair_peer_sq(hp);
+ }
+ struct mlx5_hairpin *
+@@ -485,3 +490,16 @@ void mlx5_core_hairpin_destroy(struct mlx5_hairpin *hp)
+       mlx5_hairpin_destroy_queues(hp);
+       kfree(hp);
+ }
++
++void mlx5_core_hairpin_clear_dead_peer(struct mlx5_hairpin *hp)
++{
++      int i;
++
++      mlx5_hairpin_unpair_peer_sq(hp);
++
++      /* destroy peer SQ */
++      for (i = 0; i < hp->num_channels; i++)
++              mlx5_core_destroy_sq(hp->peer_mdev, hp->sqn[i]);
++
++      hp->peer_gone = true;
++}
+diff --git a/include/linux/mlx5/transobj.h b/include/linux/mlx5/transobj.h
+index 028f442530cf..60ffeb6b67ae 100644
+--- a/include/linux/mlx5/transobj.h
++++ b/include/linux/mlx5/transobj.h
+@@ -85,4 +85,5 @@ mlx5_core_hairpin_create(struct mlx5_core_dev *func_mdev,
+                        struct mlx5_hairpin_params *params);
+ void mlx5_core_hairpin_destroy(struct mlx5_hairpin *pair);
++void mlx5_core_hairpin_clear_dead_peer(struct mlx5_hairpin *hp);
+ #endif /* __TRANSOBJ_H__ */
+-- 
+2.30.2
+
diff --git a/queue-5.12/net-mlx5e-fix-use-after-free-of-encap-entry-in-neigh.patch b/queue-5.12/net-mlx5e-fix-use-after-free-of-encap-entry-in-neigh.patch
new file mode 100644 (file)
index 0000000..1fe3e77
--- /dev/null
@@ -0,0 +1,283 @@
+From cd14585a8542e0831130a18e5f6b57568b543362 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 31 May 2021 16:28:39 +0300
+Subject: net/mlx5e: Fix use-after-free of encap entry in neigh update handler
+
+From: Vlad Buslov <vladbu@nvidia.com>
+
+[ Upstream commit fb1a3132ee1ac968316e45d21a48703a6db0b6c3 ]
+
+Function mlx5e_rep_neigh_update() wasn't updated to accommodate rtnl lock
+removal from the TC filter update path and to properly handle concurrent encap
+entry insertion/deletion, which can lead to the following use-after-free:
+
+ [23827.464923] ==================================================================
+ [23827.469446] BUG: KASAN: use-after-free in mlx5e_encap_take+0x72/0x140 [mlx5_core]
+ [23827.470971] Read of size 4 at addr ffff8881d132228c by task kworker/u20:6/21635
+ [23827.472251]
+ [23827.472615] CPU: 9 PID: 21635 Comm: kworker/u20:6 Not tainted 5.13.0-rc3+ #5
+ [23827.473788] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
+ [23827.475639] Workqueue: mlx5e mlx5e_rep_neigh_update [mlx5_core]
+ [23827.476731] Call Trace:
+ [23827.477260]  dump_stack+0xbb/0x107
+ [23827.477906]  print_address_description.constprop.0+0x18/0x140
+ [23827.478896]  ? mlx5e_encap_take+0x72/0x140 [mlx5_core]
+ [23827.479879]  ? mlx5e_encap_take+0x72/0x140 [mlx5_core]
+ [23827.480905]  kasan_report.cold+0x7c/0xd8
+ [23827.481701]  ? mlx5e_encap_take+0x72/0x140 [mlx5_core]
+ [23827.482744]  kasan_check_range+0x145/0x1a0
+ [23827.493112]  mlx5e_encap_take+0x72/0x140 [mlx5_core]
+ [23827.494054]  ? mlx5e_tc_tun_encap_info_equal_generic+0x140/0x140 [mlx5_core]
+ [23827.495296]  mlx5e_rep_neigh_update+0x41e/0x5e0 [mlx5_core]
+ [23827.496338]  ? mlx5e_rep_neigh_entry_release+0xb80/0xb80 [mlx5_core]
+ [23827.497486]  ? read_word_at_a_time+0xe/0x20
+ [23827.498250]  ? strscpy+0xa0/0x2a0
+ [23827.498889]  process_one_work+0x8ac/0x14e0
+ [23827.499638]  ? lockdep_hardirqs_on_prepare+0x400/0x400
+ [23827.500537]  ? pwq_dec_nr_in_flight+0x2c0/0x2c0
+ [23827.501359]  ? rwlock_bug.part.0+0x90/0x90
+ [23827.502116]  worker_thread+0x53b/0x1220
+ [23827.502831]  ? process_one_work+0x14e0/0x14e0
+ [23827.503627]  kthread+0x328/0x3f0
+ [23827.504254]  ? _raw_spin_unlock_irq+0x24/0x40
+ [23827.505065]  ? __kthread_bind_mask+0x90/0x90
+ [23827.505912]  ret_from_fork+0x1f/0x30
+ [23827.506621]
+ [23827.506987] Allocated by task 28248:
+ [23827.507694]  kasan_save_stack+0x1b/0x40
+ [23827.508476]  __kasan_kmalloc+0x7c/0x90
+ [23827.509197]  mlx5e_attach_encap+0xde1/0x1d40 [mlx5_core]
+ [23827.510194]  mlx5e_tc_add_fdb_flow+0x397/0xc40 [mlx5_core]
+ [23827.511218]  __mlx5e_add_fdb_flow+0x519/0xb30 [mlx5_core]
+ [23827.512234]  mlx5e_configure_flower+0x191c/0x4870 [mlx5_core]
+ [23827.513298]  tc_setup_cb_add+0x1d5/0x420
+ [23827.514023]  fl_hw_replace_filter+0x382/0x6a0 [cls_flower]
+ [23827.514975]  fl_change+0x2ceb/0x4a51 [cls_flower]
+ [23827.515821]  tc_new_tfilter+0x89a/0x2070
+ [23827.516548]  rtnetlink_rcv_msg+0x644/0x8c0
+ [23827.517300]  netlink_rcv_skb+0x11d/0x340
+ [23827.518021]  netlink_unicast+0x42b/0x700
+ [23827.518742]  netlink_sendmsg+0x743/0xc20
+ [23827.519467]  sock_sendmsg+0xb2/0xe0
+ [23827.520131]  ____sys_sendmsg+0x590/0x770
+ [23827.520851]  ___sys_sendmsg+0xd8/0x160
+ [23827.521552]  __sys_sendmsg+0xb7/0x140
+ [23827.522238]  do_syscall_64+0x3a/0x70
+ [23827.522907]  entry_SYSCALL_64_after_hwframe+0x44/0xae
+ [23827.523797]
+ [23827.524163] Freed by task 25948:
+ [23827.524780]  kasan_save_stack+0x1b/0x40
+ [23827.525488]  kasan_set_track+0x1c/0x30
+ [23827.526187]  kasan_set_free_info+0x20/0x30
+ [23827.526968]  __kasan_slab_free+0xed/0x130
+ [23827.527709]  slab_free_freelist_hook+0xcf/0x1d0
+ [23827.528528]  kmem_cache_free_bulk+0x33a/0x6e0
+ [23827.529317]  kfree_rcu_work+0x55f/0xb70
+ [23827.530024]  process_one_work+0x8ac/0x14e0
+ [23827.530770]  worker_thread+0x53b/0x1220
+ [23827.531480]  kthread+0x328/0x3f0
+ [23827.532114]  ret_from_fork+0x1f/0x30
+ [23827.532785]
+ [23827.533147] Last potentially related work creation:
+ [23827.534007]  kasan_save_stack+0x1b/0x40
+ [23827.534710]  kasan_record_aux_stack+0xab/0xc0
+ [23827.535492]  kvfree_call_rcu+0x31/0x7b0
+ [23827.536206]  mlx5e_tc_del_fdb_flow+0x577/0xef0 [mlx5_core]
+ [23827.537305]  mlx5e_flow_put+0x49/0x80 [mlx5_core]
+ [23827.538290]  mlx5e_delete_flower+0x6d1/0xe60 [mlx5_core]
+ [23827.539300]  tc_setup_cb_destroy+0x18e/0x2f0
+ [23827.540144]  fl_hw_destroy_filter+0x1d2/0x310 [cls_flower]
+ [23827.541148]  __fl_delete+0x4dc/0x660 [cls_flower]
+ [23827.541985]  fl_delete+0x97/0x160 [cls_flower]
+ [23827.542782]  tc_del_tfilter+0x7ab/0x13d0
+ [23827.543503]  rtnetlink_rcv_msg+0x644/0x8c0
+ [23827.544257]  netlink_rcv_skb+0x11d/0x340
+ [23827.544981]  netlink_unicast+0x42b/0x700
+ [23827.545700]  netlink_sendmsg+0x743/0xc20
+ [23827.546424]  sock_sendmsg+0xb2/0xe0
+ [23827.547084]  ____sys_sendmsg+0x590/0x770
+ [23827.547850]  ___sys_sendmsg+0xd8/0x160
+ [23827.548606]  __sys_sendmsg+0xb7/0x140
+ [23827.549303]  do_syscall_64+0x3a/0x70
+ [23827.549969]  entry_SYSCALL_64_after_hwframe+0x44/0xae
+ [23827.550853]
+ [23827.551217] The buggy address belongs to the object at ffff8881d1322200
+ [23827.551217]  which belongs to the cache kmalloc-256 of size 256
+ [23827.553341] The buggy address is located 140 bytes inside of
+ [23827.553341]  256-byte region [ffff8881d1322200, ffff8881d1322300)
+ [23827.555747] The buggy address belongs to the page:
+ [23827.556847] page:00000000898762aa refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x1d1320
+ [23827.558651] head:00000000898762aa order:2 compound_mapcount:0 compound_pincount:0
+ [23827.559961] flags: 0x2ffff800010200(slab|head|node=0|zone=2|lastcpupid=0x1ffff)
+ [23827.561243] raw: 002ffff800010200 dead000000000100 dead000000000122 ffff888100042b40
+ [23827.562653] raw: 0000000000000000 0000000000200020 00000001ffffffff 0000000000000000
+ [23827.564112] page dumped because: kasan: bad access detected
+ [23827.565439]
+ [23827.565932] Memory state around the buggy address:
+ [23827.566917]  ffff8881d1322180: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+ [23827.568485]  ffff8881d1322200: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ [23827.569818] >ffff8881d1322280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ [23827.571143]                       ^
+ [23827.571879]  ffff8881d1322300: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+ [23827.573283]  ffff8881d1322380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+ [23827.574654] ==================================================================
+
+Most of the necessary logic is already correctly implemented by the
+mlx5e_get_next_valid_encap() helper that is used in the neigh stats update
+handler. Make the helper generic by renaming it to
+mlx5e_get_next_matching_encap() and using a callback to test whether a flow
+matches, instead of a hardcoded check of the 'valid' flag. Implement
+mlx5e_get_next_valid_encap() by calling mlx5e_get_next_matching_encap()
+with a callback that tests the encap MLX5_ENCAP_ENTRY_VALID flag. Implement
+a new mlx5e_get_next_init_encap() helper by calling
+mlx5e_get_next_matching_encap() with a callback that tests that the encap
+completion result is not an error, and use it in mlx5e_rep_neigh_update()
+to safely iterate over nhe->encap_list.
+
+Remove encap completion logic from mlx5e_rep_update_flows() since the encap
+entries passed to this function are already guaranteed to be properly
+initialized by similar code in mlx5e_get_next_init_encap().
+
+Fixes: 2a1f1768fa17 ("net/mlx5e: Refactor neigh update for concurrent execution")
+Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
+Reviewed-by: Roi Dayan <roid@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../mellanox/mlx5/core/en/rep/neigh.c         | 15 ++++-----
+ .../ethernet/mellanox/mlx5/core/en/rep/tc.c   |  6 +---
+ .../mellanox/mlx5/core/en/tc_tun_encap.c      | 33 +++++++++++++++++--
+ .../net/ethernet/mellanox/mlx5/core/en_tc.h   |  3 ++
+ 4 files changed, 40 insertions(+), 17 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c
+index be0ee03de721..2e9bee4e5209 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c
+@@ -129,10 +129,9 @@ static void mlx5e_rep_neigh_update(struct work_struct *work)
+                                                            work);
+       struct mlx5e_neigh_hash_entry *nhe = update_work->nhe;
+       struct neighbour *n = update_work->n;
++      struct mlx5e_encap_entry *e = NULL;
+       bool neigh_connected, same_dev;
+-      struct mlx5e_encap_entry *e;
+       unsigned char ha[ETH_ALEN];
+-      struct mlx5e_priv *priv;
+       u8 nud_state, dead;
+       rtnl_lock();
+@@ -156,14 +155,12 @@ static void mlx5e_rep_neigh_update(struct work_struct *work)
+       if (!same_dev)
+               goto out;
+-      list_for_each_entry(e, &nhe->encap_list, encap_list) {
+-              if (!mlx5e_encap_take(e))
+-                      continue;
++      /* mlx5e_get_next_init_encap() releases previous encap before returning
++       * the next one.
++       */
++      while ((e = mlx5e_get_next_init_encap(nhe, e)) != NULL)
++              mlx5e_rep_update_flows(netdev_priv(e->out_dev), e, neigh_connected, ha);
+-              priv = netdev_priv(e->out_dev);
+-              mlx5e_rep_update_flows(priv, e, neigh_connected, ha);
+-              mlx5e_encap_put(priv, e);
+-      }
+ out:
+       rtnl_unlock();
+       mlx5e_release_neigh_update_work(update_work);
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
+index 96ba027dbef3..9992f94f794b 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
+@@ -93,13 +93,9 @@ void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
+       ASSERT_RTNL();
+-      /* wait for encap to be fully initialized */
+-      wait_for_completion(&e->res_ready);
+-
+       mutex_lock(&esw->offloads.encap_tbl_lock);
+       encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID);
+-      if (e->compl_result < 0 || (encap_connected == neigh_connected &&
+-                                  ether_addr_equal(e->h_dest, ha)))
++      if (encap_connected == neigh_connected && ether_addr_equal(e->h_dest, ha))
+               goto unlock;
+       mlx5e_take_all_encap_flows(e, &flow_list);
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
+index 1560fcbf4ac7..a17d79effa27 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
+@@ -250,9 +250,12 @@ static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r,
+               mlx5e_take_tmp_flow(flow, flow_list, 0);
+ }
++typedef bool (match_cb)(struct mlx5e_encap_entry *);
++
+ static struct mlx5e_encap_entry *
+-mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
+-                         struct mlx5e_encap_entry *e)
++mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe,
++                            struct mlx5e_encap_entry *e,
++                            match_cb match)
+ {
+       struct mlx5e_encap_entry *next = NULL;
+@@ -287,7 +290,7 @@ retry:
+       /* wait for encap to be fully initialized */
+       wait_for_completion(&next->res_ready);
+       /* continue searching if encap entry is not in valid state after completion */
+-      if (!(next->flags & MLX5_ENCAP_ENTRY_VALID)) {
++      if (!match(next)) {
+               e = next;
+               goto retry;
+       }
+@@ -295,6 +298,30 @@ retry:
+       return next;
+ }
++static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e)
++{
++      return e->flags & MLX5_ENCAP_ENTRY_VALID;
++}
++
++static struct mlx5e_encap_entry *
++mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
++                         struct mlx5e_encap_entry *e)
++{
++      return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid);
++}
++
++static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e)
++{
++      return e->compl_result >= 0;
++}
++
++struct mlx5e_encap_entry *
++mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
++                        struct mlx5e_encap_entry *e)
++{
++      return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized);
++}
++
+ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
+ {
+       struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+index 25c091795bcd..17027536efba 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+@@ -178,6 +178,9 @@ void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *f
+ void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list);
+ struct mlx5e_neigh_hash_entry;
++struct mlx5e_encap_entry *
++mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
++                        struct mlx5e_encap_entry *e);
+ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe);
+ void mlx5e_tc_reoffload_flows_work(struct work_struct *work);
+-- 
+2.30.2
+
diff --git a/queue-5.12/net-mlx5e-remove-dependency-in-ipsec-initialization-.patch b/queue-5.12/net-mlx5e-remove-dependency-in-ipsec-initialization-.patch
new file mode 100644 (file)
index 0000000..40ffa51
--- /dev/null
@@ -0,0 +1,43 @@
+From 20e0c5a5df38bbfb58c664a92748864aa7513662 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 May 2021 13:20:32 -0500
+Subject: net/mlx5e: Remove dependency in IPsec initialization flows
+
+From: Huy Nguyen <huyn@nvidia.com>
+
+[ Upstream commit 8ad893e516a77209a1818a2072d2027d87db809f ]
+
+Currently, the IPsec feature is disabled because mlx5e_build_nic_netdev
+is required to be called after mlx5e_ipsec_init. This requirement is
+invalid, as mlx5e_build_nic_netdev and mlx5e_ipsec_init initialize
+independent resources.
+
+Remove the ipsec pointer check in mlx5e_build_nic_netdev so that the
+two functions can be called in any order.
+
+Fixes: 547eede070eb ("net/mlx5e: IPSec, Innova IPSec offload infrastructure")
+Signed-off-by: Huy Nguyen <huyn@nvidia.com>
+Reviewed-by: Raed Salem <raeds@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 3 ---
+ 1 file changed, 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+index 3d45341e2216..26f7fab109d9 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+@@ -532,9 +532,6 @@ void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
+       struct mlx5_core_dev *mdev = priv->mdev;
+       struct net_device *netdev = priv->netdev;
+-      if (!priv->ipsec)
+-              return;
+-
+       if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_ESP) ||
+           !MLX5_CAP_ETH(mdev, swp)) {
+               mlx5_core_dbg(mdev, "mlx5e: ESP and SWP offload not supported\n");
+-- 
+2.30.2
+
diff --git a/queue-5.12/net-qrtr-fix-oob-read-in-qrtr_endpoint_post.patch b/queue-5.12/net-qrtr-fix-oob-read-in-qrtr_endpoint_post.patch
new file mode 100644 (file)
index 0000000..8c956c2
--- /dev/null
@@ -0,0 +1,50 @@
+From f5ecb9b951cf04278170ec6420272483a18e6cd3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 14 Jun 2021 15:06:50 +0300
+Subject: net: qrtr: fix OOB Read in qrtr_endpoint_post
+
+From: Pavel Skripkin <paskripkin@gmail.com>
+
+[ Upstream commit ad9d24c9429e2159d1e279dc3a83191ccb4daf1d ]
+
+Syzbot reported a slab-out-of-bounds read in
+qrtr_endpoint_post. The problem was the wrong
+_size_ type:
+
+       if (len != ALIGN(size, 4) + hdrlen)
+               goto err;
+
+If the size from the qrtr header is 4294967293 (0xfffffffd), the result of
+ALIGN(size, 4) will be 0. With len == hdrlen and size == 4294967293
+in the header, this check won't fail, and
+
+       skb_put_data(skb, data + hdrlen, size);
+
+will read out of bounds from data, which is an hdrlen-sized allocation.
+
+Fixes: 194ccc88297a ("net: qrtr: Support decoding incoming v2 packets")
+Reported-and-tested-by: syzbot+1917d778024161609247@syzkaller.appspotmail.com
+Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
+Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/qrtr/qrtr.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
+index 1e4fb568fa84..24f10bf7d8a3 100644
+--- a/net/qrtr/qrtr.c
++++ b/net/qrtr/qrtr.c
+@@ -435,7 +435,7 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
+       struct qrtr_sock *ipc;
+       struct sk_buff *skb;
+       struct qrtr_cb *cb;
+-      unsigned int size;
++      size_t size;
+       unsigned int ver;
+       size_t hdrlen;
+-- 
+2.30.2
+
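To make the overflow described in the qrtr patch above concrete, here is a
small userspace-only demo (not kernel code; ALIGN_UP mirrors the rounding of
the kernel's ALIGN() macro) showing how the 32-bit size wraps to 0 while a
size_t does not:

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

/* Same round-up the kernel's ALIGN() macro performs. */
#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned int size32 = 0xfffffffdu;	/* size field taken from the header */
	size_t size64 = 0xfffffffdull;		/* same value in a wider type */

	/* 32-bit arithmetic wraps: 0xfffffffd + 3 == 0, so the length check passes. */
	printf("ALIGN(unsigned int 0xfffffffd, 4) = %u\n", ALIGN_UP(size32, 4u));
	/* With size_t there is no wrap, so the bogus length is rejected. */
	printf("ALIGN(size_t 0xfffffffd, 4)       = 0x%zx\n", ALIGN_UP(size64, (size_t)4));
	return 0;
}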
diff --git a/queue-5.12/net-qualcomm-rmnet-don-t-over-count-statistics.patch b/queue-5.12/net-qualcomm-rmnet-don-t-over-count-statistics.patch
new file mode 100644 (file)
index 0000000..9f80067
--- /dev/null
@@ -0,0 +1,79 @@
+From 610ab271b1ee77934768e267cdac94c7da62eff6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 11 Jun 2021 13:26:00 -0500
+Subject: net: qualcomm: rmnet: don't over-count statistics
+
+From: Alex Elder <elder@linaro.org>
+
+[ Upstream commit 994c393bb6886d6d94d628475b274a8cb3fc67a4 ]
+
+The purpose of the loop using u64_stats_fetch_*_irq() is to ensure
+statistics on a given CPU are collected atomically. If one of the
+statistics values gets updated within the begin/retry window, the
+loop will run again.
+
+Currently the statistics totals are updated inside that window.
+This means that if the loop ever retries, the statistics for the
+CPU will be counted more than once.
+
+Fix this by taking a snapshot of a CPU's statistics inside the
+protected window, and then updating the counters with the snapshot
+values after exiting the loop.
+
+(Also add a newline at the end of this file...)
+
+Fixes: 192c4b5d48f2a ("net: qualcomm: rmnet: Add support for 64 bit stats")
+Signed-off-by: Alex Elder <elder@linaro.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/qualcomm/rmnet/rmnet_vnd.c    | 18 +++++++++---------
+ 1 file changed, 9 insertions(+), 9 deletions(-)
+
+diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
+index 41fbd2ceeede..ab1e0fcccabb 100644
+--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
++++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
+@@ -126,24 +126,24 @@ static void rmnet_get_stats64(struct net_device *dev,
+                             struct rtnl_link_stats64 *s)
+ {
+       struct rmnet_priv *priv = netdev_priv(dev);
+-      struct rmnet_vnd_stats total_stats;
++      struct rmnet_vnd_stats total_stats = { };
+       struct rmnet_pcpu_stats *pcpu_ptr;
++      struct rmnet_vnd_stats snapshot;
+       unsigned int cpu, start;
+-      memset(&total_stats, 0, sizeof(struct rmnet_vnd_stats));
+-
+       for_each_possible_cpu(cpu) {
+               pcpu_ptr = per_cpu_ptr(priv->pcpu_stats, cpu);
+               do {
+                       start = u64_stats_fetch_begin_irq(&pcpu_ptr->syncp);
+-                      total_stats.rx_pkts += pcpu_ptr->stats.rx_pkts;
+-                      total_stats.rx_bytes += pcpu_ptr->stats.rx_bytes;
+-                      total_stats.tx_pkts += pcpu_ptr->stats.tx_pkts;
+-                      total_stats.tx_bytes += pcpu_ptr->stats.tx_bytes;
++                      snapshot = pcpu_ptr->stats;     /* struct assignment */
+               } while (u64_stats_fetch_retry_irq(&pcpu_ptr->syncp, start));
+-              total_stats.tx_drops += pcpu_ptr->stats.tx_drops;
++              total_stats.rx_pkts += snapshot.rx_pkts;
++              total_stats.rx_bytes += snapshot.rx_bytes;
++              total_stats.tx_pkts += snapshot.tx_pkts;
++              total_stats.tx_bytes += snapshot.tx_bytes;
++              total_stats.tx_drops += snapshot.tx_drops;
+       }
+       s->rx_packets = total_stats.rx_pkts;
+@@ -354,4 +354,4 @@ int rmnet_vnd_update_dev_mtu(struct rmnet_port *port,
+       }
+       return 0;
+-}
+\ No newline at end of file
++}
+-- 
+2.30.2
+
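The rmnet statistics patch above relies on a general pattern: copy a CPU's
counters into a local snapshot inside the u64_stats retry window, and add the
snapshot to the totals only after the window closes. A rough, kernel-style
sketch of that pattern, using hypothetical demo_* names, could look like this:

#include <linux/percpu.h>
#include <linux/u64_stats_sync.h>

struct demo_stats {
	u64 rx_pkts;
	u64 rx_bytes;
};

struct demo_pcpu_stats {
	struct demo_stats stats;
	struct u64_stats_sync syncp;
};

static void demo_sum_stats(struct demo_pcpu_stats __percpu *pcpu_stats,
			   struct demo_stats *total)
{
	struct demo_stats snapshot;
	unsigned int cpu, start;

	total->rx_pkts = 0;
	total->rx_bytes = 0;

	for_each_possible_cpu(cpu) {
		struct demo_pcpu_stats *p = per_cpu_ptr(pcpu_stats, cpu);

		/* Only the snapshot is taken inside the retry window, so a
		 * retry re-reads the counters instead of adding them twice.
		 */
		do {
			start = u64_stats_fetch_begin_irq(&p->syncp);
			snapshot = p->stats;	/* struct assignment */
		} while (u64_stats_fetch_retry_irq(&p->syncp, start));

		total->rx_pkts += snapshot.rx_pkts;
		total->rx_bytes += snapshot.rx_bytes;
	}
}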
diff --git a/queue-5.12/net-rds-fix-memory-leak-in-rds_recvmsg.patch b/queue-5.12/net-rds-fix-memory-leak-in-rds_recvmsg.patch
new file mode 100644 (file)
index 0000000..52a8ae3
--- /dev/null
@@ -0,0 +1,73 @@
+From a5183df796f299f6954fa9fce9ae6807eb93cfe0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Jun 2021 11:06:41 +0300
+Subject: net: rds: fix memory leak in rds_recvmsg
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Pavel Skripkin <paskripkin@gmail.com>
+
+[ Upstream commit 49bfcbfd989a8f1f23e705759a6bb099de2cff9f ]
+
+Syzbot reported a memory leak in rds. The problem
+was a missing refcount put on an error path.
+
+int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+               int msg_flags)
+{
+...
+
+       if (!rds_next_incoming(rs, &inc)) {
+               ...
+       }
+
+After this "if" inc refcount incremented and
+
+       if (rds_cmsg_recv(inc, msg, rs)) {
+               ret = -EFAULT;
+               goto out;
+       }
+...
+out:
+       return ret;
+}
+
+if rds_cmsg_recv() fails, the refcount won't be
+decremented. It's easy to see from the ftrace log that
+rds_inc_addref() has no rds_inc_put() pair in
+rds_recvmsg() after rds_cmsg_recv():
+
+ 1)               |  rds_recvmsg() {
+ 1)   3.721 us    |    rds_inc_addref();
+ 1)   3.853 us    |    rds_message_inc_copy_to_user();
+ 1) + 10.395 us   |    rds_cmsg_recv();
+ 1) + 34.260 us   |  }
+
+Fixes: bdbe6fbc6a2f ("RDS: recv.c")
+Reported-and-tested-by: syzbot+5134cdf021c4ed5aaa5f@syzkaller.appspotmail.com
+Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
+Reviewed-by: Håkon Bugge <haakon.bugge@oracle.com>
+Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/rds/recv.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/rds/recv.c b/net/rds/recv.c
+index aba4afe4dfed..967d115f97ef 100644
+--- a/net/rds/recv.c
++++ b/net/rds/recv.c
+@@ -714,7 +714,7 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+               if (rds_cmsg_recv(inc, msg, rs)) {
+                       ret = -EFAULT;
+-                      goto out;
++                      break;
+               }
+               rds_recvmsg_zcookie(rs, msg);
+-- 
+2.30.2
+
diff --git a/queue-5.12/net-sched-act_ct-handle-dnat-tuple-collision.patch b/queue-5.12/net-sched-act_ct-handle-dnat-tuple-collision.patch
new file mode 100644 (file)
index 0000000..d2efc0e
--- /dev/null
@@ -0,0 +1,67 @@
+From 4a6665fa5c65999ad0fe0899a081aa966fc7007d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 9 Jun 2021 11:23:56 -0300
+Subject: net/sched: act_ct: handle DNAT tuple collision
+
+From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+
+[ Upstream commit 13c62f5371e3eb4fc3400cfa26e64ca75f888008 ]
+
+This is the counterpart of 8aa7b526dc0b ("openvswitch: handle DNAT
+tuple collision") for act_ct. From that commit's changelog:
+
+"""
+With multiple DNAT rules it's possible that after destination
+translation the resulting tuples collide.
+
+...
+
+Netfilter handles this case by allocating a null binding for SNAT at
+egress by default.  Perform the same operation in openvswitch for DNAT
+if no explicit SNAT is requested by the user and allocate a null binding
+for SNAT for packets in the "original" direction.
+"""
+
+Fixes: 95219afbb980 ("act_ct: support asymmetric conntrack")
+Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/act_ct.c | 21 +++++++++++++--------
+ 1 file changed, 13 insertions(+), 8 deletions(-)
+
+diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
+index ba7f57cb41c3..143786d8cde0 100644
+--- a/net/sched/act_ct.c
++++ b/net/sched/act_ct.c
+@@ -904,14 +904,19 @@ static int tcf_ct_act_nat(struct sk_buff *skb,
+       }
+       err = ct_nat_execute(skb, ct, ctinfo, range, maniptype);
+-      if (err == NF_ACCEPT &&
+-          ct->status & IPS_SRC_NAT && ct->status & IPS_DST_NAT) {
+-              if (maniptype == NF_NAT_MANIP_SRC)
+-                      maniptype = NF_NAT_MANIP_DST;
+-              else
+-                      maniptype = NF_NAT_MANIP_SRC;
+-
+-              err = ct_nat_execute(skb, ct, ctinfo, range, maniptype);
++      if (err == NF_ACCEPT && ct->status & IPS_DST_NAT) {
++              if (ct->status & IPS_SRC_NAT) {
++                      if (maniptype == NF_NAT_MANIP_SRC)
++                              maniptype = NF_NAT_MANIP_DST;
++                      else
++                              maniptype = NF_NAT_MANIP_SRC;
++
++                      err = ct_nat_execute(skb, ct, ctinfo, range,
++                                           maniptype);
++              } else if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
++                      err = ct_nat_execute(skb, ct, ctinfo, NULL,
++                                           NF_NAT_MANIP_SRC);
++              }
+       }
+       return err;
+ #else
+-- 
+2.30.2
+
diff --git a/queue-5.12/net-stmmac-dwmac1000-fix-extended-mac-address-regist.patch b/queue-5.12/net-stmmac-dwmac1000-fix-extended-mac-address-regist.patch
new file mode 100644 (file)
index 0000000..72d9c2b
--- /dev/null
@@ -0,0 +1,42 @@
+From f38d9b13d27aaf69b252401242437b383933f63b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 11 Jun 2021 15:16:11 +0800
+Subject: net: stmmac: dwmac1000: Fix extended MAC address registers definition
+
+From: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
+
+[ Upstream commit 1adb20f0d496b2c61e9aa1f4761b8d71f93d258e ]
+
+The register block starting at 0x800 holds the 16th MAC address register,
+not the first one.
+
+Fixes: cffb13f4d6fb ("stmmac: extend mac addr reg and fix perfect filering")
+Signed-off-by: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/stmicro/stmmac/dwmac1000.h | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
+index b70d44ac0990..3c73453725f9 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
+@@ -76,10 +76,10 @@ enum power_event {
+ #define LPI_CTRL_STATUS_TLPIEN        0x00000001      /* Transmit LPI Entry */
+ /* GMAC HW ADDR regs */
+-#define GMAC_ADDR_HIGH(reg)   (((reg > 15) ? 0x00000800 : 0x00000040) + \
+-                              (reg * 8))
+-#define GMAC_ADDR_LOW(reg)    (((reg > 15) ? 0x00000804 : 0x00000044) + \
+-                              (reg * 8))
++#define GMAC_ADDR_HIGH(reg)   ((reg > 15) ? 0x00000800 + (reg - 16) * 8 : \
++                               0x00000040 + (reg * 8))
++#define GMAC_ADDR_LOW(reg)    ((reg > 15) ? 0x00000804 + (reg - 16) * 8 : \
++                               0x00000044 + (reg * 8))
+ #define GMAC_MAX_PERFECT_ADDRESSES    1
+ #define GMAC_PCS_BASE         0x000000c0      /* PCS register base */
+-- 
+2.30.2
+
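The dwmac1000 patch above is easiest to verify by evaluating both macro
versions for an extended address register. A small userspace check (the
*_OLD/*_NEW names exist only for this comparison) shows that register 16 now
lands on 0x800 instead of 0x880:

#include <stdio.h>

/* Broken layout: treats 0x800 as the base of address register 0. */
#define GMAC_ADDR_HIGH_OLD(reg)	((((reg) > 15) ? 0x00000800 : 0x00000040) + ((reg) * 8))
/* Fixed layout: 0x800 is the base of address register 16. */
#define GMAC_ADDR_HIGH_NEW(reg)	(((reg) > 15) ? 0x00000800 + ((reg) - 16) * 8 : \
				 0x00000040 + ((reg) * 8))

int main(void)
{
	int reg;

	for (reg = 15; reg <= 17; reg++)
		printf("reg %2d: old=0x%03x new=0x%03x\n", reg,
		       GMAC_ADDR_HIGH_OLD(reg), GMAC_ADDR_HIGH_NEW(reg));
	return 0;
}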
diff --git a/queue-5.12/net-usb-fix-possible-use-after-free-in-smsc75xx_bind.patch b/queue-5.12/net-usb-fix-possible-use-after-free-in-smsc75xx_bind.patch
new file mode 100644 (file)
index 0000000..f140cd1
--- /dev/null
@@ -0,0 +1,72 @@
+From 49654c7e2d29da5f367d58655dd3f9e5bde99d07 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Jun 2021 10:48:33 +0800
+Subject: net: usb: fix possible use-after-free in smsc75xx_bind
+
+From: Dongliang Mu <mudongliangabcd@gmail.com>
+
+[ Upstream commit 56b786d86694e079d8aad9b314e015cd4ac02a3d ]
+
+Commit 46a8b29c6306 ("net: usb: fix memory leak in smsc75xx_bind")
+fails to clean up the work scheduled in smsc75xx_reset->
+smsc75xx_set_multicast, which leads to a use-after-free if the work is
+scheduled to start after the deallocation. In addition, this patch
+also removes a dangling pointer - dev->data[0].
+
+This patch calls cancel_work_sync to cancel the scheduled work and sets
+the dangling pointer to NULL.
+
+Fixes: 46a8b29c6306 ("net: usb: fix memory leak in smsc75xx_bind")
+Signed-off-by: Dongliang Mu <mudongliangabcd@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/usb/smsc75xx.c | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c
+index 76ed79bb1e3f..5281291711af 100644
+--- a/drivers/net/usb/smsc75xx.c
++++ b/drivers/net/usb/smsc75xx.c
+@@ -1483,7 +1483,7 @@ static int smsc75xx_bind(struct usbnet *dev, struct usb_interface *intf)
+       ret = smsc75xx_wait_ready(dev, 0);
+       if (ret < 0) {
+               netdev_warn(dev->net, "device not ready in smsc75xx_bind\n");
+-              goto err;
++              goto free_pdata;
+       }
+       smsc75xx_init_mac_address(dev);
+@@ -1492,7 +1492,7 @@ static int smsc75xx_bind(struct usbnet *dev, struct usb_interface *intf)
+       ret = smsc75xx_reset(dev);
+       if (ret < 0) {
+               netdev_warn(dev->net, "smsc75xx_reset error %d\n", ret);
+-              goto err;
++              goto cancel_work;
+       }
+       dev->net->netdev_ops = &smsc75xx_netdev_ops;
+@@ -1503,8 +1503,11 @@ static int smsc75xx_bind(struct usbnet *dev, struct usb_interface *intf)
+       dev->net->max_mtu = MAX_SINGLE_PACKET_SIZE;
+       return 0;
+-err:
++cancel_work:
++      cancel_work_sync(&pdata->set_multicast);
++free_pdata:
+       kfree(pdata);
++      dev->data[0] = 0;
+       return ret;
+ }
+@@ -1515,7 +1518,6 @@ static void smsc75xx_unbind(struct usbnet *dev, struct usb_interface *intf)
+               cancel_work_sync(&pdata->set_multicast);
+               netif_dbg(dev, ifdown, dev->net, "free pdata\n");
+               kfree(pdata);
+-              pdata = NULL;
+               dev->data[0] = 0;
+       }
+ }
+-- 
+2.30.2
+
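The smsc75xx fix above follows a general rule: any work item that
dereferences a private structure must be cancelled, and waited for, before
that structure is freed. A tiny kernel-style sketch of the error-unwind
order, using hypothetical demo_* names:

#include <linux/slab.h>
#include <linux/workqueue.h>

struct demo_pdata {
	struct work_struct set_multicast;
	/* ... other driver-private state ... */
};

static void demo_bind_error_unwind(struct demo_pdata *pdata)
{
	/* The work handler uses pdata, so cancel it (and wait for a
	 * running instance to finish) before the memory goes away.
	 */
	cancel_work_sync(&pdata->set_multicast);
	kfree(pdata);
}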
diff --git a/queue-5.12/netfilter-nf_tables-initialize-set-before-expression.patch b/queue-5.12/netfilter-nf_tables-initialize-set-before-expression.patch
new file mode 100644 (file)
index 0000000..a574c1c
--- /dev/null
@@ -0,0 +1,173 @@
+From 6ae3cf75093f67de3f00ae738572c778ae814bd4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 4 Jun 2021 03:07:28 +0200
+Subject: netfilter: nf_tables: initialize set before expression setup
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+[ Upstream commit ad9f151e560b016b6ad3280b48e42fa11e1a5440 ]
+
+nft_set_elem_expr_alloc() needs an initialized set if the expression sets
+the NFT_EXPR_GC flag. Move the set field initialization before the
+expression setup.
+
+[4512935.019450] ==================================================================
+[4512935.019456] BUG: KASAN: null-ptr-deref in nft_set_elem_expr_alloc+0x84/0xd0 [nf_tables]
+[4512935.019487] Read of size 8 at addr 0000000000000070 by task nft/23532
+[4512935.019494] CPU: 1 PID: 23532 Comm: nft Not tainted 5.12.0-rc4+ #48
+[...]
+[4512935.019502] Call Trace:
+[4512935.019505]  dump_stack+0x89/0xb4
+[4512935.019512]  ? nft_set_elem_expr_alloc+0x84/0xd0 [nf_tables]
+[4512935.019536]  ? nft_set_elem_expr_alloc+0x84/0xd0 [nf_tables]
+[4512935.019560]  kasan_report.cold.12+0x5f/0xd8
+[4512935.019566]  ? nft_set_elem_expr_alloc+0x84/0xd0 [nf_tables]
+[4512935.019590]  nft_set_elem_expr_alloc+0x84/0xd0 [nf_tables]
+[4512935.019615]  nf_tables_newset+0xc7f/0x1460 [nf_tables]
+
+Reported-by: syzbot+ce96ca2b1d0b37c6422d@syzkaller.appspotmail.com
+Fixes: 65038428b2c6 ("netfilter: nf_tables: allow to specify stateful expression in set definition")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_tables_api.c | 83 ++++++++++++++++++-----------------
+ 1 file changed, 42 insertions(+), 41 deletions(-)
+
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index 31016c144c48..9d5ea2352965 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -4317,13 +4317,44 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
+       err = nf_tables_set_alloc_name(&ctx, set, name);
+       kfree(name);
+       if (err < 0)
+-              goto err_set_alloc_name;
++              goto err_set_name;
++
++      udata = NULL;
++      if (udlen) {
++              udata = set->data + size;
++              nla_memcpy(udata, nla[NFTA_SET_USERDATA], udlen);
++      }
++
++      INIT_LIST_HEAD(&set->bindings);
++      set->table = table;
++      write_pnet(&set->net, net);
++      set->ops = ops;
++      set->ktype = ktype;
++      set->klen = desc.klen;
++      set->dtype = dtype;
++      set->objtype = objtype;
++      set->dlen = desc.dlen;
++      set->flags = flags;
++      set->size = desc.size;
++      set->policy = policy;
++      set->udlen = udlen;
++      set->udata = udata;
++      set->timeout = timeout;
++      set->gc_int = gc_int;
++
++      set->field_count = desc.field_count;
++      for (i = 0; i < desc.field_count; i++)
++              set->field_len[i] = desc.field_len[i];
++
++      err = ops->init(set, &desc, nla);
++      if (err < 0)
++              goto err_set_init;
+       if (nla[NFTA_SET_EXPR]) {
+               expr = nft_set_elem_expr_alloc(&ctx, set, nla[NFTA_SET_EXPR]);
+               if (IS_ERR(expr)) {
+                       err = PTR_ERR(expr);
+-                      goto err_set_alloc_name;
++                      goto err_set_expr_alloc;
+               }
+               set->exprs[0] = expr;
+               set->num_exprs++;
+@@ -4334,74 +4365,44 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
+               if (!(flags & NFT_SET_EXPR)) {
+                       err = -EINVAL;
+-                      goto err_set_alloc_name;
++                      goto err_set_expr_alloc;
+               }
+               i = 0;
+               nla_for_each_nested(tmp, nla[NFTA_SET_EXPRESSIONS], left) {
+                       if (i == NFT_SET_EXPR_MAX) {
+                               err = -E2BIG;
+-                              goto err_set_init;
++                              goto err_set_expr_alloc;
+                       }
+                       if (nla_type(tmp) != NFTA_LIST_ELEM) {
+                               err = -EINVAL;
+-                              goto err_set_init;
++                              goto err_set_expr_alloc;
+                       }
+                       expr = nft_set_elem_expr_alloc(&ctx, set, tmp);
+                       if (IS_ERR(expr)) {
+                               err = PTR_ERR(expr);
+-                              goto err_set_init;
++                              goto err_set_expr_alloc;
+                       }
+                       set->exprs[i++] = expr;
+                       set->num_exprs++;
+               }
+       }
+-      udata = NULL;
+-      if (udlen) {
+-              udata = set->data + size;
+-              nla_memcpy(udata, nla[NFTA_SET_USERDATA], udlen);
+-      }
+-
+-      INIT_LIST_HEAD(&set->bindings);
+-      set->table = table;
+-      write_pnet(&set->net, net);
+-      set->ops   = ops;
+-      set->ktype = ktype;
+-      set->klen  = desc.klen;
+-      set->dtype = dtype;
+-      set->objtype = objtype;
+-      set->dlen  = desc.dlen;
+-      set->flags = flags;
+-      set->size  = desc.size;
+-      set->policy = policy;
+-      set->udlen  = udlen;
+-      set->udata  = udata;
+-      set->timeout = timeout;
+-      set->gc_int = gc_int;
+       set->handle = nf_tables_alloc_handle(table);
+-      set->field_count = desc.field_count;
+-      for (i = 0; i < desc.field_count; i++)
+-              set->field_len[i] = desc.field_len[i];
+-
+-      err = ops->init(set, &desc, nla);
+-      if (err < 0)
+-              goto err_set_init;
+-
+       err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set);
+       if (err < 0)
+-              goto err_set_trans;
++              goto err_set_expr_alloc;
+       list_add_tail_rcu(&set->list, &table->sets);
+       table->use++;
+       return 0;
+-err_set_trans:
+-      ops->destroy(set);
+-err_set_init:
++err_set_expr_alloc:
+       for (i = 0; i < set->num_exprs; i++)
+               nft_expr_destroy(&ctx, set->exprs[i]);
+-err_set_alloc_name:
++
++      ops->destroy(set);
++err_set_init:
+       kfree(set->name);
+ err_set_name:
+       kvfree(set);
+-- 
+2.30.2
+
diff --git a/queue-5.12/netfilter-nft_fib_ipv6-skip-ipv6-packets-from-any-to.patch b/queue-5.12/netfilter-nft_fib_ipv6-skip-ipv6-packets-from-any-to.patch
new file mode 100644 (file)
index 0000000..3afc514
--- /dev/null
@@ -0,0 +1,69 @@
+From 5334a2dffae9fef08675ded525ad1d1f974b998a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Jun 2021 13:48:18 +0200
+Subject: netfilter: nft_fib_ipv6: skip ipv6 packets from any to link-local
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit 12f36e9bf678a81d030ca1b693dcda62b55af7c5 ]
+
+The ip6tables rpfilter match has an extra check to skip packets with
+"::" source address.
+
+Extend this to the ipv6 fib expression.  Otherwise, ipv6 duplicate address
+detection packets will fail the rpf route check -- the lookup returns -ENETUNREACH.
+
+While at it, extend the prerouting check to also cover the ingress hook.
+
+Closes: https://bugzilla.netfilter.org/show_bug.cgi?id=1543
+Fixes: f6d0cbcf09c5 ("netfilter: nf_tables: add fib expression")
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv6/netfilter/nft_fib_ipv6.c | 22 ++++++++++++++++++----
+ 1 file changed, 18 insertions(+), 4 deletions(-)
+
+diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
+index e204163c7036..92f3235fa287 100644
+--- a/net/ipv6/netfilter/nft_fib_ipv6.c
++++ b/net/ipv6/netfilter/nft_fib_ipv6.c
+@@ -135,6 +135,17 @@ void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
+ }
+ EXPORT_SYMBOL_GPL(nft_fib6_eval_type);
++static bool nft_fib_v6_skip_icmpv6(const struct sk_buff *skb, u8 next, const struct ipv6hdr *iph)
++{
++      if (likely(next != IPPROTO_ICMPV6))
++              return false;
++
++      if (ipv6_addr_type(&iph->saddr) != IPV6_ADDR_ANY)
++              return false;
++
++      return ipv6_addr_type(&iph->daddr) & IPV6_ADDR_LINKLOCAL;
++}
++
+ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
+                  const struct nft_pktinfo *pkt)
+ {
+@@ -163,10 +174,13 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
+       lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif, iph);
+-      if (nft_hook(pkt) == NF_INET_PRE_ROUTING &&
+-          nft_fib_is_loopback(pkt->skb, nft_in(pkt))) {
+-              nft_fib_store_result(dest, priv, nft_in(pkt));
+-              return;
++      if (nft_hook(pkt) == NF_INET_PRE_ROUTING ||
++          nft_hook(pkt) == NF_INET_INGRESS) {
++              if (nft_fib_is_loopback(pkt->skb, nft_in(pkt)) ||
++                  nft_fib_v6_skip_icmpv6(pkt->skb, pkt->tprot, iph)) {
++                      nft_fib_store_result(dest, priv, nft_in(pkt));
++                      return;
++              }
+       }
+       *dest = 0;
+-- 
+2.30.2
+
diff --git a/queue-5.12/netfilter-synproxy-fix-out-of-bounds-when-parsing-tc.patch b/queue-5.12/netfilter-synproxy-fix-out-of-bounds-when-parsing-tc.patch
new file mode 100644 (file)
index 0000000..c90d6b4
--- /dev/null
@@ -0,0 +1,59 @@
+From efc79d7aaae13e13b996fefb98b99438f0b14838 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 10 Jun 2021 19:40:29 +0300
+Subject: netfilter: synproxy: Fix out of bounds when parsing TCP options
+
+From: Maxim Mikityanskiy <maximmi@nvidia.com>
+
+[ Upstream commit 5fc177ab759418c9537433e63301096e733fb915 ]
+
+The TCP option parser in synproxy (synproxy_parse_options) could read
+one byte out of bounds. When the length is 1, the execution flow gets
+into the loop, reads one byte of the opcode, and if the opcode is
+neither TCPOPT_EOL nor TCPOPT_NOP, it reads one more byte, which exceeds
+the length of 1.
+
+This fix is inspired by commit 9609dad263f8 ("ipv4: tcp_input: fix stack
+out of bounds when parsing TCP options.").
+
+v2 changes:
+
+Added an early return when length < 0 to avoid calling
+skb_header_pointer with negative length.
+
+Cc: Young Xiao <92siuyang@gmail.com>
+Fixes: 48b1de4c110a ("netfilter: add SYNPROXY core/target")
+Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
+Reviewed-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_synproxy_core.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
+index b100c04a0e43..3d6d49420db8 100644
+--- a/net/netfilter/nf_synproxy_core.c
++++ b/net/netfilter/nf_synproxy_core.c
+@@ -31,6 +31,9 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
+       int length = (th->doff * 4) - sizeof(*th);
+       u8 buf[40], *ptr;
++      if (unlikely(length < 0))
++              return false;
++
+       ptr = skb_header_pointer(skb, doff + sizeof(*th), length, buf);
+       if (ptr == NULL)
+               return false;
+@@ -47,6 +50,8 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
+                       length--;
+                       continue;
+               default:
++                      if (length < 2)
++                              return true;
+                       opsize = *ptr++;
+                       if (opsize < 2)
+                               return true;
+-- 
+2.30.2
+
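
To make the overread easier to see, here is a stand-alone sketch of the bounded option walk the patch above establishes: with a single stray option byte, the old code read the length byte one position past the buffer, while the added check returns early. The constants and the demo buffer are local to this example:

    #include <stdbool.h>
    #include <stdio.h>

    #define TCPOPT_EOL 0
    #define TCPOPT_NOP 1

    static bool parse_options(const unsigned char *ptr, int length)
    {
        if (length < 0)         /* doff smaller than the fixed header */
            return false;

        while (length > 0) {
            int opcode = *ptr++;
            int opsize;

            switch (opcode) {
            case TCPOPT_EOL:
                return true;
            case TCPOPT_NOP:
                length--;
                continue;
            default:
                if (length < 2) /* only the opcode is left: reading the
                                 * length byte was the old overread */
                    return true;
                opsize = *ptr++;
                if (opsize < 2 || opsize > length)
                    return true;
                /* a real parser would inspect the option body here */
                ptr += opsize - 2;
                length -= opsize;
            }
        }
        return true;
    }

    int main(void)
    {
        unsigned char one_byte[] = { 0x02 };    /* a lone opcode, no length */

        printf("parsed safely: %d\n", parse_options(one_byte, sizeof(one_byte)));
        return 0;
    }
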
diff --git a/queue-5.12/netxen_nic-fix-an-error-handling-path-in-netxen_nic_.patch b/queue-5.12/netxen_nic-fix-an-error-handling-path-in-netxen_nic_.patch
new file mode 100644 (file)
index 0000000..4bd4022
--- /dev/null
@@ -0,0 +1,37 @@
+From 3d8fdf3c44be6d0ccedb4c82e6fbe4a5174c8afa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 12 Jun 2021 14:53:12 +0200
+Subject: netxen_nic: Fix an error handling path in 'netxen_nic_probe()'
+
+From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+
+[ Upstream commit 49a10c7b176295f8fafb338911cf028e97f65f4d ]
+
+If an error occurs after a 'pci_enable_pcie_error_reporting()' call, it
+must be undone by a corresponding 'pci_disable_pcie_error_reporting()'
+call, as already done in the remove function.
+
+Fixes: e87ad5539343 ("netxen: support pci error handlers")
+Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
+index 7e6bac85495d..344ea1143454 100644
+--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
++++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
+@@ -1602,6 +1602,8 @@ err_out_free_netdev:
+       free_netdev(netdev);
+ err_out_free_res:
++      if (NX_IS_REVISION_P3(pdev->revision))
++              pci_disable_pcie_error_reporting(pdev);
+       pci_release_regions(pdev);
+ err_out_disable_pdev:
+-- 
+2.30.2
+
diff --git a/queue-5.12/ptp-improve-max_adj-check-against-unreasonable-value.patch b/queue-5.12/ptp-improve-max_adj-check-against-unreasonable-value.patch
new file mode 100644 (file)
index 0000000..dbe89a5
--- /dev/null
@@ -0,0 +1,80 @@
+From e73d0c8a1a1163e52cf2ca9a0ed08139d27c8cf4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 14 Jun 2021 15:24:05 -0700
+Subject: ptp: improve max_adj check against unreasonable values
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit 475b92f932168a78da8109acd10bfb7578b8f2bb ]
+
+Scaled PPM conversion to PPB may (on 64bit systems) result
+in a value larger than s32 can hold (freq/scaled_ppm is a long).
+This means the kernel will not correctly reject unreasonably
+high ->freq values (e.g. > 4294967295ppb, 281474976645 scaled PPM).
+
+The conversion is equivalent to a division by ~66 (65.536),
+so the value of ppb is always smaller than ppm, but not small
+enough to assume narrowing the type from long -> s32 is okay.
+
+Note that reasonable user space (e.g. ptp4l) will not use such
+high values anyway (4289046510 ppb ~= 4.3x), so the fix is
+somewhat pedantic.
+
+Fixes: d39a743511cd ("ptp: validate the requested frequency adjustment.")
+Fixes: d94ba80ebbea ("ptp: Added a brand new class driver for ptp clocks.")
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Acked-by: Richard Cochran <richardcochran@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/ptp/ptp_clock.c          | 6 +++---
+ include/linux/ptp_clock_kernel.h | 2 +-
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
+index 03a246e60fd9..21c4c34c52d8 100644
+--- a/drivers/ptp/ptp_clock.c
++++ b/drivers/ptp/ptp_clock.c
+@@ -63,7 +63,7 @@ static void enqueue_external_timestamp(struct timestamp_event_queue *queue,
+       spin_unlock_irqrestore(&queue->lock, flags);
+ }
+-s32 scaled_ppm_to_ppb(long ppm)
++long scaled_ppm_to_ppb(long ppm)
+ {
+       /*
+        * The 'freq' field in the 'struct timex' is in parts per
+@@ -80,7 +80,7 @@ s32 scaled_ppm_to_ppb(long ppm)
+       s64 ppb = 1 + ppm;
+       ppb *= 125;
+       ppb >>= 13;
+-      return (s32) ppb;
++      return (long) ppb;
+ }
+ EXPORT_SYMBOL(scaled_ppm_to_ppb);
+@@ -138,7 +138,7 @@ static int ptp_clock_adjtime(struct posix_clock *pc, struct __kernel_timex *tx)
+               delta = ktime_to_ns(kt);
+               err = ops->adjtime(ops, delta);
+       } else if (tx->modes & ADJ_FREQUENCY) {
+-              s32 ppb = scaled_ppm_to_ppb(tx->freq);
++              long ppb = scaled_ppm_to_ppb(tx->freq);
+               if (ppb > ops->max_adj || ppb < -ops->max_adj)
+                       return -ERANGE;
+               if (ops->adjfine)
+diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h
+index 0d47fd33b228..51d7f1b8b32a 100644
+--- a/include/linux/ptp_clock_kernel.h
++++ b/include/linux/ptp_clock_kernel.h
+@@ -235,7 +235,7 @@ extern int ptp_clock_index(struct ptp_clock *ptp);
+  * @ppm:    Parts per million, but with a 16 bit binary fractional field
+  */
+-extern s32 scaled_ppm_to_ppb(long ppm);
++extern long scaled_ppm_to_ppb(long ppm);
+ /**
+  * ptp_find_pin() - obtain the pin index of a given auxiliary function
+-- 
+2.30.2
+
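
To make the overflow concrete, a stand-alone version of the same conversion, fed the value quoted in the commit message, is sketched below; the behaviour noted in the comments is what a common two's-complement 64-bit build would typically produce and is meant as illustration only:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Same arithmetic as scaled_ppm_to_ppb() above: scaled ppm carries a
     * 16-bit binary fraction, so ppb = ppm * 1000 / 2^16 = ppm * 125 / 2^13.
     */
    static long scaled_ppm_to_ppb(long ppm)
    {
        int64_t ppb = 1 + ppm;

        ppb *= 125;
        ppb >>= 13;
        return (long)ppb;
    }

    int main(void)
    {
        long freq = 281474976645L;       /* value quoted in the commit message */
        long ppb = scaled_ppm_to_ppb(freq);
        int32_t narrowed = (int32_t)ppb; /* the old s32 return type */

        printf("ppb = %ld, as s32 = %" PRId32 "\n", ppb, narrowed);
        /* on a 64-bit build ppb is about 4294967295; narrowing to s32
         * typically yields -1, which passes any max_adj range check */
        return 0;
    }
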
diff --git a/queue-5.12/qlcnic-fix-an-error-handling-path-in-qlcnic_probe.patch b/queue-5.12/qlcnic-fix-an-error-handling-path-in-qlcnic_probe.patch
new file mode 100644 (file)
index 0000000..ac001ce
--- /dev/null
@@ -0,0 +1,36 @@
+From eed8972fee58ba9a2f0da58f51449155d8361108 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 12 Jun 2021 14:37:46 +0200
+Subject: qlcnic: Fix an error handling path in 'qlcnic_probe()'
+
+From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+
+[ Upstream commit cb3376604a676e0302258b01893911bdd7aa5278 ]
+
+If an error occurs after a 'pci_enable_pcie_error_reporting()' call, it
+must be undone by a corresponding 'pci_disable_pcie_error_reporting()'
+call, as already done in the remove function.
+
+Fixes: 451724c821c1 ("qlcnic: aer support")
+Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+index 96b947fde646..3beafc60747e 100644
+--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+@@ -2690,6 +2690,7 @@ err_out_free_hw_res:
+       kfree(ahw);
+ err_out_free_res:
++      pci_disable_pcie_error_reporting(pdev);
+       pci_release_regions(pdev);
+ err_out_disable_pdev:
+-- 
+2.30.2
+
diff --git a/queue-5.12/revert-net-mlx5-arm-only-eqs-with-eqes.patch b/queue-5.12/revert-net-mlx5-arm-only-eqs-with-eqes.patch
new file mode 100644 (file)
index 0000000..d8d7e2f
--- /dev/null
@@ -0,0 +1,80 @@
+From 3f5647a93eff159721da77305b0e1f52825a74f3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 25 Feb 2021 12:27:53 +0200
+Subject: Revert "net/mlx5: Arm only EQs with EQEs"
+
+From: Shay Drory <shayd@nvidia.com>
+
+[ Upstream commit 7a545077cb6701957e84c7f158630bb5c984e648 ]
+
+In the scenario described below, an EQ can remain in the FIRED state,
+which can result in a missed interrupt.
+
+The scenario:
+
+device                       mlx5_core driver
+------                       ----------------
+EQ1.eqe generated
+EQ1.MSI-X sent
+EQ1.state = FIRED
+EQ2.eqe generated
+                             mlx5_irq()
+                               polls - eq1_eqes()
+                               arm eq1
+                               polls - eq2_eqes()
+                               arm eq2
+EQ2.MSI-X sent
+EQ2.state = FIRED
+                              mlx5_irq()
+                              polls - eq2_eqes() -- no eqes found
+                              driver skips EQ arming;
+
+->EQ2 remains fired, misses generating interrupt.
+
+Hence, always arm the EQ by reverting the commit cited in the Fixes tag.
+
+Fixes: d894892dda25 ("net/mlx5: Arm only EQs with EQEs")
+Signed-off-by: Shay Drory <shayd@nvidia.com>
+Reviewed-by: Parav Pandit <parav@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/eq.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+index 1fa9c18563da..31c6a3b91f4a 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+@@ -136,7 +136,7 @@ static int mlx5_eq_comp_int(struct notifier_block *nb,
+       eqe = next_eqe_sw(eq);
+       if (!eqe)
+-              return 0;
++              goto out;
+       do {
+               struct mlx5_core_cq *cq;
+@@ -161,6 +161,8 @@ static int mlx5_eq_comp_int(struct notifier_block *nb,
+               ++eq->cons_index;
+       } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq)));
++
++out:
+       eq_update_ci(eq, 1);
+       if (cqn != -1)
+@@ -248,9 +250,9 @@ static int mlx5_eq_async_int(struct notifier_block *nb,
+               ++eq->cons_index;
+       } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq)));
+-      eq_update_ci(eq, 1);
+ out:
++      eq_update_ci(eq, 1);
+       mlx5_eq_async_int_unlock(eq_async, recovery, &flags);
+       return unlikely(recovery) ? num_eqes : 0;
+-- 
+2.30.2
+
diff --git a/queue-5.12/rtnetlink-fix-regression-in-bridge-vlan-configuratio.patch b/queue-5.12/rtnetlink-fix-regression-in-bridge-vlan-configuratio.patch
new file mode 100644 (file)
index 0000000..dbbe9eb
--- /dev/null
@@ -0,0 +1,55 @@
+From 86ae57e238612501b8889e2d61ff2d0584d811e5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 9 Jun 2021 14:17:53 +0300
+Subject: rtnetlink: Fix regression in bridge VLAN configuration
+
+From: Ido Schimmel <idosch@nvidia.com>
+
+[ Upstream commit d2e381c4963663bca6f30c3b996fa4dbafe8fcb5 ]
+
+Cited commit started returning errors when notification info is not
+filled by the bridge driver, resulting in the following regression:
+
+ # ip link add name br1 type bridge vlan_filtering 1
+ # bridge vlan add dev br1 vid 555 self pvid untagged
+ RTNETLINK answers: Invalid argument
+
+As long as the bridge driver does not fill notification info for the
+bridge device itself, an empty notification should not be considered as
+an error. This is explained in commit 59ccaaaa49b5 ("bridge: dont send
+notification when skb->len == 0 in rtnl_bridge_notify").
+
+Fix by removing the error and add a comment to avoid future bugs.
+
+Fixes: a8db57c1d285 ("rtnetlink: Fix missing error code in rtnl_bridge_notify()")
+Signed-off-by: Ido Schimmel <idosch@nvidia.com>
+Reviewed-by: Nikolay Aleksandrov <nikolay@nvidia.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/rtnetlink.c | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
+index 9ad046917b34..2123427883ba 100644
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -4833,10 +4833,12 @@ static int rtnl_bridge_notify(struct net_device *dev)
+       if (err < 0)
+               goto errout;
+-      if (!skb->len) {
+-              err = -EINVAL;
++      /* Notification info is only filled for bridge ports, not the bridge
++       * device itself. Therefore, a zero notification length is valid and
++       * should not result in an error.
++       */
++      if (!skb->len)
+               goto errout;
+-      }
+       rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
+       return 0;
+-- 
+2.30.2
+
diff --git a/queue-5.12/sch_cake-fix-out-of-bounds-when-parsing-tcp-options-.patch b/queue-5.12/sch_cake-fix-out-of-bounds-when-parsing-tcp-options-.patch
new file mode 100644 (file)
index 0000000..ba00903
--- /dev/null
@@ -0,0 +1,72 @@
+From 7b48d4aba38d5a6cf2d06e9b9b0911de3af6e679 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 10 Jun 2021 19:40:31 +0300
+Subject: sch_cake: Fix out of bounds when parsing TCP options and header
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Maxim Mikityanskiy <maximmi@nvidia.com>
+
+[ Upstream commit ba91c49dedbde758ba0b72f57ac90b06ddf8e548 ]
+
+The TCP option parser in cake qdisc (cake_get_tcpopt and
+cake_tcph_may_drop) could read one byte out of bounds. When the length
+is 1, the execution flow gets into the loop, reads one byte of the
+opcode, and if the opcode is neither TCPOPT_EOL nor TCPOPT_NOP, it reads
+one more byte, which exceeds the length of 1.
+
+This fix is inspired by commit 9609dad263f8 ("ipv4: tcp_input: fix stack
+out of bounds when parsing TCP options.").
+
+v2 changes:
+
+Added doff validation in cake_get_tcphdr to avoid parsing garbage as TCP
+header. Although it wasn't strictly an out-of-bounds access (memory was
+allocated), garbage values could be read where CAKE expected the TCP
+header if doff was smaller than 5.
+
+Cc: Young Xiao <92siuyang@gmail.com>
+Fixes: 8b7138814f29 ("sch_cake: Add optional ACK filter")
+Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
+Acked-by: Toke Høiland-Jørgensen <toke@toke.dk>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/sch_cake.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
+index 7d37638ee1c7..5c15968b5155 100644
+--- a/net/sched/sch_cake.c
++++ b/net/sched/sch_cake.c
+@@ -943,7 +943,7 @@ static struct tcphdr *cake_get_tcphdr(const struct sk_buff *skb,
+       }
+       tcph = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph);
+-      if (!tcph)
++      if (!tcph || tcph->doff < 5)
+               return NULL;
+       return skb_header_pointer(skb, offset,
+@@ -967,6 +967,8 @@ static const void *cake_get_tcpopt(const struct tcphdr *tcph,
+                       length--;
+                       continue;
+               }
++              if (length < 2)
++                      break;
+               opsize = *ptr++;
+               if (opsize < 2 || opsize > length)
+                       break;
+@@ -1104,6 +1106,8 @@ static bool cake_tcph_may_drop(const struct tcphdr *tcph,
+                       length--;
+                       continue;
+               }
++              if (length < 2)
++                      break;
+               opsize = *ptr++;
+               if (opsize < 2 || opsize > length)
+                       break;
+-- 
+2.30.2
+
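
The extra doff validation in cake_get_tcphdr() above guards against a data offset that cannot even cover the fixed 20-byte TCP header. A tiny stand-alone sketch of that arithmetic; the struct and helper names are invented for the demo:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    struct tcp_hdr_stub {
        unsigned char doff;     /* stand-in for the 4-bit tcph->doff field */
    };

    static bool tcp_header_len(const struct tcp_hdr_stub *th, size_t *len)
    {
        if (th->doff < 5)       /* shorter than the fixed header: garbage */
            return false;
        *len = (size_t)th->doff * 4;    /* 5..15 words -> 20..60 bytes */
        return true;
    }

    int main(void)
    {
        struct tcp_hdr_stub bad = { .doff = 2 }, good = { .doff = 8 };
        size_t len;

        printf("doff=2 valid: %d\n", tcp_header_len(&bad, &len));
        if (tcp_header_len(&good, &len))
            printf("doff=8 -> %zu byte header (20 fixed + 12 options)\n", len);
        return 0;
    }
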
diff --git a/queue-5.12/selftests-mptcp-enable-syncookie-only-in-absence-of-.patch b/queue-5.12/selftests-mptcp-enable-syncookie-only-in-absence-of-.patch
new file mode 100644 (file)
index 0000000..d33bc06
--- /dev/null
@@ -0,0 +1,55 @@
+From 66cd5f79b5fac77a0ffaaa5dd80464daa578293a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 10 Jun 2021 15:59:43 -0700
+Subject: selftests: mptcp: enable syncookie only in absence of reorders
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit 2395da0e17935ce9158cdfae433962bdb6cbfa67 ]
+
+Syncookie validation may fail for OoO packets, causing spurious
+resets and self-test failures, so let's force syncookies only
+for test iterations with no OoO.
+
+Fixes: fed61c4b584c ("selftests: mptcp: make 2nd net namespace use tcp syn cookies unconditionally")
+Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/198
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/net/mptcp/mptcp_connect.sh | 11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+
+diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+index 65b3b983efc2..8763706b0d04 100755
+--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
++++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+@@ -197,9 +197,6 @@ ip -net "$ns4" link set ns4eth3 up
+ ip -net "$ns4" route add default via 10.0.3.2
+ ip -net "$ns4" route add default via dead:beef:3::2
+-# use TCP syn cookies, even if no flooding was detected.
+-ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2
+-
+ set_ethtool_flags() {
+       local ns="$1"
+       local dev="$2"
+@@ -711,6 +708,14 @@ for sender in $ns1 $ns2 $ns3 $ns4;do
+               exit $ret
+       fi
++      # ns1<->ns2 is not subject to reordering/tc delays. Use it to test
++      # mptcp syncookie support.
++      if [ $sender = $ns1 ]; then
++              ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2
++      else
++              ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=1
++      fi
++
+       run_tests "$ns2" $sender 10.0.1.2
+       run_tests "$ns2" $sender dead:beef:1::2
+       run_tests "$ns2" $sender 10.0.2.1
+-- 
+2.30.2
+
diff --git a/queue-5.12/series b/queue-5.12/series
index a15040ea34bb88013c65c1969327318561c5b764..4697fd68a5c3aafecaf3cdf5b43ecbf04b013749 100644 (file)
@@ -10,3 +10,78 @@ afs-fix-an-is_err-vs-null-check.patch
 mm-memory-failure-make-sure-wait-for-page-writeback-.patch
 kvm-lapic-restore-guard-to-prevent-illegal-apic-regi.patch
 fanotify-fix-copy_event_to_user-fid-error-clean-up.patch
+batman-adv-avoid-warn_on-timing-related-checks.patch
+staging-rtl8723bs-fix-monitor-netdev-register-unregi.patch
+mac80211-fix-skb-length-check-in-ieee80211_scan_rx.patch
+mlxsw-reg-spectrum-3-enforce-lowest-max-shaper-burst.patch
+mlxsw-core-set-thermal-zone-polling-delay-argument-t.patch
+libbpf-fixes-incorrect-rx_ring_setup_done.patch
+net-ipv4-fix-memory-leak-in-netlbl_cipsov4_add_std.patch
+vrf-fix-maximum-mtu.patch
+net-rds-fix-memory-leak-in-rds_recvmsg.patch
+net-dsa-felix-re-enable-tx-flow-control-in-ocelot_po.patch
+net-ena-fix-dma-mapping-function-issues-in-xdp.patch
+net-lantiq-disable-interrupt-before-sheduling-napi.patch
+netfilter-nf_tables-initialize-set-before-expression.patch
+netfilter-nft_fib_ipv6-skip-ipv6-packets-from-any-to.patch
+ice-add-ndo_bpf-callback-for-safe-mode-netdev-ops.patch
+ice-parameterize-functions-responsible-for-tx-ring-m.patch
+udp-fix-race-between-close-and-udp_abort.patch
+rtnetlink-fix-regression-in-bridge-vlan-configuratio.patch
+net-sched-act_ct-handle-dnat-tuple-collision.patch
+net-mlx5e-fix-use-after-free-of-encap-entry-in-neigh.patch
+net-mlx5e-remove-dependency-in-ipsec-initialization-.patch
+net-mlx5e-fix-page-reclaim-for-dead-peer-hairpin.patch
+net-mlx5-consider-roce-cap-before-init-rdma-resource.patch
+net-mlx5-dr-don-t-use-sw-steering-when-roce-is-not-s.patch
+revert-net-mlx5-arm-only-eqs-with-eqes.patch
+net-mlx5e-block-offload-of-outer-header-csum-for-udp.patch
+net-mlx5e-block-offload-of-outer-header-csum-for-gre.patch
+skbuff-fix-incorrect-msg_zerocopy-copy-notifications.patch
+netfilter-synproxy-fix-out-of-bounds-when-parsing-tc.patch
+mptcp-fix-out-of-bounds-when-parsing-tcp-options.patch
+sch_cake-fix-out-of-bounds-when-parsing-tcp-options-.patch
+mptcp-try-harder-to-borrow-memory-from-subflow-under.patch
+mptcp-wake-up-readers-only-for-in-sequence-data.patch
+mptcp-do-not-warn-on-bad-input-from-the-network.patch
+selftests-mptcp-enable-syncookie-only-in-absence-of-.patch
+mptcp-fix-soft-lookup-in-subflow_error_report.patch
+alx-fix-an-error-handling-path-in-alx_probe.patch
+cxgb4-fix-endianness-when-flashing-boot-image.patch
+cxgb4-fix-sleep-in-atomic-when-flashing-phy-firmware.patch
+cxgb4-halt-chip-before-flashing-phy-firmware-image.patch
+net-stmmac-dwmac1000-fix-extended-mac-address-regist.patch
+net-make-get_net_ns-return-error-if-net_ns-is-disabl.patch
+net-qualcomm-rmnet-don-t-over-count-statistics.patch
+ethtool-strset-fix-message-length-calculation.patch
+qlcnic-fix-an-error-handling-path-in-qlcnic_probe.patch
+netxen_nic-fix-an-error-handling-path-in-netxen_nic_.patch
+cxgb4-fix-wrong-ethtool-n-tuple-rule-lookup.patch
+ipv4-fix-device-used-for-dst_alloc-with-local-routes.patch
+net-qrtr-fix-oob-read-in-qrtr_endpoint_post.patch
+bpf-fix-leakage-under-speculation-on-mispredicted-br.patch
+net-mhi_net-update-the-transmit-handler-prototype.patch
+ptp-improve-max_adj-check-against-unreasonable-value.patch
+net-cdc_ncm-switch-to-eth-d-interface-naming.patch
+lantiq-net-fix-duplicated-skb-in-rx-descriptor-ring.patch
+net-usb-fix-possible-use-after-free-in-smsc75xx_bind.patch
+net-fec_ptp-fix-issue-caused-by-refactor-the-fec_dev.patch
+net-ipv4-fix-memory-leak-in-ip_mc_add1_src.patch
+net-af_unix-fix-a-data-race-in-unix_dgram_sendmsg-un.patch
+net-mlx5-fix-error-path-for-set-hca-defaults.patch
+net-mlx5-check-that-driver-was-probed-prior-attachin.patch
+net-mlx5-e-switch-read-pf-mac-address.patch
+net-mlx5-e-switch-allow-setting-guid-for-host-pf-vpo.patch
+net-mlx5-sf_dev-remove-sf-device-on-invalid-state.patch
+net-mlx5-dr-fix-stev1-incorrect-l3-decapsulation-pad.patch
+net-mlx5e-don-t-create-devices-during-unload-flow.patch
+net-mlx5-reset-mkey-index-on-creation.patch
+be2net-fix-an-error-handling-path-in-be_probe.patch
+net-hamradio-fix-memory-leak-in-mkiss_close.patch
+net-cdc_eem-fix-tx-fixup-skb-leak.patch
+cxgb4-fix-wrong-shift.patch
+bnxt_en-rediscover-phy-capabilities-after-firmware-r.patch
+bnxt_en-fix-tqm-fastpath-ring-backing-store-computat.patch
+bnxt_en-call-bnxt_ethtool_free-in-bnxt_init_one-erro.patch
+icmp-don-t-send-out-icmp-messages-with-a-source-addr.patch
+net-ethernet-fix-potential-use-after-free-in-ec_bhf_.patch
diff --git a/queue-5.12/skbuff-fix-incorrect-msg_zerocopy-copy-notifications.patch b/queue-5.12/skbuff-fix-incorrect-msg_zerocopy-copy-notifications.patch
new file mode 100644 (file)
index 0000000..d659f97
--- /dev/null
@@ -0,0 +1,65 @@
+From 98ec11e707621ca48c33ce6fe4ef56c0a76ab171 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 9 Jun 2021 18:41:57 -0400
+Subject: skbuff: fix incorrect msg_zerocopy copy notifications
+
+From: Willem de Bruijn <willemb@google.com>
+
+[ Upstream commit 3bdd5ee0ec8c14131d560da492e6df452c6fdd75 ]
+
+msg_zerocopy signals if a send operation required copying with a flag
+in serr->ee.ee_code.
+
+This field can be incorrect as of the below commit, as a result of
+both structs uarg and serr pointing into the same skb->cb[].
+
+uarg->zerocopy must be read before skb->cb[] is reinitialized to hold
+serr. Similar to other fields len, hi and lo, use a local variable to
+temporarily hold the value.
+
+This was not a problem before, when the value was passed as a function
+argument.
+
+Fixes: 75518851a2a0 ("skbuff: Push status and refcounts into sock_zerocopy_callback")
+Reported-by: Talal Ahmad <talalahmad@google.com>
+Signed-off-by: Willem de Bruijn <willemb@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/skbuff.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/net/core/skbuff.c b/net/core/skbuff.c
+index c421c8f80925..7997d99afbd8 100644
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -1252,6 +1252,7 @@ static void __msg_zerocopy_callback(struct ubuf_info *uarg)
+       struct sock *sk = skb->sk;
+       struct sk_buff_head *q;
+       unsigned long flags;
++      bool is_zerocopy;
+       u32 lo, hi;
+       u16 len;
+@@ -1266,6 +1267,7 @@ static void __msg_zerocopy_callback(struct ubuf_info *uarg)
+       len = uarg->len;
+       lo = uarg->id;
+       hi = uarg->id + len - 1;
++      is_zerocopy = uarg->zerocopy;
+       serr = SKB_EXT_ERR(skb);
+       memset(serr, 0, sizeof(*serr));
+@@ -1273,7 +1275,7 @@ static void __msg_zerocopy_callback(struct ubuf_info *uarg)
+       serr->ee.ee_origin = SO_EE_ORIGIN_ZEROCOPY;
+       serr->ee.ee_data = hi;
+       serr->ee.ee_info = lo;
+-      if (!uarg->zerocopy)
++      if (!is_zerocopy)
+               serr->ee.ee_code |= SO_EE_CODE_ZEROCOPY_COPIED;
+       q = &sk->sk_error_queue;
+-- 
+2.30.2
+
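
The root cause above is that the ubuf_info and the sock_exterr_skb are two views of the same skb->cb[] storage, so zeroing the error report wipes the zerocopy flag before it is read. A stand-alone sketch of that aliasing hazard and of the snapshot fix, with invented struct names standing in for the kernel ones:

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    struct fake_uarg {          /* stands in for the ubuf_info in skb->cb[] */
        unsigned int id;
        unsigned short len;
        bool zerocopy;
    };

    struct fake_serr {          /* stands in for sock_exterr_skb in skb->cb[] */
        unsigned int ee_data;
        unsigned int ee_info;
        unsigned int ee_code;
    };

    union fake_cb {             /* both structs share one control block */
        struct fake_uarg uarg;
        struct fake_serr serr;
    };

    int main(void)
    {
        union fake_cb cb = { .uarg = { .id = 1, .len = 1, .zerocopy = true } };

        /* the fix: snapshot the flag before cb is reused for the error report */
        bool is_zerocopy = cb.uarg.zerocopy;

        memset(&cb.serr, 0, sizeof(cb.serr));   /* wipes cb.uarg.zerocopy too */
        if (!is_zerocopy)                       /* testing cb.uarg.zerocopy here
                                                 * would wrongly report a copy */
            cb.serr.ee_code |= 1;               /* COPIED-style bit, demo only */

        printf("copied bit set: %u\n", cb.serr.ee_code & 1);
        return 0;
    }
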
diff --git a/queue-5.12/staging-rtl8723bs-fix-monitor-netdev-register-unregi.patch b/queue-5.12/staging-rtl8723bs-fix-monitor-netdev-register-unregi.patch
new file mode 100644 (file)
index 0000000..e88785a
--- /dev/null
@@ -0,0 +1,48 @@
+From 6349a75e29f89faf8e1fb61933b0bdd25238db0b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 26 Apr 2021 21:28:02 +0200
+Subject: staging: rtl8723bs: fix monitor netdev register/unregister
+
+From: Johannes Berg <johannes.berg@intel.com>
+
+[ Upstream commit b90f51e8e1f5014c01c82a7bf4c611643d0a8bcb ]
+
+Due to the locking changes and callbacks happening inside
+cfg80211, we need to use cfg80211 versions of the register
+and unregister functions if called within cfg80211 methods,
+otherwise deadlocks occur.
+
+Fixes: a05829a7222e ("cfg80211: avoid holding the RTNL when calling the driver")
+Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Link: https://lore.kernel.org/r/20210426212801.3d902cc9e6f4.Ie0b1e0c545920c61400a4b7d0f384ea61feb645a@changeid
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
+index cbec65e5a464..62ea47f9fee5 100644
+--- a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
++++ b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
+@@ -2579,7 +2579,7 @@ static int rtw_cfg80211_add_monitor_if(struct adapter *padapter, char *name, str
+       mon_wdev->iftype = NL80211_IFTYPE_MONITOR;
+       mon_ndev->ieee80211_ptr = mon_wdev;
+-      ret = register_netdevice(mon_ndev);
++      ret = cfg80211_register_netdevice(mon_ndev);
+       if (ret) {
+               goto out;
+       }
+@@ -2661,7 +2661,7 @@ static int cfg80211_rtw_del_virtual_intf(struct wiphy *wiphy,
+       adapter = rtw_netdev_priv(ndev);
+       pwdev_priv = adapter_wdev_data(adapter);
+-      unregister_netdevice(ndev);
++      cfg80211_unregister_netdevice(ndev);
+       if (ndev == pwdev_priv->pmon_ndev) {
+               pwdev_priv->pmon_ndev = NULL;
+-- 
+2.30.2
+
diff --git a/queue-5.12/udp-fix-race-between-close-and-udp_abort.patch b/queue-5.12/udp-fix-race-between-close-and-udp_abort.patch
new file mode 100644 (file)
index 0000000..6718331
--- /dev/null
@@ -0,0 +1,77 @@
+From 5cc965003a179d47da9d43c3172aa8e975c12b8c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 9 Jun 2021 11:49:01 +0200
+Subject: udp: fix race between close() and udp_abort()
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit a8b897c7bcd47f4147d066e22cc01d1026d7640e ]
+
+Kaustubh reported and diagnosed a panic in udp_lib_lookup().
+The root cause is udp_abort() racing with close(). Both
+racing functions acquire the socket lock, but udp{v6}_destroy_sock()
+releases it before performing destructive actions.
+
+We can't easily extend the socket lock scope to avoid the race,
+instead use the SOCK_DEAD flag to prevent udp_abort from doing
+any action when the critical race happens.
+
+Diagnosed-and-tested-by: Kaustubh Pandey <kapandey@codeaurora.org>
+Fixes: 5d77dca82839 ("net: diag: support SOCK_DESTROY for UDP sockets")
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/udp.c | 10 ++++++++++
+ net/ipv6/udp.c |  3 +++
+ 2 files changed, 13 insertions(+)
+
+diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
+index c586a6bb8c6d..3dd340679d09 100644
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -2576,6 +2576,9 @@ void udp_destroy_sock(struct sock *sk)
+ {
+       struct udp_sock *up = udp_sk(sk);
+       bool slow = lock_sock_fast(sk);
++
++      /* protects from races with udp_abort() */
++      sock_set_flag(sk, SOCK_DEAD);
+       udp_flush_pending_frames(sk);
+       unlock_sock_fast(sk, slow);
+       if (static_branch_unlikely(&udp_encap_needed_key)) {
+@@ -2826,10 +2829,17 @@ int udp_abort(struct sock *sk, int err)
+ {
+       lock_sock(sk);
++      /* udp{v6}_destroy_sock() sets it under the sk lock, avoid racing
++       * with close()
++       */
++      if (sock_flag(sk, SOCK_DEAD))
++              goto out;
++
+       sk->sk_err = err;
+       sk->sk_error_report(sk);
+       __udp_disconnect(sk, 0);
++out:
+       release_sock(sk);
+       return 0;
+diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
+index d25e5a9252fd..29288f134d7a 100644
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -1597,6 +1597,9 @@ void udpv6_destroy_sock(struct sock *sk)
+ {
+       struct udp_sock *up = udp_sk(sk);
+       lock_sock(sk);
++
++      /* protects from races with udp_abort() */
++      sock_set_flag(sk, SOCK_DEAD);
+       udp_v6_flush_pending_frames(sk);
+       release_sock(sk);
+-- 
+2.30.2
+
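
The SOCK_DEAD trick above is a generic pattern: the destroy path marks the object dead while still holding the lock, so a concurrent abort that takes the lock afterwards bails out instead of touching state that is being torn down. A minimal user-space sketch of that pattern, with invented names:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct fake_sock {
        pthread_mutex_t lock;
        bool dead;              /* plays the role of SOCK_DEAD */
        int err;
    };

    static void fake_destroy(struct fake_sock *sk)
    {
        pthread_mutex_lock(&sk->lock);
        sk->dead = true;        /* set before the lock is dropped */
        pthread_mutex_unlock(&sk->lock);
        /* ...destructive teardown continues without the lock... */
    }

    static int fake_abort(struct fake_sock *sk, int err)
    {
        pthread_mutex_lock(&sk->lock);
        if (sk->dead)           /* raced with destroy: do nothing */
            goto out;
        sk->err = err;
        /* ...would disconnect and report the error here... */
    out:
        pthread_mutex_unlock(&sk->lock);
        return 0;
    }

    int main(void)
    {
        struct fake_sock sk = { .lock = PTHREAD_MUTEX_INITIALIZER };

        fake_destroy(&sk);
        fake_abort(&sk, 111);
        printf("err after abort on dead socket: %d\n", sk.err);
        return 0;
    }
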
diff --git a/queue-5.12/vrf-fix-maximum-mtu.patch b/queue-5.12/vrf-fix-maximum-mtu.patch
new file mode 100644 (file)
index 0000000..d587860
--- /dev/null
@@ -0,0 +1,65 @@
+From 67d046381974224ad6630dfef2f99f1567693a7b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Jun 2021 16:59:51 +0200
+Subject: vrf: fix maximum MTU
+
+From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+
+[ Upstream commit 9bb392f62447d73cc7dd7562413a2cd9104c82f8 ]
+
+My initial goal was to fix the default MTU, which is set to 65536, i.e. above
+the maximum defined in the driver: 65535 (ETH_MAX_MTU).
+
+In fact, it seems more consistent, w.r.t. min_mtu, to set the max_mtu to
+IP6_MAX_MTU (65535 + sizeof(struct ipv6hdr)) and use it by default.
+
+Let's also, for consistency, set the mtu in vrf_setup(). This function
+calls ether_setup(), which sets the mtu to 1500. Thus, the whole mtu config
+is done in the same function.
+
+Before the patch:
+$ ip link add blue type vrf table 1234
+$ ip link list blue
+9: blue: <NOARP,MASTER> mtu 65536 qdisc noop state DOWN mode DEFAULT group default qlen 1000
+    link/ether fa:f5:27:70:24:2a brd ff:ff:ff:ff:ff:ff
+$ ip link set dev blue mtu 65535
+$ ip link set dev blue mtu 65536
+Error: mtu greater than device maximum.
+
+Fixes: 5055376a3b44 ("net: vrf: Fix ping failed when vrf mtu is set to 0")
+CC: Miaohe Lin <linmiaohe@huawei.com>
+Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Reviewed-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/vrf.c | 6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
+index 503e2fd7ce51..28a6c4cfe9b8 100644
+--- a/drivers/net/vrf.c
++++ b/drivers/net/vrf.c
+@@ -1183,9 +1183,6 @@ static int vrf_dev_init(struct net_device *dev)
+       dev->flags = IFF_MASTER | IFF_NOARP;
+-      /* MTU is irrelevant for VRF device; set to 64k similar to lo */
+-      dev->mtu = 64 * 1024;
+-
+       /* similarly, oper state is irrelevant; set to up to avoid confusion */
+       dev->operstate = IF_OPER_UP;
+       netdev_lockdep_set_classes(dev);
+@@ -1685,7 +1682,8 @@ static void vrf_setup(struct net_device *dev)
+        * which breaks networking.
+        */
+       dev->min_mtu = IPV6_MIN_MTU;
+-      dev->max_mtu = ETH_MAX_MTU;
++      dev->max_mtu = IP6_MAX_MTU;
++      dev->mtu = dev->max_mtu;
+ }
+ static int vrf_validate(struct nlattr *tb[], struct nlattr *data[],
+-- 
+2.30.2
+