5.8-stable patches
author     Greg Kroah-Hartman <gregkh@linuxfoundation.org>
           Fri, 25 Sep 2020 07:55:21 +0000 (09:55 +0200)
committer  Greg Kroah-Hartman <gregkh@linuxfoundation.org>
           Fri, 25 Sep 2020 07:55:21 +0000 (09:55 +0200)
added patches:
act_ife-load-meta-modules-before-tcf_idr_check_alloc.patch
bnxt_en-avoid-sending-firmware-messages-when-aer-error-is-detected.patch
bnxt_en-fix-null-ptr-dereference-crash-in-bnxt_fw_reset_task.patch
bnxt_en-protect-bnxt_set_eee-and-bnxt_set_pauseparam-with-mutex.patch
bnxt_en-return-proper-error-codes-in-bnxt_show_temp.patch
bnxt_en-use-memcpy-to-copy-vpd-field-info.patch
cxgb4-fix-memory-leak-during-module-unload.patch
cxgb4-fix-offset-when-clearing-filter-byte-counters.patch
geneve-add-transport-ports-in-route-lookup-for-geneve.patch
hdlc_ppp-add-range-checks-in-ppp_cp_parse_cr.patch
hinic-bump-up-the-timeout-of-set_func_state-cmd.patch
hinic-fix-rewaking-txq-after-netif_tx_disable.patch
hv_netvsc-fix-hibernation-for-mlx5-vf-driver.patch
ip-fix-tos-reflection-in-ack-and-reset-packets.patch
ipv4-initialize-flowi4_multipath_hash-in-data-path.patch
ipv4-update-exception-handling-for-multipath-routes-via-same-device.patch
ipv6-avoid-lockdep-issue-in-fib6_del.patch
net-add-__must_check-to-skb_put_padto.patch
net-bridge-br_vlan_get_pvid_rcu-should-dereference-the-vlan-group-under-rcu.patch
net-dcb-validate-dcb_attr_dcb_buffer-argument.patch
net-dsa-link-interfaces-with-the-dsa-master-to-get-rid-of-lockdep-warnings.patch
net-dsa-microchip-ksz8795-really-set-the-correct-number-of-ports.patch
net-dsa-rtl8366-properly-clear-member-config.patch
net-ethernet-ti-cpsw_new-fix-suspend-resume.patch
net-fix-bridge-enslavement-failure.patch
net-ipa-fix-u32_replace_bits-by-u32p_xxx-version.patch
net-ipv6-fix-kconfig-dependency-warning-for-ipv6_seg6_hmac.patch
net-lantiq-disable-irqs-only-if-napi-gets-scheduled.patch
net-lantiq-use-napi_complete_done.patch
net-lantiq-use-netif_tx_napi_add-for-tx-napi.patch
net-lantiq-wake-tx-queue-again.patch
net-macb-fix-for-pause-frame-receive-enable-bit.patch
net-mlx5-fix-fte-cleanup.patch
net-mlx5e-enable-adding-peer-miss-rules-only-if-merged-eswitch-is-supported.patch
net-mlx5e-fix-endianness-when-calculating-pedit-mask-first-bit.patch
net-mlx5e-fix-memory-leak-of-tunnel-info-when-rule-under-multipath-not-ready.patch
net-mlx5e-tls-do-not-expose-fpga-tls-counter-if-not-supported.patch
net-mlx5e-use-rcu-to-protect-rq-xdp_prog.patch
net-mlx5e-use-synchronize_rcu-to-sync-with-napi.patch
net-phy-avoid-npd-upon-phy_detach-when-driver-is-unbound.patch
net-phy-call-phy_disable_interrupts-in-phy_attach_direct-instead.patch
net-phy-do-not-warn-in-phy_stop-on-phy_down.patch
net-qrtr-check-skb_put_padto-return-value.patch
net-sch_generic-aviod-concurrent-reset-and-enqueue-op-for-lockless-qdisc.patch
net-sched-initialize-with-0-before-setting-erspan-md-u.patch
net-sctp-fix-ipv6-ancestor_size-calc-in-sctp_copy_descendant.patch
nfp-use-correct-define-to-return-none-fec.patch
revert-netns-don-t-disable-bhs-when-locking-nsid_lock.patch
taprio-fix-allowing-too-small-intervals.patch
tipc-fix-memory-leak-in-tipc_group_create_member.patch
tipc-fix-shutdown-of-connection-oriented-socket.patch
tipc-use-skb_unshare-instead-in-tipc_buf_append.patch
wireguard-noise-take-lock-when-removing-handshake-entry-from-table.patch
wireguard-peerlookup-take-lock-before-checking-hash-in-replace-operation.patch

55 files changed:
queue-5.8/act_ife-load-meta-modules-before-tcf_idr_check_alloc.patch [new file with mode: 0644]
queue-5.8/bnxt_en-avoid-sending-firmware-messages-when-aer-error-is-detected.patch [new file with mode: 0644]
queue-5.8/bnxt_en-fix-null-ptr-dereference-crash-in-bnxt_fw_reset_task.patch [new file with mode: 0644]
queue-5.8/bnxt_en-protect-bnxt_set_eee-and-bnxt_set_pauseparam-with-mutex.patch [new file with mode: 0644]
queue-5.8/bnxt_en-return-proper-error-codes-in-bnxt_show_temp.patch [new file with mode: 0644]
queue-5.8/bnxt_en-use-memcpy-to-copy-vpd-field-info.patch [new file with mode: 0644]
queue-5.8/cxgb4-fix-memory-leak-during-module-unload.patch [new file with mode: 0644]
queue-5.8/cxgb4-fix-offset-when-clearing-filter-byte-counters.patch [new file with mode: 0644]
queue-5.8/geneve-add-transport-ports-in-route-lookup-for-geneve.patch [new file with mode: 0644]
queue-5.8/hdlc_ppp-add-range-checks-in-ppp_cp_parse_cr.patch [new file with mode: 0644]
queue-5.8/hinic-bump-up-the-timeout-of-set_func_state-cmd.patch [new file with mode: 0644]
queue-5.8/hinic-fix-rewaking-txq-after-netif_tx_disable.patch [new file with mode: 0644]
queue-5.8/hv_netvsc-fix-hibernation-for-mlx5-vf-driver.patch [new file with mode: 0644]
queue-5.8/ip-fix-tos-reflection-in-ack-and-reset-packets.patch [new file with mode: 0644]
queue-5.8/ipv4-initialize-flowi4_multipath_hash-in-data-path.patch [new file with mode: 0644]
queue-5.8/ipv4-update-exception-handling-for-multipath-routes-via-same-device.patch [new file with mode: 0644]
queue-5.8/ipv6-avoid-lockdep-issue-in-fib6_del.patch [new file with mode: 0644]
queue-5.8/net-add-__must_check-to-skb_put_padto.patch [new file with mode: 0644]
queue-5.8/net-bridge-br_vlan_get_pvid_rcu-should-dereference-the-vlan-group-under-rcu.patch [new file with mode: 0644]
queue-5.8/net-dcb-validate-dcb_attr_dcb_buffer-argument.patch [new file with mode: 0644]
queue-5.8/net-dsa-link-interfaces-with-the-dsa-master-to-get-rid-of-lockdep-warnings.patch [new file with mode: 0644]
queue-5.8/net-dsa-microchip-ksz8795-really-set-the-correct-number-of-ports.patch [new file with mode: 0644]
queue-5.8/net-dsa-rtl8366-properly-clear-member-config.patch [new file with mode: 0644]
queue-5.8/net-ethernet-ti-cpsw_new-fix-suspend-resume.patch [new file with mode: 0644]
queue-5.8/net-fix-bridge-enslavement-failure.patch [new file with mode: 0644]
queue-5.8/net-ipa-fix-u32_replace_bits-by-u32p_xxx-version.patch [new file with mode: 0644]
queue-5.8/net-ipv6-fix-kconfig-dependency-warning-for-ipv6_seg6_hmac.patch [new file with mode: 0644]
queue-5.8/net-lantiq-disable-irqs-only-if-napi-gets-scheduled.patch [new file with mode: 0644]
queue-5.8/net-lantiq-use-napi_complete_done.patch [new file with mode: 0644]
queue-5.8/net-lantiq-use-netif_tx_napi_add-for-tx-napi.patch [new file with mode: 0644]
queue-5.8/net-lantiq-wake-tx-queue-again.patch [new file with mode: 0644]
queue-5.8/net-macb-fix-for-pause-frame-receive-enable-bit.patch [new file with mode: 0644]
queue-5.8/net-mlx5-fix-fte-cleanup.patch [new file with mode: 0644]
queue-5.8/net-mlx5e-enable-adding-peer-miss-rules-only-if-merged-eswitch-is-supported.patch [new file with mode: 0644]
queue-5.8/net-mlx5e-fix-endianness-when-calculating-pedit-mask-first-bit.patch [new file with mode: 0644]
queue-5.8/net-mlx5e-fix-memory-leak-of-tunnel-info-when-rule-under-multipath-not-ready.patch [new file with mode: 0644]
queue-5.8/net-mlx5e-tls-do-not-expose-fpga-tls-counter-if-not-supported.patch [new file with mode: 0644]
queue-5.8/net-mlx5e-use-rcu-to-protect-rq-xdp_prog.patch [new file with mode: 0644]
queue-5.8/net-mlx5e-use-synchronize_rcu-to-sync-with-napi.patch [new file with mode: 0644]
queue-5.8/net-phy-avoid-npd-upon-phy_detach-when-driver-is-unbound.patch [new file with mode: 0644]
queue-5.8/net-phy-call-phy_disable_interrupts-in-phy_attach_direct-instead.patch [new file with mode: 0644]
queue-5.8/net-phy-do-not-warn-in-phy_stop-on-phy_down.patch [new file with mode: 0644]
queue-5.8/net-qrtr-check-skb_put_padto-return-value.patch [new file with mode: 0644]
queue-5.8/net-sch_generic-aviod-concurrent-reset-and-enqueue-op-for-lockless-qdisc.patch [new file with mode: 0644]
queue-5.8/net-sched-initialize-with-0-before-setting-erspan-md-u.patch [new file with mode: 0644]
queue-5.8/net-sctp-fix-ipv6-ancestor_size-calc-in-sctp_copy_descendant.patch [new file with mode: 0644]
queue-5.8/nfp-use-correct-define-to-return-none-fec.patch [new file with mode: 0644]
queue-5.8/revert-netns-don-t-disable-bhs-when-locking-nsid_lock.patch [new file with mode: 0644]
queue-5.8/series
queue-5.8/taprio-fix-allowing-too-small-intervals.patch [new file with mode: 0644]
queue-5.8/tipc-fix-memory-leak-in-tipc_group_create_member.patch [new file with mode: 0644]
queue-5.8/tipc-fix-shutdown-of-connection-oriented-socket.patch [new file with mode: 0644]
queue-5.8/tipc-use-skb_unshare-instead-in-tipc_buf_append.patch [new file with mode: 0644]
queue-5.8/wireguard-noise-take-lock-when-removing-handshake-entry-from-table.patch [new file with mode: 0644]
queue-5.8/wireguard-peerlookup-take-lock-before-checking-hash-in-replace-operation.patch [new file with mode: 0644]

diff --git a/queue-5.8/act_ife-load-meta-modules-before-tcf_idr_check_alloc.patch b/queue-5.8/act_ife-load-meta-modules-before-tcf_idr_check_alloc.patch
new file mode 100644 (file)
index 0000000..1f5c3b0
--- /dev/null
@@ -0,0 +1,118 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Cong Wang <xiyou.wangcong@gmail.com>
+Date: Thu, 3 Sep 2020 19:10:11 -0700
+Subject: act_ife: load meta modules before tcf_idr_check_alloc()
+
+From: Cong Wang <xiyou.wangcong@gmail.com>
+
+[ Upstream commit cc8e58f8325cdf14b9516b61c384cdfd02a4f408 ]
+
+The following deadlock scenario is triggered by syzbot:
+
+Thread A:                              Thread B:
+tcf_idr_check_alloc()
+...
+populate_metalist()
+  rtnl_unlock()
+                                       rtnl_lock()
+                                       ...
+  request_module()                     tcf_idr_check_alloc()
+  rtnl_lock()
+
+At this point, thread A is waiting for thread B to release RTNL
+lock, while thread B is waiting for thread A to commit the IDR
+change with tcf_idr_insert() later.
+
+Break this deadlock by preloading the ife meta modules earlier,
+before tcf_idr_check_alloc(). This is fine because we only load
+the modules we may actually need.
+
+Reported-and-tested-by: syzbot+80e32b5d1f9923f8ace6@syzkaller.appspotmail.com
+Fixes: 0190c1d452a9 ("net: sched: atomically check-allocate action")
+Cc: Jamal Hadi Salim <jhs@mojatatu.com>
+Cc: Vlad Buslov <vladbu@mellanox.com>
+Cc: Jiri Pirko <jiri@resnulli.us>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/act_ife.c |   44 ++++++++++++++++++++++++++++++++++----------
+ 1 file changed, 34 insertions(+), 10 deletions(-)
+
+--- a/net/sched/act_ife.c
++++ b/net/sched/act_ife.c
+@@ -436,6 +436,25 @@ static void tcf_ife_cleanup(struct tc_ac
+               kfree_rcu(p, rcu);
+ }
++static int load_metalist(struct nlattr **tb, bool rtnl_held)
++{
++      int i;
++
++      for (i = 1; i < max_metacnt; i++) {
++              if (tb[i]) {
++                      void *val = nla_data(tb[i]);
++                      int len = nla_len(tb[i]);
++                      int rc;
++
++                      rc = load_metaops_and_vet(i, val, len, rtnl_held);
++                      if (rc != 0)
++                              return rc;
++              }
++      }
++
++      return 0;
++}
++
+ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
+                            bool exists, bool rtnl_held)
+ {
+@@ -449,10 +468,6 @@ static int populate_metalist(struct tcf_
+                       val = nla_data(tb[i]);
+                       len = nla_len(tb[i]);
+-                      rc = load_metaops_and_vet(i, val, len, rtnl_held);
+-                      if (rc != 0)
+-                              return rc;
+-
+                       rc = add_metainfo(ife, i, val, len, exists);
+                       if (rc)
+                               return rc;
+@@ -509,6 +524,21 @@ static int tcf_ife_init(struct net *net,
+       if (!p)
+               return -ENOMEM;
++      if (tb[TCA_IFE_METALST]) {
++              err = nla_parse_nested_deprecated(tb2, IFE_META_MAX,
++                                                tb[TCA_IFE_METALST], NULL,
++                                                NULL);
++              if (err) {
++                      kfree(p);
++                      return err;
++              }
++              err = load_metalist(tb2, rtnl_held);
++              if (err) {
++                      kfree(p);
++                      return err;
++              }
++      }
++
+       index = parm->index;
+       err = tcf_idr_check_alloc(tn, &index, a, bind);
+       if (err < 0) {
+@@ -570,15 +600,9 @@ static int tcf_ife_init(struct net *net,
+       }
+       if (tb[TCA_IFE_METALST]) {
+-              err = nla_parse_nested_deprecated(tb2, IFE_META_MAX,
+-                                                tb[TCA_IFE_METALST], NULL,
+-                                                NULL);
+-              if (err)
+-                      goto metadata_parse_err;
+               err = populate_metalist(ife, tb2, exists, rtnl_held);
+               if (err)
+                       goto metadata_parse_err;
+-
+       } else {
+               /* if no passed metadata allow list or passed allow-all
+                * then here we process by adding as many supported metadatum
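
A minimal userspace sketch of the lock inversion fixed above, in plain C
with pthreads; the names are hypothetical stand-ins, not kernel API. Thread
A reserves an index and drops the big lock to "load a module"; thread B
takes the big lock and then waits for the reservation to be committed;
neither can proceed. The patch removes the window by loading the modules
before the reservation is ever taken.

#include <pthread.h>
#include <stdbool.h>
#include <unistd.h>

static pthread_mutex_t rtnl = PTHREAD_MUTEX_INITIALIZER; /* stands in for RTNL */
static pthread_mutex_t idr_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t idr_cv = PTHREAD_COND_INITIALIZER;
static bool index_pending;          /* set by check_alloc, cleared on commit */

static void idr_check_alloc(void)   /* models tcf_idr_check_alloc() */
{
	pthread_mutex_lock(&idr_lock);
	while (index_pending)       /* wait for the other caller to commit */
		pthread_cond_wait(&idr_cv, &idr_lock);
	index_pending = true;       /* reserve the index */
	pthread_mutex_unlock(&idr_lock);
}

static void *thread_a(void *unused)
{
	pthread_mutex_lock(&rtnl);
	idr_check_alloc();           /* index reserved, not yet committed */
	pthread_mutex_unlock(&rtnl); /* populate_metalist() drops RTNL... */
	sleep(2);                    /* ...request_module() runs here     */
	pthread_mutex_lock(&rtnl);   /* blocks forever: B holds RTNL      */
	/* the commit that would wake B can never happen */
	return NULL;
}

static void *thread_b(void *unused)
{
	sleep(1);                    /* let A reserve and drop RTNL first */
	pthread_mutex_lock(&rtnl);
	idr_check_alloc();           /* blocks forever: A never commits   */
	pthread_mutex_unlock(&rtnl);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, thread_a, NULL);
	pthread_create(&b, NULL, thread_b, NULL);
	pthread_join(a, NULL);       /* hangs: the classic AB-BA deadlock */
	pthread_join(b, NULL);
	return 0;
}
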
diff --git a/queue-5.8/bnxt_en-avoid-sending-firmware-messages-when-aer-error-is-detected.patch b/queue-5.8/bnxt_en-avoid-sending-firmware-messages-when-aer-error-is-detected.patch
new file mode 100644 (file)
index 0000000..af9c3d9
--- /dev/null
@@ -0,0 +1,71 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+Date: Sat, 5 Sep 2020 22:55:36 -0400
+Subject: bnxt_en: Avoid sending firmware messages when AER error is detected.
+
+From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+
+[ Upstream commit b340dc680ed48dcc05b56e1ebe1b9535813c3ee0 ]
+
+When the driver goes through PCIe AER reset in error state, all
+firmware messages will timeout because the PCIe bus is no longer
+accessible.  This can lead to AER reset taking many minutes to
+complete as each firmware command takes time to timeout.
+
+Define a new macro BNXT_NO_FW_ACCESS() to skip these firmware messages
+when either firmware is in fatal error state or when
+pci_channel_offline() is true.  It now takes a more reasonable 20 to
+30 seconds to complete AER recovery.
+
+Fixes: b4fff2079d10 ("bnxt_en: Do not send firmware messages if firmware is in error state.")
+Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c |    6 +++---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.h |    4 ++++
+ 2 files changed, 7 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -4198,7 +4198,7 @@ static int bnxt_hwrm_do_send_msg(struct
+       u32 bar_offset = BNXT_GRCPF_REG_CHIMP_COMM;
+       u16 dst = BNXT_HWRM_CHNL_CHIMP;
+-      if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state))
++      if (BNXT_NO_FW_ACCESS(bp))
+               return -EBUSY;
+       if (msg_len > BNXT_HWRM_MAX_REQ_LEN) {
+@@ -5530,7 +5530,7 @@ static int hwrm_ring_free_send_msg(struc
+       struct hwrm_ring_free_output *resp = bp->hwrm_cmd_resp_addr;
+       u16 error_code;
+-      if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state))
++      if (BNXT_NO_FW_ACCESS(bp))
+               return 0;
+       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_RING_FREE, cmpl_ring_id, -1);
+@@ -7502,7 +7502,7 @@ static int bnxt_set_tpa(struct bnxt *bp,
+       if (set_tpa)
+               tpa_flags = bp->flags & BNXT_FLAG_TPA;
+-      else if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state))
++      else if (BNXT_NO_FW_ACCESS(bp))
+               return 0;
+       for (i = 0; i < bp->nr_vnics; i++) {
+               rc = bnxt_hwrm_vnic_set_tpa(bp, i, tpa_flags);
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+@@ -1673,6 +1673,10 @@ struct bnxt {
+ #define BNXT_STATE_FW_FATAL_COND      6
+ #define BNXT_STATE_DRV_REGISTERED     7
++#define BNXT_NO_FW_ACCESS(bp)                                 \
++      (test_bit(BNXT_STATE_FW_FATAL_COND, &(bp)->state) ||    \
++       pci_channel_offline((bp)->pdev))
++
+       struct bnxt_irq *irq_tbl;
+       int                     total_irqs;
+       u8                      mac_addr[ETH_ALEN];
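
A hedged userspace sketch of the pattern the patch introduces, with
hypothetical names rather than the driver's real API: fold the two
"firmware unreachable" conditions into one predicate macro and consult it
before every message, so a dead PCIe channel fails fast instead of waiting
out a timeout per command.

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

struct dev_state {
	bool fw_fatal;      /* models BNXT_STATE_FW_FATAL_COND  */
	bool pci_offline;   /* models pci_channel_offline(pdev) */
};

/* one check covering both reasons the firmware cannot answer */
#define NO_FW_ACCESS(d) ((d)->fw_fatal || (d)->pci_offline)

static int send_fw_msg(struct dev_state *d, const char *msg)
{
	if (NO_FW_ACCESS(d))
		return -EBUSY;              /* fail fast, no timeout wait */
	printf("fw <- %s\n", msg);          /* pretend delivery */
	return 0;
}

int main(void)
{
	struct dev_state d = { .pci_offline = true };   /* AER in progress */

	return send_fw_msg(&d, "HWRM_RING_FREE") == -EBUSY ? 0 : 1;
}
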
diff --git a/queue-5.8/bnxt_en-fix-null-ptr-dereference-crash-in-bnxt_fw_reset_task.patch b/queue-5.8/bnxt_en-fix-null-ptr-dereference-crash-in-bnxt_fw_reset_task.patch
new file mode 100644 (file)
index 0000000..19a0aaf
--- /dev/null
@@ -0,0 +1,52 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+Date: Sat, 5 Sep 2020 22:55:37 -0400
+Subject: bnxt_en: Fix NULL ptr dereference crash in bnxt_fw_reset_task()
+
+From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+
+[ Upstream commit b16939b59cc00231a75d224fd058d22c9d064976 ]
+
+bnxt_fw_reset_task(), which runs from a workqueue, can race with
+bnxt_remove_one(), for example when a firmware reset and a VF FLR
+happen at about the same time.
+
+bnxt_remove_one() already cancels the workqueue and waits for it
+to finish, but we need to do this earlier before the devlink
+reporters are destroyed.  This will guarantee that
+the devlink reporters will always be valid when bnxt_fw_reset_task()
+is still running.
+
+Fixes: b148bb238c02 ("bnxt_en: Fix possible crash in bnxt_fw_reset_task().")
+Reviewed-by: Edwin Peer <edwin.peer@broadcom.com>
+Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c |    7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -11498,6 +11498,10 @@ static void bnxt_remove_one(struct pci_d
+       if (BNXT_PF(bp))
+               bnxt_sriov_disable(bp);
++      clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
++      bnxt_cancel_sp_work(bp);
++      bp->sp_event = 0;
++
+       bnxt_dl_fw_reporters_destroy(bp, true);
+       if (BNXT_PF(bp))
+               devlink_port_type_clear(&bp->dl_port);
+@@ -11505,9 +11509,6 @@ static void bnxt_remove_one(struct pci_d
+       unregister_netdev(dev);
+       bnxt_dl_unregister(bp);
+       bnxt_shutdown_tc(bp);
+-      clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
+-      bnxt_cancel_sp_work(bp);
+-      bp->sp_event = 0;
+       bnxt_clear_int_mode(bp);
+       bnxt_hwrm_func_drv_unrgtr(bp);
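
A hedged userspace sketch of the ordering rule behind the patch, using
hypothetical names: stop and wait for the asynchronous worker before
destroying anything it dereferences, never after.

#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

struct reporter { int events; };           /* models a devlink reporter */

static struct reporter *reporter;
static atomic_bool stop;

static void *reset_task(void *unused)      /* models bnxt_fw_reset_task() */
{
	while (!atomic_load(&stop))
		reporter->events++;        /* would crash if freed already */
	return NULL;
}

int main(void)
{
	pthread_t worker;

	reporter = calloc(1, sizeof(*reporter));
	pthread_create(&worker, NULL, reset_task, NULL);

	atomic_store(&stop, true);  /* 1. ask the worker to finish          */
	pthread_join(worker, NULL); /* 2. wait for it (bnxt_cancel_sp_work) */
	free(reporter);             /* 3. only now destroy what it used     */
	return 0;
}
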
diff --git a/queue-5.8/bnxt_en-protect-bnxt_set_eee-and-bnxt_set_pauseparam-with-mutex.patch b/queue-5.8/bnxt_en-protect-bnxt_set_eee-and-bnxt_set_pauseparam-with-mutex.patch
new file mode 100644 (file)
index 0000000..5f30383
--- /dev/null
@@ -0,0 +1,109 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Michael Chan <michael.chan@broadcom.com>
+Date: Sun, 20 Sep 2020 21:08:56 -0400
+Subject: bnxt_en: Protect bnxt_set_eee() and bnxt_set_pauseparam() with mutex.
+
+From: Michael Chan <michael.chan@broadcom.com>
+
+[ Upstream commit a53906908148d64423398a62c4435efb0d09652c ]
+
+All changes related to bp->link_info require the protection of the
+link_lock mutex.  It's not sufficient to rely just on RTNL.
+
+Fixes: 163e9ef63641 ("bnxt_en: Fix race when modifying pause settings.")
+Reviewed-by: Edwin Peer <edwin.peer@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c |   31 ++++++++++++++--------
+ 1 file changed, 20 insertions(+), 11 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+@@ -1735,9 +1735,12 @@ static int bnxt_set_pauseparam(struct ne
+       if (!BNXT_PHY_CFG_ABLE(bp))
+               return -EOPNOTSUPP;
++      mutex_lock(&bp->link_lock);
+       if (epause->autoneg) {
+-              if (!(link_info->autoneg & BNXT_AUTONEG_SPEED))
+-                      return -EINVAL;
++              if (!(link_info->autoneg & BNXT_AUTONEG_SPEED)) {
++                      rc = -EINVAL;
++                      goto pause_exit;
++              }
+               link_info->autoneg |= BNXT_AUTONEG_FLOW_CTRL;
+               if (bp->hwrm_spec_code >= 0x10201)
+@@ -1758,11 +1761,11 @@ static int bnxt_set_pauseparam(struct ne
+       if (epause->tx_pause)
+               link_info->req_flow_ctrl |= BNXT_LINK_PAUSE_TX;
+-      if (netif_running(dev)) {
+-              mutex_lock(&bp->link_lock);
++      if (netif_running(dev))
+               rc = bnxt_hwrm_set_pause(bp);
+-              mutex_unlock(&bp->link_lock);
+-      }
++
++pause_exit:
++      mutex_unlock(&bp->link_lock);
+       return rc;
+ }
+@@ -2499,8 +2502,7 @@ static int bnxt_set_eee(struct net_devic
+       struct bnxt *bp = netdev_priv(dev);
+       struct ethtool_eee *eee = &bp->eee;
+       struct bnxt_link_info *link_info = &bp->link_info;
+-      u32 advertising =
+-               _bnxt_fw_to_ethtool_adv_spds(link_info->advertising, 0);
++      u32 advertising;
+       int rc = 0;
+       if (!BNXT_PHY_CFG_ABLE(bp))
+@@ -2509,19 +2511,23 @@ static int bnxt_set_eee(struct net_devic
+       if (!(bp->flags & BNXT_FLAG_EEE_CAP))
+               return -EOPNOTSUPP;
++      mutex_lock(&bp->link_lock);
++      advertising = _bnxt_fw_to_ethtool_adv_spds(link_info->advertising, 0);
+       if (!edata->eee_enabled)
+               goto eee_ok;
+       if (!(link_info->autoneg & BNXT_AUTONEG_SPEED)) {
+               netdev_warn(dev, "EEE requires autoneg\n");
+-              return -EINVAL;
++              rc = -EINVAL;
++              goto eee_exit;
+       }
+       if (edata->tx_lpi_enabled) {
+               if (bp->lpi_tmr_hi && (edata->tx_lpi_timer > bp->lpi_tmr_hi ||
+                                      edata->tx_lpi_timer < bp->lpi_tmr_lo)) {
+                       netdev_warn(dev, "Valid LPI timer range is %d and %d microsecs\n",
+                                   bp->lpi_tmr_lo, bp->lpi_tmr_hi);
+-                      return -EINVAL;
++                      rc = -EINVAL;
++                      goto eee_exit;
+               } else if (!bp->lpi_tmr_hi) {
+                       edata->tx_lpi_timer = eee->tx_lpi_timer;
+               }
+@@ -2531,7 +2537,8 @@ static int bnxt_set_eee(struct net_devic
+       } else if (edata->advertised & ~advertising) {
+               netdev_warn(dev, "EEE advertised %x must be a subset of autoneg advertised speeds %x\n",
+                           edata->advertised, advertising);
+-              return -EINVAL;
++              rc = -EINVAL;
++              goto eee_exit;
+       }
+       eee->advertised = edata->advertised;
+@@ -2543,6 +2550,8 @@ eee_ok:
+       if (netif_running(dev))
+               rc = bnxt_hwrm_set_link_setting(bp, false, true);
++eee_exit:
++      mutex_unlock(&bp->link_lock);
+       return rc;
+ }
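
A hedged sketch of the error-path idiom the patch applies, with a
hypothetical function in plain C with pthreads: once the mutex is taken at
function entry, every early return must become a goto to a single unlock
label so no exit path can leave the lock held or skip it.

#include <errno.h>
#include <pthread.h>

static pthread_mutex_t link_lock = PTHREAD_MUTEX_INITIALIZER;

static int set_pauseparam(int autoneg, int speed_autoneg)
{
	int rc = 0;

	pthread_mutex_lock(&link_lock);     /* lock now covers the checks */
	if (autoneg && !speed_autoneg) {
		rc = -EINVAL;               /* was: return -EINVAL        */
		goto pause_exit;            /* must not skip the unlock   */
	}
	/* ... modify the shared link settings under the lock ... */

pause_exit:
	pthread_mutex_unlock(&link_lock);
	return rc;
}

int main(void)
{
	return set_pauseparam(1, 0) == -EINVAL ? 0 : 1;
}
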
diff --git a/queue-5.8/bnxt_en-return-proper-error-codes-in-bnxt_show_temp.patch b/queue-5.8/bnxt_en-return-proper-error-codes-in-bnxt_show_temp.patch
new file mode 100644 (file)
index 0000000..49f6c0a
--- /dev/null
@@ -0,0 +1,72 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Edwin Peer <edwin.peer@broadcom.com>
+Date: Sun, 20 Sep 2020 21:08:55 -0400
+Subject: bnxt_en: return proper error codes in bnxt_show_temp
+
+From: Edwin Peer <edwin.peer@broadcom.com>
+
+[ Upstream commit d69753fa1ecb3218b56b022722f7a5822735b876 ]
+
+Returning "unknown" as a temperature value violates the hwmon interface
+rules. Appropriate error codes should be returned via device_attribute
+show instead. These will ultimately be propagated to the user via the
+file system interface.
+
+In addition to the corrected error handling, it is an even better idea to
+not present the sensor in sysfs at all if it is known that the read will
+definitely fail. Given that temp1_input is currently the only sensor
+reported, ensure no hwmon registration if TEMP_MONITOR_QUERY is not
+supported or if it will fail due to access permissions. Something smarter
+may be needed if and when other sensors are added.
+
+Fixes: 12cce90b934b ("bnxt_en: fix HWRM error when querying VF temperature")
+Signed-off-by: Edwin Peer <edwin.peer@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c |   19 +++++++++++++------
+ 1 file changed, 13 insertions(+), 6 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -8993,18 +8993,16 @@ static ssize_t bnxt_show_temp(struct dev
+       struct hwrm_temp_monitor_query_output *resp;
+       struct bnxt *bp = dev_get_drvdata(dev);
+       u32 len = 0;
++      int rc;
+       resp = bp->hwrm_cmd_resp_addr;
+       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_TEMP_MONITOR_QUERY, -1, -1);
+       mutex_lock(&bp->hwrm_cmd_lock);
+-      if (!_hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT))
++      rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
++      if (!rc)
+               len = sprintf(buf, "%u\n", resp->temp * 1000); /* display millidegree */
+       mutex_unlock(&bp->hwrm_cmd_lock);
+-
+-      if (len)
+-              return len;
+-
+-      return sprintf(buf, "unknown\n");
++      return rc ?: len;
+ }
+ static SENSOR_DEVICE_ATTR(temp1_input, 0444, bnxt_show_temp, NULL, 0);
+@@ -9024,7 +9022,16 @@ static void bnxt_hwmon_close(struct bnxt
+ static void bnxt_hwmon_open(struct bnxt *bp)
+ {
++      struct hwrm_temp_monitor_query_input req = {0};
+       struct pci_dev *pdev = bp->pdev;
++      int rc;
++
++      bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_TEMP_MONITOR_QUERY, -1, -1);
++      rc = hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
++      if (rc == -EACCES || rc == -EOPNOTSUPP) {
++              bnxt_hwmon_close(bp);
++              return;
++      }
+       if (bp->hwmon_dev)
+               return;
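
A note on the "return rc ?: len;" line above: "?:" is the GNU C
conditional with an omitted middle operand (the "Elvis" operator), which
yields rc when rc is non-zero (the error) and len otherwise. A tiny
standalone illustration; compile with gcc, which enables the extension by
default:

#include <stdio.h>

static int show(int rc, int len)
{
	return rc ?: len;   /* equivalent to: rc ? rc : len */
}

int main(void)
{
	printf("%d\n", show(0, 6));    /* success: prints 6  */
	printf("%d\n", show(-5, 6));   /* failure: prints -5 */
	return 0;
}
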
diff --git a/queue-5.8/bnxt_en-use-memcpy-to-copy-vpd-field-info.patch b/queue-5.8/bnxt_en-use-memcpy-to-copy-vpd-field-info.patch
new file mode 100644 (file)
index 0000000..49caf61
--- /dev/null
@@ -0,0 +1,54 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+Date: Sun, 20 Sep 2020 21:08:54 -0400
+Subject: bnxt_en: Use memcpy to copy VPD field info.
+
+From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+
+[ Upstream commit 492adcf481292521ee8df1a482dc12acdb28aa15 ]
+
+Using strlcpy() to copy from VPD is not correct because VPD strings
+are not necessarily NULL terminated.  Use memcpy() to copy the VPD
+length up to the destination buffer size - 1.  The destination is
+zeroed memory so it will always be NULL terminated.
+
+Fixes: a0d0fd70fed5 ("bnxt_en: Read partno and serialno of the board from VPD")
+Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c |    8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -11807,7 +11807,7 @@ static int bnxt_init_mac_addr(struct bnx
+ static void bnxt_vpd_read_info(struct bnxt *bp)
+ {
+       struct pci_dev *pdev = bp->pdev;
+-      int i, len, pos, ro_size;
++      int i, len, pos, ro_size, size;
+       ssize_t vpd_size;
+       u8 *vpd_data;
+@@ -11842,7 +11842,8 @@ static void bnxt_vpd_read_info(struct bn
+       if (len + pos > vpd_size)
+               goto read_sn;
+-      strlcpy(bp->board_partno, &vpd_data[pos], min(len, BNXT_VPD_FLD_LEN));
++      size = min(len, BNXT_VPD_FLD_LEN - 1);
++      memcpy(bp->board_partno, &vpd_data[pos], size);
+ read_sn:
+       pos = pci_vpd_find_info_keyword(vpd_data, i, ro_size,
+@@ -11855,7 +11856,8 @@ read_sn:
+       if (len + pos > vpd_size)
+               goto exit;
+-      strlcpy(bp->board_serialno, &vpd_data[pos], min(len, BNXT_VPD_FLD_LEN));
++      size = min(len, BNXT_VPD_FLD_LEN - 1);
++      memcpy(bp->board_serialno, &vpd_data[pos], size);
+ exit:
+       kfree(vpd_data);
+ }
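
A hedged sketch of the copy rule the patch adopts, with a hypothetical
helper: VPD fields are length-delimited, not NUL-terminated, so strlcpy()
may read past the field looking for a NUL. Instead, copy at most
size - 1 bytes into a zeroed destination, which guarantees termination.

#include <stdio.h>
#include <string.h>

#define FLD_LEN 32   /* plays the role of BNXT_VPD_FLD_LEN */

static void copy_vpd_field(char dst[FLD_LEN], const char *src, int len)
{
	int size = len < FLD_LEN - 1 ? len : FLD_LEN - 1;

	memset(dst, 0, FLD_LEN);   /* guarantees NUL termination */
	memcpy(dst, src, size);    /* never reads past the field */
}

int main(void)
{
	char part[FLD_LEN];
	const char raw[] = { 'B', 'C', 'M', '5', '7', '4', '1', '4' }; /* no NUL */

	copy_vpd_field(part, raw, (int)sizeof(raw));
	printf("%s\n", part);
	return 0;
}
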
diff --git a/queue-5.8/cxgb4-fix-memory-leak-during-module-unload.patch b/queue-5.8/cxgb4-fix-memory-leak-during-module-unload.patch
new file mode 100644 (file)
index 0000000..7a8b59e
--- /dev/null
@@ -0,0 +1,32 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Raju Rangoju <rajur@chelsio.com>
+Date: Wed, 16 Sep 2020 21:50:39 +0530
+Subject: cxgb4: fix memory leak during module unload
+
+From: Raju Rangoju <rajur@chelsio.com>
+
+[ Upstream commit f4a26a9b311d7ff9db461278faf2869d06496ef8 ]
+
+Fix the memory leak in mps during the module unload
+path by freeing the mps reference entries if the list
+adapter->mps_ref is not already empty.
+
+Fixes: 28b3870578ef ("cxgb4: Re-work the logic for mps refcounting")
+Signed-off-by: Raju Rangoju <rajur@chelsio.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/chelsio/cxgb4/cxgb4_mps.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_mps.c
++++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_mps.c
+@@ -229,7 +229,7 @@ void cxgb4_free_mps_ref_entries(struct a
+ {
+       struct mps_entries_ref *mps_entry, *tmp;
+-      if (!list_empty(&adap->mps_ref))
++      if (list_empty(&adap->mps_ref))
+               return;
+       spin_lock(&adap->mps_ref_lock);
diff --git a/queue-5.8/cxgb4-fix-offset-when-clearing-filter-byte-counters.patch b/queue-5.8/cxgb4-fix-offset-when-clearing-filter-byte-counters.patch
new file mode 100644 (file)
index 0000000..eb84365
--- /dev/null
@@ -0,0 +1,43 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Ganji Aravind <ganji.aravind@chelsio.com>
+Date: Fri, 4 Sep 2020 15:58:18 +0530
+Subject: cxgb4: Fix offset when clearing filter byte counters
+
+From: Ganji Aravind <ganji.aravind@chelsio.com>
+
+[ Upstream commit 94cc242a067a869c29800aa789d38b7676136e50 ]
+
+Pass the correct offset to clear the stale filter hit
+bytes counter. Otherwise, the counter starts incrementing
+from the stale information, instead of 0.
+
+Fixes: 12b276fbf6e0 ("cxgb4: add support to create hash filters")
+Signed-off-by: Ganji Aravind <ganji.aravind@chelsio.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c |    9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
++++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
+@@ -1906,13 +1906,16 @@ out:
+ static int configure_filter_tcb(struct adapter *adap, unsigned int tid,
+                               struct filter_entry *f)
+ {
+-      if (f->fs.hitcnts)
++      if (f->fs.hitcnts) {
+               set_tcb_field(adap, f, tid, TCB_TIMESTAMP_W,
+-                            TCB_TIMESTAMP_V(TCB_TIMESTAMP_M) |
++                            TCB_TIMESTAMP_V(TCB_TIMESTAMP_M),
++                            TCB_TIMESTAMP_V(0ULL),
++                            1);
++              set_tcb_field(adap, f, tid, TCB_RTT_TS_RECENT_AGE_W,
+                             TCB_RTT_TS_RECENT_AGE_V(TCB_RTT_TS_RECENT_AGE_M),
+-                            TCB_TIMESTAMP_V(0ULL) |
+                             TCB_RTT_TS_RECENT_AGE_V(0ULL),
+                             1);
++      }
+       if (f->fs.newdmac)
+               set_tcb_tflag(adap, f, tid, TF_CCTRL_ECE_S, 1,
diff --git a/queue-5.8/geneve-add-transport-ports-in-route-lookup-for-geneve.patch b/queue-5.8/geneve-add-transport-ports-in-route-lookup-for-geneve.patch
new file mode 100644 (file)
index 0000000..6eb8cf5
--- /dev/null
@@ -0,0 +1,181 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Mark Gray <mark.d.gray@redhat.com>
+Date: Wed, 16 Sep 2020 05:19:35 -0400
+Subject: geneve: add transport ports in route lookup for geneve
+
+From: Mark Gray <mark.d.gray@redhat.com>
+
+[ Upstream commit 34beb21594519ce64a55a498c2fe7d567bc1ca20 ]
+
+This patch adds transport port information to the route lookup so
+that IPsec can select Geneve tunnel traffic for encryption. This is
+needed for OVS/OVN IPsec with encrypted Geneve tunnels.
+
+This can be tested by configuring a host-host VPN using an IKE
+daemon and specifying port numbers. For example, for an
+Openswan-type configuration, the following parameters should be
+configured on both hosts and IPsec set up as per normal:
+
+$ cat /etc/ipsec.conf
+
+conn in
+...
+left=$IP1
+right=$IP2
+...
+leftprotoport=udp/6081
+rightprotoport=udp
+...
+conn out
+...
+left=$IP1
+right=$IP2
+...
+leftprotoport=udp
+rightprotoport=udp/6081
+...
+
+The tunnel can then be setup using "ip" on both hosts (but
+changing the relevant IP addresses):
+
+$ ip link add tun type geneve id 1000 remote $IP2
+$ ip addr add 192.168.0.1/24 dev tun
+$ ip link set tun up
+
+This can then be tested by pinging from $IP1:
+
+$ ping 192.168.0.2
+
+Without this patch the traffic is unencrypted on the wire.
+
+Fixes: 2d07dc79fe04 ("geneve: add initial netdev driver for GENEVE tunnels")
+Signed-off-by: Qiuyu Xiao <qiuyu.xiao.qyx@gmail.com>
+Signed-off-by: Mark Gray <mark.d.gray@redhat.com>
+Reviewed-by: Greg Rose <gvrose8192@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/geneve.c |   37 +++++++++++++++++++++++++++----------
+ 1 file changed, 27 insertions(+), 10 deletions(-)
+
+--- a/drivers/net/geneve.c
++++ b/drivers/net/geneve.c
+@@ -773,7 +773,8 @@ static struct rtable *geneve_get_v4_rt(s
+                                      struct net_device *dev,
+                                      struct geneve_sock *gs4,
+                                      struct flowi4 *fl4,
+-                                     const struct ip_tunnel_info *info)
++                                     const struct ip_tunnel_info *info,
++                                     __be16 dport, __be16 sport)
+ {
+       bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
+       struct geneve_dev *geneve = netdev_priv(dev);
+@@ -789,6 +790,8 @@ static struct rtable *geneve_get_v4_rt(s
+       fl4->flowi4_proto = IPPROTO_UDP;
+       fl4->daddr = info->key.u.ipv4.dst;
+       fl4->saddr = info->key.u.ipv4.src;
++      fl4->fl4_dport = dport;
++      fl4->fl4_sport = sport;
+       tos = info->key.tos;
+       if ((tos == 1) && !geneve->collect_md) {
+@@ -823,7 +826,8 @@ static struct dst_entry *geneve_get_v6_d
+                                          struct net_device *dev,
+                                          struct geneve_sock *gs6,
+                                          struct flowi6 *fl6,
+-                                         const struct ip_tunnel_info *info)
++                                         const struct ip_tunnel_info *info,
++                                         __be16 dport, __be16 sport)
+ {
+       bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
+       struct geneve_dev *geneve = netdev_priv(dev);
+@@ -839,6 +843,9 @@ static struct dst_entry *geneve_get_v6_d
+       fl6->flowi6_proto = IPPROTO_UDP;
+       fl6->daddr = info->key.u.ipv6.dst;
+       fl6->saddr = info->key.u.ipv6.src;
++      fl6->fl6_dport = dport;
++      fl6->fl6_sport = sport;
++
+       prio = info->key.tos;
+       if ((prio == 1) && !geneve->collect_md) {
+               prio = ip_tunnel_get_dsfield(ip_hdr(skb), skb);
+@@ -885,14 +892,15 @@ static int geneve_xmit_skb(struct sk_buf
+       __be16 sport;
+       int err;
+-      rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info);
++      sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
++      rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info,
++                            geneve->info.key.tp_dst, sport);
+       if (IS_ERR(rt))
+               return PTR_ERR(rt);
+       skb_tunnel_check_pmtu(skb, &rt->dst,
+                             GENEVE_IPV4_HLEN + info->options_len);
+-      sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
+       if (geneve->collect_md) {
+               tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb);
+               ttl = key->ttl;
+@@ -947,13 +955,14 @@ static int geneve6_xmit_skb(struct sk_bu
+       __be16 sport;
+       int err;
+-      dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info);
++      sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
++      dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info,
++                              geneve->info.key.tp_dst, sport);
+       if (IS_ERR(dst))
+               return PTR_ERR(dst);
+       skb_tunnel_check_pmtu(skb, dst, GENEVE_IPV6_HLEN + info->options_len);
+-      sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
+       if (geneve->collect_md) {
+               prio = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb);
+               ttl = key->ttl;
+@@ -1034,13 +1043,18 @@ static int geneve_fill_metadata_dst(stru
+ {
+       struct ip_tunnel_info *info = skb_tunnel_info(skb);
+       struct geneve_dev *geneve = netdev_priv(dev);
++      __be16 sport;
+       if (ip_tunnel_info_af(info) == AF_INET) {
+               struct rtable *rt;
+               struct flowi4 fl4;
++
+               struct geneve_sock *gs4 = rcu_dereference(geneve->sock4);
++              sport = udp_flow_src_port(geneve->net, skb,
++                                        1, USHRT_MAX, true);
+-              rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info);
++              rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info,
++                                    geneve->info.key.tp_dst, sport);
+               if (IS_ERR(rt))
+                       return PTR_ERR(rt);
+@@ -1050,9 +1064,13 @@ static int geneve_fill_metadata_dst(stru
+       } else if (ip_tunnel_info_af(info) == AF_INET6) {
+               struct dst_entry *dst;
+               struct flowi6 fl6;
++
+               struct geneve_sock *gs6 = rcu_dereference(geneve->sock6);
++              sport = udp_flow_src_port(geneve->net, skb,
++                                        1, USHRT_MAX, true);
+-              dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info);
++              dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info,
++                                      geneve->info.key.tp_dst, sport);
+               if (IS_ERR(dst))
+                       return PTR_ERR(dst);
+@@ -1063,8 +1081,7 @@ static int geneve_fill_metadata_dst(stru
+               return -EINVAL;
+       }
+-      info->key.tp_src = udp_flow_src_port(geneve->net, skb,
+-                                           1, USHRT_MAX, true);
++      info->key.tp_src = sport;
+       info->key.tp_dst = geneve->info.key.tp_dst;
+       return 0;
+ }
diff --git a/queue-5.8/hdlc_ppp-add-range-checks-in-ppp_cp_parse_cr.patch b/queue-5.8/hdlc_ppp-add-range-checks-in-ppp_cp_parse_cr.patch
new file mode 100644 (file)
index 0000000..423a68d
--- /dev/null
@@ -0,0 +1,80 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Dan Carpenter <dan.carpenter@oracle.com>
+Date: Wed, 9 Sep 2020 12:46:48 +0300
+Subject: hdlc_ppp: add range checks in ppp_cp_parse_cr()
+
+From: Dan Carpenter <dan.carpenter@oracle.com>
+
+[ Upstream commit 66d42ed8b25b64eb63111a2b8582c5afc8bf1105 ]
+
+There are a couple bugs here:
+1) If opt[1] is zero then this results in a forever loop.  If the value
+   is less than 2 then it is invalid.
+2) It assumes that "len" is more than sizeof(valid_accm) or 6 which can
+   result in memory corruption.
+
+In the case of LCP_OPTION_ACCM, we should check "opt[1]" instead
+of "len" because, if "opt[1]" is less than sizeof(valid_accm) then
+"nak_len" gets out of sync and it can lead to memory corruption in the
+next iterations through the loop.  In case of LCP_OPTION_MAGIC, the
+only valid value for opt[1] is 6, but the code is trying to log invalid
+data so we should only discard the data when "len" is less than 6
+because that leads to a read overflow.
+
+Reported-by: ChenNan Of Chaitin Security Research Lab  <whutchennan@gmail.com>
+Fixes: e022c2f07ae5 ("WAN: new synchronous PPP implementation for generic HDLC.")
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/wan/hdlc_ppp.c |   16 +++++++++++-----
+ 1 file changed, 11 insertions(+), 5 deletions(-)
+
+--- a/drivers/net/wan/hdlc_ppp.c
++++ b/drivers/net/wan/hdlc_ppp.c
+@@ -383,11 +383,8 @@ static void ppp_cp_parse_cr(struct net_d
+       }
+       for (opt = data; len; len -= opt[1], opt += opt[1]) {
+-              if (len < 2 || len < opt[1]) {
+-                      dev->stats.rx_errors++;
+-                      kfree(out);
+-                      return; /* bad packet, drop silently */
+-              }
++              if (len < 2 || opt[1] < 2 || len < opt[1])
++                      goto err_out;
+               if (pid == PID_LCP)
+                       switch (opt[0]) {
+@@ -395,6 +392,8 @@ static void ppp_cp_parse_cr(struct net_d
+                               continue; /* MRU always OK and > 1500 bytes? */
+                       case LCP_OPTION_ACCM: /* async control character map */
++                              if (opt[1] < sizeof(valid_accm))
++                                      goto err_out;
+                               if (!memcmp(opt, valid_accm,
+                                           sizeof(valid_accm)))
+                                       continue;
+@@ -406,6 +405,8 @@ static void ppp_cp_parse_cr(struct net_d
+                               }
+                               break;
+                       case LCP_OPTION_MAGIC:
++                              if (len < 6)
++                                      goto err_out;
+                               if (opt[1] != 6 || (!opt[2] && !opt[3] &&
+                                                   !opt[4] && !opt[5]))
+                                       break; /* reject invalid magic number */
+@@ -424,6 +425,11 @@ static void ppp_cp_parse_cr(struct net_d
+               ppp_cp_event(dev, pid, RCR_GOOD, CP_CONF_ACK, id, req_len, data);
+       kfree(out);
++      return;
++
++err_out:
++      dev->stats.rx_errors++;
++      kfree(out);
+ }
+ static int ppp_rx(struct sk_buff *skb)
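
A hedged standalone sketch of the hardened walk, using hypothetical option
data: every option is a type byte, a length byte that counts the two
header bytes, and a payload. Any option with opt[1] < 2 or opt[1] > len
must abort the parse before the loop's "len -= opt[1]" can stall or wrap.

#include <stdint.h>
#include <stdio.h>

/* returns 0 on success, -1 if the packet must be dropped */
static int parse_opts(const uint8_t *opt, int len)
{
	for (; len; len -= opt[1], opt += opt[1]) {
		if (len < 2 || opt[1] < 2 || len < opt[1])
			return -1;   /* malformed: same checks as the patch */
		printf("option type %u, len %u\n", opt[0], opt[1]);
	}
	return 0;
}

int main(void)
{
	const uint8_t good[] = { 1, 4, 0x05, 0xdc, 5, 2 };  /* two well-formed options */
	const uint8_t bad[]  = { 2, 0 };  /* length 0 once meant a forever loop */

	printf("good: %d\n", parse_opts(good, (int)sizeof(good)));
	printf("bad:  %d\n", parse_opts(bad, (int)sizeof(bad)));
	return 0;
}
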
diff --git a/queue-5.8/hinic-bump-up-the-timeout-of-set_func_state-cmd.patch b/queue-5.8/hinic-bump-up-the-timeout-of-set_func_state-cmd.patch
new file mode 100644 (file)
index 0000000..73be18a
--- /dev/null
@@ -0,0 +1,59 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Luo bin <luobin9@huawei.com>
+Date: Fri, 4 Sep 2020 16:37:28 +0800
+Subject: hinic: bump up the timeout of SET_FUNC_STATE cmd
+
+From: Luo bin <luobin9@huawei.com>
+
+[ Upstream commit 4e4269ebe7e18038fffacf113e2dd5ded6d49942 ]
+
+In hinic_close we free memory regardless of the return value of the
+SET_FUNC_STATE cmd to avoid a memory leak, but this cmd may time out
+when the fw is busy handling other cmds, so we bump up the timeout
+of this cmd to ensure it won't return a failure.
+
+Fixes: 00e57a6d4ad3 ("net-next/hinic: Add Tx operation")
+Signed-off-by: Luo bin <luobin9@huawei.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c |   16 +++++++++++-----
+ 1 file changed, 11 insertions(+), 5 deletions(-)
+
+--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c
++++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c
+@@ -45,6 +45,8 @@
+ #define MGMT_MSG_TIMEOUT                5000
++#define SET_FUNC_PORT_MBOX_TIMEOUT    30000
++
+ #define SET_FUNC_PORT_MGMT_TIMEOUT    25000
+ #define mgmt_to_pfhwdev(pf_mgmt)        \
+@@ -358,16 +360,20 @@ int hinic_msg_to_mgmt(struct hinic_pf_to
+               return -EINVAL;
+       }
+-      if (cmd == HINIC_PORT_CMD_SET_FUNC_STATE)
+-              timeout = SET_FUNC_PORT_MGMT_TIMEOUT;
++      if (HINIC_IS_VF(hwif)) {
++              if (cmd == HINIC_PORT_CMD_SET_FUNC_STATE)
++                      timeout = SET_FUNC_PORT_MBOX_TIMEOUT;
+-      if (HINIC_IS_VF(hwif))
+               return hinic_mbox_to_pf(pf_to_mgmt->hwdev, mod, cmd, buf_in,
+-                                      in_size, buf_out, out_size, 0);
+-      else
++                                      in_size, buf_out, out_size, timeout);
++      } else {
++              if (cmd == HINIC_PORT_CMD_SET_FUNC_STATE)
++                      timeout = SET_FUNC_PORT_MGMT_TIMEOUT;
++
+               return msg_to_mgmt_sync(pf_to_mgmt, mod, cmd, buf_in, in_size,
+                               buf_out, out_size, MGMT_DIRECT_SEND,
+                               MSG_NOT_RESP, timeout);
++      }
+ }
+ static void recv_mgmt_msg_work_handler(struct work_struct *work)
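
The timeout policy after the patch, as a hedged sketch with hypothetical
names; the millisecond values are the ones defined in the patch.
SET_FUNC_STATE gets a 30 s budget on the VF mailbox path and 25 s on the
PF management path, while every other command keeps the 5 s default.

#include <stdio.h>

enum hinic_cmd { CMD_SET_FUNC_STATE, CMD_OTHER };

static unsigned int cmd_timeout_ms(enum hinic_cmd cmd, int is_vf)
{
	if (cmd == CMD_SET_FUNC_STATE)
		return is_vf ? 30000    /* SET_FUNC_PORT_MBOX_TIMEOUT */
			     : 25000;   /* SET_FUNC_PORT_MGMT_TIMEOUT */
	return 5000;                    /* MGMT_MSG_TIMEOUT           */
}

int main(void)
{
	printf("%u\n", cmd_timeout_ms(CMD_SET_FUNC_STATE, 1));  /* 30000 */
	return 0;
}
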
diff --git a/queue-5.8/hinic-fix-rewaking-txq-after-netif_tx_disable.patch b/queue-5.8/hinic-fix-rewaking-txq-after-netif_tx_disable.patch
new file mode 100644 (file)
index 0000000..2406c4f
--- /dev/null
@@ -0,0 +1,129 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Luo bin <luobin9@huawei.com>
+Date: Thu, 10 Sep 2020 22:04:40 +0800
+Subject: hinic: fix rewaking txq after netif_tx_disable
+
+From: Luo bin <luobin9@huawei.com>
+
+[ Upstream commit a1b80e0143a1b878f8e21d82fd55f3f46f0014be ]
+
+When calling hinic_close in hinic_set_channels, all queues are
+stopped after netif_tx_disable, but a queue may be rewoken by
+mistake in free_tx_poll while the driver is handling a tx irq. If a
+queue is rewoken, the core may call hinic_xmit_frame to send a pkt
+shortly after netif_tx_disable, which results in accessing memory
+that hinic_close has already freed. So we call napi_disable before
+netif_tx_disable in hinic_close to fix this bug.
+
+Fixes: 2eed5a8b614b ("hinic: add set_channels ethtool_ops support")
+Signed-off-by: Luo bin <luobin9@huawei.com>
+Reviewed-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/huawei/hinic/hinic_main.c |   24 ++++++++++++++++++++++++
+ drivers/net/ethernet/huawei/hinic/hinic_tx.c   |   18 +++---------------
+ 2 files changed, 27 insertions(+), 15 deletions(-)
+
+--- a/drivers/net/ethernet/huawei/hinic/hinic_main.c
++++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c
+@@ -168,6 +168,24 @@ err_init_txq:
+       return err;
+ }
++static void enable_txqs_napi(struct hinic_dev *nic_dev)
++{
++      int num_txqs = hinic_hwdev_num_qps(nic_dev->hwdev);
++      int i;
++
++      for (i = 0; i < num_txqs; i++)
++              napi_enable(&nic_dev->txqs[i].napi);
++}
++
++static void disable_txqs_napi(struct hinic_dev *nic_dev)
++{
++      int num_txqs = hinic_hwdev_num_qps(nic_dev->hwdev);
++      int i;
++
++      for (i = 0; i < num_txqs; i++)
++              napi_disable(&nic_dev->txqs[i].napi);
++}
++
+ /**
+  * free_txqs - Free the Logical Tx Queues of specific NIC device
+  * @nic_dev: the specific NIC device
+@@ -394,6 +412,8 @@ int hinic_open(struct net_device *netdev
+               goto err_create_txqs;
+       }
++      enable_txqs_napi(nic_dev);
++
+       err = create_rxqs(nic_dev);
+       if (err) {
+               netif_err(nic_dev, drv, netdev,
+@@ -475,6 +495,7 @@ err_port_state:
+       }
+ err_create_rxqs:
++      disable_txqs_napi(nic_dev);
+       free_txqs(nic_dev);
+ err_create_txqs:
+@@ -488,6 +509,9 @@ int hinic_close(struct net_device *netde
+       struct hinic_dev *nic_dev = netdev_priv(netdev);
+       unsigned int flags;
++      /* Disable txq napi first to avoid rewaking the txq in free_tx_poll */
++      disable_txqs_napi(nic_dev);
++
+       down(&nic_dev->mgmt_lock);
+       flags = nic_dev->flags;
+--- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c
++++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
+@@ -684,18 +684,6 @@ static int free_tx_poll(struct napi_stru
+       return budget;
+ }
+-static void tx_napi_add(struct hinic_txq *txq, int weight)
+-{
+-      netif_napi_add(txq->netdev, &txq->napi, free_tx_poll, weight);
+-      napi_enable(&txq->napi);
+-}
+-
+-static void tx_napi_del(struct hinic_txq *txq)
+-{
+-      napi_disable(&txq->napi);
+-      netif_napi_del(&txq->napi);
+-}
+-
+ static irqreturn_t tx_irq(int irq, void *data)
+ {
+       struct hinic_txq *txq = data;
+@@ -724,7 +712,7 @@ static int tx_request_irq(struct hinic_t
+       struct hinic_sq *sq = txq->sq;
+       int err;
+-      tx_napi_add(txq, nic_dev->tx_weight);
++      netif_napi_add(txq->netdev, &txq->napi, free_tx_poll, nic_dev->tx_weight);
+       hinic_hwdev_msix_set(nic_dev->hwdev, sq->msix_entry,
+                            TX_IRQ_NO_PENDING, TX_IRQ_NO_COALESC,
+@@ -734,7 +722,7 @@ static int tx_request_irq(struct hinic_t
+       err = request_irq(sq->irq, tx_irq, 0, txq->irq_name, txq);
+       if (err) {
+               dev_err(&pdev->dev, "Failed to request Tx irq\n");
+-              tx_napi_del(txq);
++              netif_napi_del(&txq->napi);
+               return err;
+       }
+@@ -746,7 +734,7 @@ static void tx_free_irq(struct hinic_txq
+       struct hinic_sq *sq = txq->sq;
+       free_irq(sq->irq, txq);
+-      tx_napi_del(txq);
++      netif_napi_del(&txq->napi);
+ }
+ /**
diff --git a/queue-5.8/hv_netvsc-fix-hibernation-for-mlx5-vf-driver.patch b/queue-5.8/hv_netvsc-fix-hibernation-for-mlx5-vf-driver.patch
new file mode 100644 (file)
index 0000000..d0b7622
--- /dev/null
@@ -0,0 +1,80 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Dexuan Cui <decui@microsoft.com>
+Date: Mon, 7 Sep 2020 00:13:39 -0700
+Subject: hv_netvsc: Fix hibernation for mlx5 VF driver
+
+From: Dexuan Cui <decui@microsoft.com>
+
+[ Upstream commit 19162fd4063a3211843b997a454b505edb81d5ce ]
+
+mlx5_suspend()/resume() keep the network interface, so during hibernation
+netvsc_unregister_vf() and netvsc_register_vf() are not called, and hence
+netvsc_resume() should call netvsc_vf_changed() to switch the data path
+back to the VF after hibernation. Note: after we close and re-open the
+vmbus channel of the netvsc NIC in netvsc_suspend() and netvsc_resume(),
+the data path is implicitly switched to the netvsc NIC. Similarly,
+netvsc_suspend() should not call netvsc_unregister_vf(), otherwise the VF
+can no longer be used after hibernation.
+
+For mlx4, since the VF network interface is explicitly destroyed and
+re-created during hibernation (see mlx4_suspend()/resume()), hv_netvsc
+already explicitly switches the data path from and to the VF automatically
+via netvsc_register_vf() and netvsc_unregister_vf(), so mlx4 doesn't need
+this fix. Note: mlx4 can still work with the fix because in
+netvsc_suspend()/resume() ndev_ctx->vf_netdev is NULL for mlx4.
+
+Fixes: 0efeea5fb153 ("hv_netvsc: Add the support of hibernation")
+Signed-off-by: Dexuan Cui <decui@microsoft.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/netvsc_drv.c |   16 +++++++++++-----
+ 1 file changed, 11 insertions(+), 5 deletions(-)
+
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -2544,8 +2544,8 @@ static int netvsc_remove(struct hv_devic
+ static int netvsc_suspend(struct hv_device *dev)
+ {
+       struct net_device_context *ndev_ctx;
+-      struct net_device *vf_netdev, *net;
+       struct netvsc_device *nvdev;
++      struct net_device *net;
+       int ret;
+       net = hv_get_drvdata(dev);
+@@ -2561,10 +2561,6 @@ static int netvsc_suspend(struct hv_devi
+               goto out;
+       }
+-      vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev);
+-      if (vf_netdev)
+-              netvsc_unregister_vf(vf_netdev);
+-
+       /* Save the current config info */
+       ndev_ctx->saved_netvsc_dev_info = netvsc_devinfo_get(nvdev);
+@@ -2580,6 +2576,7 @@ static int netvsc_resume(struct hv_devic
+       struct net_device *net = hv_get_drvdata(dev);
+       struct net_device_context *net_device_ctx;
+       struct netvsc_device_info *device_info;
++      struct net_device *vf_netdev;
+       int ret;
+       rtnl_lock();
+@@ -2592,6 +2589,15 @@ static int netvsc_resume(struct hv_devic
+       netvsc_devinfo_put(device_info);
+       net_device_ctx->saved_netvsc_dev_info = NULL;
++      /* A NIC driver (e.g. mlx5) may keep the VF network interface across
++       * hibernation, but here the data path is implicitly switched to the
++       * netvsc NIC since the vmbus channel is closed and re-opened, so
++       * netvsc_vf_changed() must be used to switch the data path to the VF.
++       */
++      vf_netdev = rtnl_dereference(net_device_ctx->vf_netdev);
++      if (vf_netdev && netvsc_vf_changed(vf_netdev) != NOTIFY_OK)
++              ret = -EINVAL;
++
+       rtnl_unlock();
+       return ret;
diff --git a/queue-5.8/ip-fix-tos-reflection-in-ack-and-reset-packets.patch b/queue-5.8/ip-fix-tos-reflection-in-ack-and-reset-packets.patch
new file mode 100644 (file)
index 0000000..679fc79
--- /dev/null
@@ -0,0 +1,43 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Wei Wang <weiwan@google.com>
+Date: Tue, 8 Sep 2020 14:09:34 -0700
+Subject: ip: fix tos reflection in ack and reset packets
+
+From: Wei Wang <weiwan@google.com>
+
+[ Upstream commit ba9e04a7ddf4f22a10e05bf9403db6b97743c7bf ]
+
+Currently, in tcp_v4_reqsk_send_ack() and tcp_v4_send_reset(), we
+echo the TOS value of the received packets in the response.
+However, we do not want to echo the lower 2 ECN bits in accordance
+with RFC 3168 6.1.5 robustness principles.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+
+Signed-off-by: Wei Wang <weiwan@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ip_output.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/ipv4/ip_output.c
++++ b/net/ipv4/ip_output.c
+@@ -74,6 +74,7 @@
+ #include <net/icmp.h>
+ #include <net/checksum.h>
+ #include <net/inetpeer.h>
++#include <net/inet_ecn.h>
+ #include <net/lwtunnel.h>
+ #include <linux/bpf-cgroup.h>
+ #include <linux/igmp.h>
+@@ -1697,7 +1698,7 @@ void ip_send_unicast_reply(struct sock *
+       if (IS_ERR(rt))
+               return;
+-      inet_sk(sk)->tos = arg->tos;
++      inet_sk(sk)->tos = arg->tos & ~INET_ECN_MASK;
+       sk->sk_protocol = ip_hdr(skb)->protocol;
+       sk->sk_bound_dev_if = arg->bound_dev_if;
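
A hedged sketch of the TOS fix: the low two bits of the TOS byte are the
ECN field (RFC 3168) and must not be echoed into locally generated
ACK/RST packets, so they are masked off before reuse. INET_ECN_MASK is
the kernel's value; the helper name is hypothetical.

#include <stdint.h>
#include <stdio.h>

#define INET_ECN_MASK 3    /* same value the kernel uses */

static uint8_t reply_tos(uint8_t rx_tos)
{
	return rx_tos & ~INET_ECN_MASK;   /* keep DSCP, clear ECN bits */
}

int main(void)
{
	/* DSCP CS5 (0xa0) with ECN CE (0x03) -> the reply uses plain 0xa0 */
	printf("0x%02x\n", reply_tos(0xa3));
	return 0;
}
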
diff --git a/queue-5.8/ipv4-initialize-flowi4_multipath_hash-in-data-path.patch b/queue-5.8/ipv4-initialize-flowi4_multipath_hash-in-data-path.patch
new file mode 100644 (file)
index 0000000..332f9b8
--- /dev/null
@@ -0,0 +1,68 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: David Ahern <dsahern@gmail.com>
+Date: Sun, 13 Sep 2020 12:43:39 -0600
+Subject: ipv4: Initialize flowi4_multipath_hash in data path
+
+From: David Ahern <dsahern@gmail.com>
+
+[ Upstream commit 1869e226a7b3ef75b4f70ede2f1b7229f7157fa4 ]
+
+flowi4_multipath_hash was added by the commit referenced below for
+tunnels. Unfortunately, the patch did not initialize the new field
+for several fast path lookups that do not initialize the entire flow
+struct to 0. Fix those locations. Currently, flowi4_multipath_hash
+is random garbage and affects the hash value computed by
+fib_multipath_hash for multipath selection.
+
+Fixes: 24ba14406c5c ("route: Add multipath_hash in flowi_common to make user-define hash")
+Signed-off-by: David Ahern <dsahern@gmail.com>
+Cc: wenxu <wenxu@ucloud.cn>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/flow.h      |    1 +
+ net/core/filter.c       |    1 +
+ net/ipv4/fib_frontend.c |    1 +
+ net/ipv4/route.c        |    1 +
+ 4 files changed, 4 insertions(+)
+
+--- a/include/net/flow.h
++++ b/include/net/flow.h
+@@ -116,6 +116,7 @@ static inline void flowi4_init_output(st
+       fl4->saddr = saddr;
+       fl4->fl4_dport = dport;
+       fl4->fl4_sport = sport;
++      fl4->flowi4_multipath_hash = 0;
+ }
+ /* Reset some input parameters after previous lookup */
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -4774,6 +4774,7 @@ static int bpf_ipv4_fib_lookup(struct ne
+       fl4.saddr = params->ipv4_src;
+       fl4.fl4_sport = params->sport;
+       fl4.fl4_dport = params->dport;
++      fl4.flowi4_multipath_hash = 0;
+       if (flags & BPF_FIB_LOOKUP_DIRECT) {
+               u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
+--- a/net/ipv4/fib_frontend.c
++++ b/net/ipv4/fib_frontend.c
+@@ -362,6 +362,7 @@ static int __fib_validate_source(struct
+       fl4.flowi4_tun_key.tun_id = 0;
+       fl4.flowi4_flags = 0;
+       fl4.flowi4_uid = sock_net_uid(net, NULL);
++      fl4.flowi4_multipath_hash = 0;
+       no_addr = idev->ifa_list == NULL;
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -2142,6 +2142,7 @@ static int ip_route_input_slow(struct sk
+       fl4.daddr = daddr;
+       fl4.saddr = saddr;
+       fl4.flowi4_uid = sock_net_uid(net, NULL);
++      fl4.flowi4_multipath_hash = 0;
+       if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys)) {
+               flkeys = &_flkeys;
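
The failure mode here is ordinary uninitialized stack data: these fast
paths fill a struct flowi4 member by member instead of zeroing the whole
struct, so any member the code forgets to set holds garbage. A hedged
userspace sketch of the same shape (the struct and functions below are
stand-ins, not the kernel definitions):

  #include <stdio.h>

  struct flow_key {                /* stand-in for struct flowi4 */
          unsigned int daddr;
          unsigned int multipath_hash;
  };

  /* stand-in for fib_multipath_hash() feeding nexthop selection */
  static unsigned int select_path(const struct flow_key *fk, unsigned int n)
  {
          return fk->multipath_hash % n;
  }

  int main(void)
  {
          struct flow_key fk;          /* not zeroed, like the fast paths */

          fk.daddr = 0x0a000001;       /* only some members get assigned */
          /* multipath_hash is stack garbage here; reading it is undefined
           * and the selected leg is effectively random */
          printf("path: %u\n", select_path(&fk, 2));

          fk.multipath_hash = 0;       /* the fix: initialize explicitly */
          printf("path: %u\n", select_path(&fk, 2));
          return 0;
  }
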
diff --git a/queue-5.8/ipv4-update-exception-handling-for-multipath-routes-via-same-device.patch b/queue-5.8/ipv4-update-exception-handling-for-multipath-routes-via-same-device.patch
new file mode 100644 (file)
index 0000000..20d6123
--- /dev/null
@@ -0,0 +1,162 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: David Ahern <dsahern@kernel.org>
+Date: Mon, 14 Sep 2020 21:03:54 -0600
+Subject: ipv4: Update exception handling for multipath routes via same device
+
+From: David Ahern <dsahern@kernel.org>
+
+[ Upstream commit 2fbc6e89b2f1403189e624cabaf73e189c5e50c6 ]
+
+Kfir reported that pmtu exceptions are not created properly for
+deployments where multipath routes use the same device.
+
+After some digging I see 2 compounding problems:
+1. ip_route_output_key_hash_rcu is updating the flowi4_oif *after*
+   the route lookup. This is the second use case where this has
+   been a problem (the first is related to use of vti devices with
+   VRF). I cannot find any reason for the oif to be changed after the
+   lookup; the code goes back to the start of git. It does not seem
+   logical so remove it.
+
+2. fib_lookups for exceptions do not call fib_select_path to handle
+   multipath route selection based on the hash.
+
+The end result is that the fib_lookup used to add the exception
+always creates it based on the first leg of the route.
+
+An example topology showing the problem:
+
+                 |  host1
+             +------+
+             | eth0 |  .209
+             +------+
+                 |
+             +------+
+     switch  | br0  |
+             +------+
+                 |
+       +---------+---------+
+       | host2             |  host3
+   +------+             +------+
+   | eth0 | .250        | eth0 | 192.168.252.252
+   +------+             +------+
+
+   +-----+             +-----+
+   | vti | .2          | vti | 192.168.247.3
+   +-----+             +-----+
+       \                  /
+ =================================
+ tunnels
+         192.168.247.1/24
+
+for h in host1 host2 host3; do
+        ip netns add ${h}
+        ip -netns ${h} link set lo up
+        ip netns exec ${h} sysctl -wq net.ipv4.ip_forward=1
+done
+
+ip netns add switch
+ip -netns switch li set lo up
+ip -netns switch link add br0 type bridge stp 0
+ip -netns switch link set br0 up
+
+for n in 1 2 3; do
+        ip -netns switch link add eth-sw type veth peer name eth-h${n}
+        ip -netns switch li set eth-h${n} master br0 up
+        ip -netns switch li set eth-sw netns host${n} name eth0
+done
+
+ip -netns host1 addr add 192.168.252.209/24 dev eth0
+ip -netns host1 link set dev eth0 up
+ip -netns host1 route add 192.168.247.0/24 \
+        nexthop via 192.168.252.250 dev eth0 nexthop via 192.168.252.252 dev eth0
+
+ip -netns host2 addr add 192.168.252.250/24 dev eth0
+ip -netns host2 link set dev eth0 up
+
+ip -netns host3 addr add 192.168.252.252/24 dev eth0
+ip -netns host3 link set dev eth0 up
+
+ip netns add tunnel
+ip -netns tunnel li set lo up
+ip -netns tunnel li add br0 type bridge
+ip -netns tunnel li set br0 up
+for n in $(seq 11 20); do
+        ip -netns tunnel addr add dev br0 192.168.247.${n}/24
+done
+
+for n in 2 3
+do
+        ip -netns tunnel link add vti${n} type veth peer name eth${n}
+        ip -netns tunnel link set eth${n} mtu 1360 master br0 up
+        ip -netns tunnel link set vti${n} netns host${n} mtu 1360 up
+        ip -netns host${n} addr add dev vti${n} 192.168.247.${n}/24
+done
+ip -netns tunnel ro add default nexthop via 192.168.247.2 nexthop via 192.168.247.3
+
+ip netns exec host1 ping -M do -s 1400 -c3 -I 192.168.252.209 192.168.247.11
+ip netns exec host1 ping -M do -s 1400 -c3 -I 192.168.252.209 192.168.247.15
+ip -netns host1 ro ls cache
+
+Before this patch, the cache always shows exceptions against the first
+leg of the multipath route (192.168.252.250 in this example). Since the
+hash has an initial random seed, you may need to vary the final octet
+more than what is listed; in my tests, using addresses between 11 and
+19 usually found one that exercised both legs.
+
+With this patch, the cache will have exceptions for both legs.
+
+Fixes: 4895c771c7f0 ("ipv4: Add FIB nexthop exceptions")
+Reported-by: Kfir Itzhak <mastertheknife@gmail.com>
+Signed-off-by: David Ahern <dsahern@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/route.c |   13 ++++++++-----
+ 1 file changed, 8 insertions(+), 5 deletions(-)
+
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -786,8 +786,10 @@ static void __ip_do_redirect(struct rtab
+                       neigh_event_send(n, NULL);
+               } else {
+                       if (fib_lookup(net, fl4, &res, 0) == 0) {
+-                              struct fib_nh_common *nhc = FIB_RES_NHC(res);
++                              struct fib_nh_common *nhc;
++                              fib_select_path(net, &res, fl4, skb);
++                              nhc = FIB_RES_NHC(res);
+                               update_or_create_fnhe(nhc, fl4->daddr, new_gw,
+                                               0, false,
+                                               jiffies + ip_rt_gc_timeout);
+@@ -1013,6 +1015,7 @@ out:     kfree_skb(skb);
+ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
+ {
+       struct dst_entry *dst = &rt->dst;
++      struct net *net = dev_net(dst->dev);
+       u32 old_mtu = ipv4_mtu(dst);
+       struct fib_result res;
+       bool lock = false;
+@@ -1033,9 +1036,11 @@ static void __ip_rt_update_pmtu(struct r
+               return;
+       rcu_read_lock();
+-      if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) {
+-              struct fib_nh_common *nhc = FIB_RES_NHC(res);
++      if (fib_lookup(net, fl4, &res, 0) == 0) {
++              struct fib_nh_common *nhc;
++              fib_select_path(net, &res, fl4, NULL);
++              nhc = FIB_RES_NHC(res);
+               update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock,
+                                     jiffies + ip_rt_mtu_expires);
+       }
+@@ -2663,8 +2668,6 @@ struct rtable *ip_route_output_key_hash_
+       fib_select_path(net, res, fl4, skb);
+       dev_out = FIB_RES_DEV(*res);
+-      fl4->flowi4_oif = dev_out->ifindex;
+-
+ make_route:
+       rth = __mkroute_output(res, fl4, orig_oif, dev_out, flags);
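
A toy model of the behavior change, assuming a two-leg route (none of
the names below are kernel APIs): without running multipath selection on
the lookup result, every exception lands on leg 0; with hash-based
selection, exceptions spread across both legs.

  #include <stdio.h>

  struct nexthop { const char *gw; unsigned int pmtu; };

  static struct nexthop legs[2] = {
          { "192.168.252.250", 0 }, { "192.168.252.252", 0 },
  };

  /* stand-in for fib_multipath_hash(): map a flow hash to a leg */
  static unsigned int select_leg(unsigned int hash) { return hash % 2; }

  static void create_exception(unsigned int hash, unsigned int mtu, int sel)
  {
          unsigned int leg = sel ? select_leg(hash) : 0; /* old: leg 0 */
          legs[leg].pmtu = mtu;
  }

  int main(void)
  {
          unsigned int hash;

          for (hash = 11; hash <= 14; hash++)   /* old behavior */
                  create_exception(hash, 1360, 0);
          printf("old: %u %u\n", legs[0].pmtu, legs[1].pmtu); /* 1360 0 */

          legs[0].pmtu = legs[1].pmtu = 0;
          for (hash = 11; hash <= 14; hash++)   /* fixed behavior */
                  create_exception(hash, 1360, 1);
          printf("new: %u %u\n", legs[0].pmtu, legs[1].pmtu); /* 1360 1360 */
          return 0;
  }
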
diff --git a/queue-5.8/ipv6-avoid-lockdep-issue-in-fib6_del.patch b/queue-5.8/ipv6-avoid-lockdep-issue-in-fib6_del.patch
new file mode 100644 (file)
index 0000000..fa4d228
--- /dev/null
@@ -0,0 +1,105 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 8 Sep 2020 01:20:23 -0700
+Subject: ipv6: avoid lockdep issue in fib6_del()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 843d926b003ea692468c8cc5bea1f9f58dfa8c75 ]
+
+syzbot has twice reported a lockdep issue in fib6_del() [1],
+which I think is caused by net->ipv6.fib6_null_entry
+having a NULL fib6_table pointer.
+
+fib6_del() already checks for fib6_null_entry special
+case, we only need to return earlier.
+
+The bug seems to occur very rarely; I have thus chosen
+a 'bug origin' that keeps backports from being too complex.
+
+[1]
+WARNING: suspicious RCU usage
+5.9.0-rc4-syzkaller #0 Not tainted
+-----------------------------
+net/ipv6/ip6_fib.c:1996 suspicious rcu_dereference_protected() usage!
+
+other info that might help us debug this:
+
+rcu_scheduler_active = 2, debug_locks = 1
+4 locks held by syz-executor.5/8095:
+ #0: ffffffff8a7ea708 (rtnl_mutex){+.+.}-{3:3}, at: ppp_release+0x178/0x240 drivers/net/ppp/ppp_generic.c:401
+ #1: ffff88804c422dd8 (&net->ipv6.fib6_gc_lock){+.-.}-{2:2}, at: spin_trylock_bh include/linux/spinlock.h:414 [inline]
+ #1: ffff88804c422dd8 (&net->ipv6.fib6_gc_lock){+.-.}-{2:2}, at: fib6_run_gc+0x21b/0x2d0 net/ipv6/ip6_fib.c:2312
+ #2: ffffffff89bd6a40 (rcu_read_lock){....}-{1:2}, at: __fib6_clean_all+0x0/0x290 net/ipv6/ip6_fib.c:2613
+ #3: ffff8880a82e6430 (&tb->tb6_lock){+.-.}-{2:2}, at: spin_lock_bh include/linux/spinlock.h:359 [inline]
+ #3: ffff8880a82e6430 (&tb->tb6_lock){+.-.}-{2:2}, at: __fib6_clean_all+0x107/0x290 net/ipv6/ip6_fib.c:2245
+
+stack backtrace:
+CPU: 1 PID: 8095 Comm: syz-executor.5 Not tainted 5.9.0-rc4-syzkaller #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Call Trace:
+ __dump_stack lib/dump_stack.c:77 [inline]
+ dump_stack+0x198/0x1fd lib/dump_stack.c:118
+ fib6_del+0x12b4/0x1630 net/ipv6/ip6_fib.c:1996
+ fib6_clean_node+0x39b/0x570 net/ipv6/ip6_fib.c:2180
+ fib6_walk_continue+0x4aa/0x8e0 net/ipv6/ip6_fib.c:2102
+ fib6_walk+0x182/0x370 net/ipv6/ip6_fib.c:2150
+ fib6_clean_tree+0xdb/0x120 net/ipv6/ip6_fib.c:2230
+ __fib6_clean_all+0x120/0x290 net/ipv6/ip6_fib.c:2246
+ fib6_clean_all net/ipv6/ip6_fib.c:2257 [inline]
+ fib6_run_gc+0x113/0x2d0 net/ipv6/ip6_fib.c:2320
+ ndisc_netdev_event+0x217/0x350 net/ipv6/ndisc.c:1805
+ notifier_call_chain+0xb5/0x200 kernel/notifier.c:83
+ call_netdevice_notifiers_info+0xb5/0x130 net/core/dev.c:2033
+ call_netdevice_notifiers_extack net/core/dev.c:2045 [inline]
+ call_netdevice_notifiers net/core/dev.c:2059 [inline]
+ dev_close_many+0x30b/0x650 net/core/dev.c:1634
+ rollback_registered_many+0x3a8/0x1210 net/core/dev.c:9261
+ rollback_registered net/core/dev.c:9329 [inline]
+ unregister_netdevice_queue+0x2dd/0x570 net/core/dev.c:10410
+ unregister_netdevice include/linux/netdevice.h:2774 [inline]
+ ppp_release+0x216/0x240 drivers/net/ppp/ppp_generic.c:403
+ __fput+0x285/0x920 fs/file_table.c:281
+ task_work_run+0xdd/0x190 kernel/task_work.c:141
+ tracehook_notify_resume include/linux/tracehook.h:188 [inline]
+ exit_to_user_mode_loop kernel/entry/common.c:163 [inline]
+ exit_to_user_mode_prepare+0x1e1/0x200 kernel/entry/common.c:190
+ syscall_exit_to_user_mode+0x7e/0x2e0 kernel/entry/common.c:265
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+Fixes: 421842edeaf6 ("net/ipv6: Add fib6_null_entry")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: David Ahern <dsahern@gmail.com>
+Reviewed-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_fib.c |   13 +++++++++----
+ 1 file changed, 9 insertions(+), 4 deletions(-)
+
+--- a/net/ipv6/ip6_fib.c
++++ b/net/ipv6/ip6_fib.c
+@@ -1992,14 +1992,19 @@ static void fib6_del_route(struct fib6_t
+ /* Need to own table->tb6_lock */
+ int fib6_del(struct fib6_info *rt, struct nl_info *info)
+ {
+-      struct fib6_node *fn = rcu_dereference_protected(rt->fib6_node,
+-                                  lockdep_is_held(&rt->fib6_table->tb6_lock));
+-      struct fib6_table *table = rt->fib6_table;
+       struct net *net = info->nl_net;
+       struct fib6_info __rcu **rtp;
+       struct fib6_info __rcu **rtp_next;
++      struct fib6_table *table;
++      struct fib6_node *fn;
+-      if (!fn || rt == net->ipv6.fib6_null_entry)
++      if (rt == net->ipv6.fib6_null_entry)
++              return -ENOENT;
++
++      table = rt->fib6_table;
++      fn = rcu_dereference_protected(rt->fib6_node,
++                                     lockdep_is_held(&table->tb6_lock));
++      if (!fn)
+               return -ENOENT;
+       WARN_ON(!(fn->fn_flags & RTN_RTINFO));
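
The fix is purely an ordering change: test for the sentinel before
computing anything reached through it. In the kernel the symptom was a
lockdep splat rather than a crash, but the hazard has the same shape as
this minimal sketch with stand-in types:

  #include <stddef.h>
  #include <stdio.h>

  struct table { int lock; };
  struct route { struct table *table; };  /* NULL for the sentinel */

  static struct route null_entry = { NULL };  /* like fib6_null_entry */

  static int route_del(struct route *rt)
  {
          struct table *tb;

          /* the old shape derived tb in the initializers, before this
           * test could filter the sentinel out */
          if (rt == &null_entry)
                  return -1;

          tb = rt->table;              /* safe: sentinel handled above */
          printf("deleting under lock %d\n", tb->lock);
          return 0;
  }

  int main(void)
  {
          struct table tb = { 1 };
          struct route rt = { &tb };

          route_del(&null_entry);      /* error return, no dereference */
          return route_del(&rt);
  }
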
diff --git a/queue-5.8/net-add-__must_check-to-skb_put_padto.patch b/queue-5.8/net-add-__must_check-to-skb_put_padto.patch
new file mode 100644 (file)
index 0000000..5e21517
--- /dev/null
@@ -0,0 +1,42 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 9 Sep 2020 01:27:40 -0700
+Subject: net: add __must_check to skb_put_padto()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 4a009cb04aeca0de60b73f37b102573354214b52 ]
+
+skb_put_padto() and __skb_put_padto() callers
+must check return values or risk use-after-free.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/skbuff.h |    7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -3208,8 +3208,9 @@ static inline int skb_padto(struct sk_bu
+  *    is untouched. Otherwise it is extended. Returns zero on
+  *    success. The skb is freed on error if @free_on_error is true.
+  */
+-static inline int __skb_put_padto(struct sk_buff *skb, unsigned int len,
+-                                bool free_on_error)
++static inline int __must_check __skb_put_padto(struct sk_buff *skb,
++                                             unsigned int len,
++                                             bool free_on_error)
+ {
+       unsigned int size = skb->len;
+@@ -3232,7 +3233,7 @@ static inline int __skb_put_padto(struct
+  *    is untouched. Otherwise it is extended. Returns zero on
+  *    success. The skb is freed on error.
+  */
+-static inline int skb_put_padto(struct sk_buff *skb, unsigned int len)
++static inline int __must_check skb_put_padto(struct sk_buff *skb, unsigned int len)
+ {
+       return __skb_put_padto(skb, len, true);
+ }
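
In mainline, __must_check expands to the compiler's warn_unused_result
attribute, so after this patch a caller that silently drops the return
value gets a build-time warning instead of a latent use-after-free. A
standalone sketch (the buffer type and failure rule are stand-ins):

  #include <stdio.h>

  #define __must_check __attribute__((warn_unused_result))

  /* stand-in for __skb_put_padto(): may "free" the buffer and fail */
  static __must_check int put_padto(char *buf, unsigned int len)
  {
          if (len > 64)
                  return -1;   /* in the kernel, the skb is freed here */
          buf[len - 1] = 0;    /* pad out to len */
          return 0;
  }

  int main(void)
  {
          char frame[64];

          /* put_padto(frame, 60);  -- would now trigger -Wunused-result */

          if (put_padto(frame, 60))    /* correct: check before using */
                  return 1;
          printf("padded ok\n");
          return 0;
  }
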
diff --git a/queue-5.8/net-bridge-br_vlan_get_pvid_rcu-should-dereference-the-vlan-group-under-rcu.patch b/queue-5.8/net-bridge-br_vlan_get_pvid_rcu-should-dereference-the-vlan-group-under-rcu.patch
new file mode 100644 (file)
index 0000000..60ff93a
--- /dev/null
@@ -0,0 +1,93 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Vladimir Oltean <vladimir.oltean@nxp.com>
+Date: Tue, 22 Sep 2020 01:07:09 +0300
+Subject: net: bridge: br_vlan_get_pvid_rcu() should dereference the VLAN group under RCU
+
+From: Vladimir Oltean <vladimir.oltean@nxp.com>
+
+[ Upstream commit 99f62a746066fa436aa15d4606a538569540db08 ]
+
+When calling the RCU brother of br_vlan_get_pvid(), lockdep warns:
+
+=============================
+WARNING: suspicious RCU usage
+5.9.0-rc3-01631-g13c17acb8e38-dirty #814 Not tainted
+-----------------------------
+net/bridge/br_private.h:1054 suspicious rcu_dereference_protected() usage!
+
+Call trace:
+ lockdep_rcu_suspicious+0xd4/0xf8
+ __br_vlan_get_pvid+0xc0/0x100
+ br_vlan_get_pvid_rcu+0x78/0x108
+
+The warning is because br_vlan_get_pvid_rcu() calls nbp_vlan_group()
+which calls rtnl_dereference() instead of rcu_dereference(). In turn,
+rtnl_dereference() calls rcu_dereference_protected() which assumes
+operation under an RCU write-side critical section, which obviously is
+not the case here. So, when the incorrect primitive is used to access
+the RCU-protected VLAN group pointer, READ_ONCE() is not used, which may
+cause various unexpected problems.
+
+I'm sad to say that br_vlan_get_pvid() and br_vlan_get_pvid_rcu() cannot
+share the same implementation. So fix the bug by splitting the 2
+functions, and making br_vlan_get_pvid_rcu() retrieve the VLAN groups
+under proper locking annotations.
+
+Fixes: 7582f5b70f9a ("bridge: add br_vlan_get_pvid_rcu()")
+Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/bridge/br_vlan.c |   27 +++++++++++++++++----------
+ 1 file changed, 17 insertions(+), 10 deletions(-)
+
+--- a/net/bridge/br_vlan.c
++++ b/net/bridge/br_vlan.c
+@@ -1288,11 +1288,13 @@ void br_vlan_get_stats(const struct net_
+       }
+ }
+-static int __br_vlan_get_pvid(const struct net_device *dev,
+-                            struct net_bridge_port *p, u16 *p_pvid)
++int br_vlan_get_pvid(const struct net_device *dev, u16 *p_pvid)
+ {
+       struct net_bridge_vlan_group *vg;
++      struct net_bridge_port *p;
++      ASSERT_RTNL();
++      p = br_port_get_check_rtnl(dev);
+       if (p)
+               vg = nbp_vlan_group(p);
+       else if (netif_is_bridge_master(dev))
+@@ -1303,18 +1305,23 @@ static int __br_vlan_get_pvid(const stru
+       *p_pvid = br_get_pvid(vg);
+       return 0;
+ }
+-
+-int br_vlan_get_pvid(const struct net_device *dev, u16 *p_pvid)
+-{
+-      ASSERT_RTNL();
+-
+-      return __br_vlan_get_pvid(dev, br_port_get_check_rtnl(dev), p_pvid);
+-}
+ EXPORT_SYMBOL_GPL(br_vlan_get_pvid);
+ int br_vlan_get_pvid_rcu(const struct net_device *dev, u16 *p_pvid)
+ {
+-      return __br_vlan_get_pvid(dev, br_port_get_check_rcu(dev), p_pvid);
++      struct net_bridge_vlan_group *vg;
++      struct net_bridge_port *p;
++
++      p = br_port_get_check_rcu(dev);
++      if (p)
++              vg = nbp_vlan_group_rcu(p);
++      else if (netif_is_bridge_master(dev))
++              vg = br_vlan_group_rcu(netdev_priv(dev));
++      else
++              return -EINVAL;
++
++      *p_pvid = br_get_pvid(vg);
++      return 0;
+ }
+ EXPORT_SYMBOL_GPL(br_vlan_get_pvid_rcu);
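
The underlying rule, sketched below with a made-up structure (this is
not the bridge code, only the two accessor shapes): rtnl_dereference()
is for contexts holding the RTNL writer lock, while readers must pair
rcu_dereference() with an RCU read-side critical section.

  #include <linux/rtnetlink.h>
  #include <linux/rcupdate.h>

  struct cfg { int pvid; };
  struct port { struct cfg __rcu *cfg; };

  /* RTNL-protected accessor: caller must hold rtnl_mutex */
  static int port_pvid(struct port *p)
  {
          struct cfg *c;

          ASSERT_RTNL();
          c = rtnl_dereference(p->cfg);  /* lockdep-checked against RTNL */
          return c ? c->pvid : -1;
  }

  /* RCU accessor: usable from any read-side critical section */
  static int port_pvid_rcu(struct port *p)
  {
          struct cfg *c;
          int pvid;

          rcu_read_lock();
          c = rcu_dereference(p->cfg);   /* READ_ONCE + lockdep RCU check */
          pvid = c ? c->pvid : -1;
          rcu_read_unlock();
          return pvid;
  }
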
diff --git a/queue-5.8/net-dcb-validate-dcb_attr_dcb_buffer-argument.patch b/queue-5.8/net-dcb-validate-dcb_attr_dcb_buffer-argument.patch
new file mode 100644 (file)
index 0000000..a9f3d90
--- /dev/null
@@ -0,0 +1,57 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Petr Machata <petrm@nvidia.com>
+Date: Thu, 10 Sep 2020 14:09:05 +0200
+Subject: net: DCB: Validate DCB_ATTR_DCB_BUFFER argument
+
+From: Petr Machata <petrm@nvidia.com>
+
+[ Upstream commit 297e77e53eadb332d5062913447b104a772dc33b ]
+
+The parameter passed via DCB_ATTR_DCB_BUFFER is a struct dcbnl_buffer. The
+field prio2buffer is an array of IEEE_8021Q_MAX_PRIORITIES bytes, where
+each value is a number of a buffer to direct that priority's traffic to.
+That value is however never validated to lie within the bounds set by
+DCBX_MAX_BUFFERS. The only driver that currently implements the callback is
+mlx5 (maintainers CCd), and that does not do any validation either, in
+particular allowing incorrect configuration if the prio2buffer value does
+not fit into 4 bits.
+
+Instead of offloading the need to validate the buffer index to drivers, do
+it right there in core, and bounce the request if the value is too large.
+
+CC: Parav Pandit <parav@nvidia.com>
+CC: Saeed Mahameed <saeedm@nvidia.com>
+Fixes: e549f6f9c098 ("net/dcb: Add dcbnl buffer attribute")
+Signed-off-by: Petr Machata <petrm@nvidia.com>
+Reviewed-by: Ido Schimmel <idosch@nvidia.com>
+Reviewed-by: Jiri Pirko <jiri@nvidia.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/dcb/dcbnl.c |    8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/net/dcb/dcbnl.c
++++ b/net/dcb/dcbnl.c
+@@ -1426,6 +1426,7 @@ static int dcbnl_ieee_set(struct net_dev
+ {
+       const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops;
+       struct nlattr *ieee[DCB_ATTR_IEEE_MAX + 1];
++      int prio;
+       int err;
+       if (!ops)
+@@ -1475,6 +1476,13 @@ static int dcbnl_ieee_set(struct net_dev
+               struct dcbnl_buffer *buffer =
+                       nla_data(ieee[DCB_ATTR_DCB_BUFFER]);
++              for (prio = 0; prio < ARRAY_SIZE(buffer->prio2buffer); prio++) {
++                      if (buffer->prio2buffer[prio] >= DCBX_MAX_BUFFERS) {
++                              err = -EINVAL;
++                              goto err;
++                      }
++              }
++
+               err = ops->dcbnl_setbuffer(netdev, buffer);
+               if (err)
+                       goto err;
diff --git a/queue-5.8/net-dsa-link-interfaces-with-the-dsa-master-to-get-rid-of-lockdep-warnings.patch b/queue-5.8/net-dsa-link-interfaces-with-the-dsa-master-to-get-rid-of-lockdep-warnings.patch
new file mode 100644 (file)
index 0000000..a17da40
--- /dev/null
@@ -0,0 +1,153 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Vladimir Oltean <olteanv@gmail.com>
+Date: Tue, 8 Sep 2020 02:48:42 +0300
+Subject: net: dsa: link interfaces with the DSA master to get rid of lockdep warnings
+
+From: Vladimir Oltean <olteanv@gmail.com>
+
+[ Upstream commit 2f1e8ea726e9020e01e9e2ae29c2d5eb11133032 ]
+
+Since commit 845e0ebb4408 ("net: change addr_list_lock back to static
+key"), cascaded DSA setups (DSA switch port as DSA master for another
+DSA switch port) are emitting this lockdep warning:
+
+============================================
+WARNING: possible recursive locking detected
+5.8.0-rc1-00133-g923e4b5032dd-dirty #208 Not tainted
+--------------------------------------------
+dhcpcd/323 is trying to acquire lock:
+ffff000066dd4268 (&dsa_master_addr_list_lock_key/1){+...}-{2:2}, at: dev_mc_sync+0x44/0x90
+
+but task is already holding lock:
+ffff00006608c268 (&dsa_master_addr_list_lock_key/1){+...}-{2:2}, at: dev_mc_sync+0x44/0x90
+
+other info that might help us debug this:
+ Possible unsafe locking scenario:
+
+       CPU0
+       ----
+  lock(&dsa_master_addr_list_lock_key/1);
+  lock(&dsa_master_addr_list_lock_key/1);
+
+ *** DEADLOCK ***
+
+ May be due to missing lock nesting notation
+
+3 locks held by dhcpcd/323:
+ #0: ffffdbd1381dda18 (rtnl_mutex){+.+.}-{3:3}, at: rtnl_lock+0x24/0x30
+ #1: ffff00006614b268 (_xmit_ETHER){+...}-{2:2}, at: dev_set_rx_mode+0x28/0x48
+ #2: ffff00006608c268 (&dsa_master_addr_list_lock_key/1){+...}-{2:2}, at: dev_mc_sync+0x44/0x90
+
+stack backtrace:
+Call trace:
+ dump_backtrace+0x0/0x1e0
+ show_stack+0x20/0x30
+ dump_stack+0xec/0x158
+ __lock_acquire+0xca0/0x2398
+ lock_acquire+0xe8/0x440
+ _raw_spin_lock_nested+0x64/0x90
+ dev_mc_sync+0x44/0x90
+ dsa_slave_set_rx_mode+0x34/0x50
+ __dev_set_rx_mode+0x60/0xa0
+ dev_mc_sync+0x84/0x90
+ dsa_slave_set_rx_mode+0x34/0x50
+ __dev_set_rx_mode+0x60/0xa0
+ dev_set_rx_mode+0x30/0x48
+ __dev_open+0x10c/0x180
+ __dev_change_flags+0x170/0x1c8
+ dev_change_flags+0x2c/0x70
+ devinet_ioctl+0x774/0x878
+ inet_ioctl+0x348/0x3b0
+ sock_do_ioctl+0x50/0x310
+ sock_ioctl+0x1f8/0x580
+ ksys_ioctl+0xb0/0xf0
+ __arm64_sys_ioctl+0x28/0x38
+ el0_svc_common.constprop.0+0x7c/0x180
+ do_el0_svc+0x2c/0x98
+ el0_sync_handler+0x9c/0x1b8
+ el0_sync+0x158/0x180
+
+Since DSA never made use of the netdev API for describing links between
+upper devices and lower devices, the dev->lower_level value of a DSA
+switch interface would be 1, which would warn when it is a DSA master.
+
+We can use netdev_upper_dev_link() to describe the relationship between
+a DSA slave and a DSA master. To be precise, a DSA "slave" (switch port)
+is an "upper" to a DSA "master" (host port). The relationship is "many
+uppers to one lower", like in the case of VLAN. So, for that reason, we
+use the same function as VLAN uses.
+
+There might be a chance that somebody will try to take hold of this
+interface and use it immediately after register_netdev() and before
+netdev_upper_dev_link(). To avoid that, we do the registration and
+linkage while holding the RTNL, and we use the RTNL-locked cousin of
+register_netdev(), which is register_netdevice().
+
+Since this warning was not there when lockdep was using dynamic keys for
+addr_list_lock, we are blaming the lockdep patch itself. The network
+stack _has_ been using static lockdep keys before, and it _is_ likely
+that stacked DSA setups have been triggering these lockdep warnings
+since forever; however, I can't test very old kernels on this
+particular stacked DSA setup to ensure I'm not in fact introducing
+regressions.
+
+Fixes: 845e0ebb4408 ("net: change addr_list_lock back to static key")
+Suggested-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
+Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/dsa/slave.c |   18 ++++++++++++++++--
+ 1 file changed, 16 insertions(+), 2 deletions(-)
+
+--- a/net/dsa/slave.c
++++ b/net/dsa/slave.c
+@@ -1801,15 +1801,27 @@ int dsa_slave_create(struct dsa_port *po
+       dsa_slave_notify(slave_dev, DSA_PORT_REGISTER);
+-      ret = register_netdev(slave_dev);
++      rtnl_lock();
++
++      ret = register_netdevice(slave_dev);
+       if (ret) {
+               netdev_err(master, "error %d registering interface %s\n",
+                          ret, slave_dev->name);
++              rtnl_unlock();
+               goto out_phy;
+       }
++      ret = netdev_upper_dev_link(master, slave_dev, NULL);
++
++      rtnl_unlock();
++
++      if (ret)
++              goto out_unregister;
++
+       return 0;
++out_unregister:
++      unregister_netdev(slave_dev);
+ out_phy:
+       rtnl_lock();
+       phylink_disconnect_phy(p->dp->pl);
+@@ -1826,16 +1838,18 @@ out_free:
+ void dsa_slave_destroy(struct net_device *slave_dev)
+ {
++      struct net_device *master = dsa_slave_to_master(slave_dev);
+       struct dsa_port *dp = dsa_slave_to_port(slave_dev);
+       struct dsa_slave_priv *p = netdev_priv(slave_dev);
+       netif_carrier_off(slave_dev);
+       rtnl_lock();
++      netdev_upper_dev_unlink(master, slave_dev);
++      unregister_netdevice(slave_dev);
+       phylink_disconnect_phy(dp->pl);
+       rtnl_unlock();
+       dsa_slave_notify(slave_dev, DSA_PORT_UNREGISTER);
+-      unregister_netdev(slave_dev);
+       phylink_destroy(dp->pl);
+       gro_cells_destroy(&p->gcells);
+       free_percpu(p->stats64);
diff --git a/queue-5.8/net-dsa-microchip-ksz8795-really-set-the-correct-number-of-ports.patch b/queue-5.8/net-dsa-microchip-ksz8795-really-set-the-correct-number-of-ports.patch
new file mode 100644 (file)
index 0000000..bf87014
--- /dev/null
@@ -0,0 +1,37 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Matthias Schiffer <matthias.schiffer@ew.tq-group.com>
+Date: Wed, 16 Sep 2020 12:08:39 +0200
+Subject: net: dsa: microchip: ksz8795: really set the correct number of ports
+
+From: Matthias Schiffer <matthias.schiffer@ew.tq-group.com>
+
+[ Upstream commit fd944dc24336922656a48f4608bfb41abdcdc4aa ]
+
+The KSZ9477 and KSZ8795 use the port_cnt field differently: For the
+KSZ9477, it includes the CPU port(s), while for the KSZ8795, it doesn't.
+
+It would be a good cleanup to make the handling of both drivers match,
+but as a first step, fix the recently broken assignment of num_ports in
+the KSZ8795 driver (which completely broke probing, as the CPU port
+index was always failing the num_ports check).
+
+Fixes: af199a1a9cb0 ("net: dsa: microchip: set the correct number of ports")
+Signed-off-by: Matthias Schiffer <matthias.schiffer@ew.tq-group.com>
+Reviewed-by: Codrin Ciubotariu <codrin.ciubotariu@microchip.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/microchip/ksz8795.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/dsa/microchip/ksz8795.c
++++ b/drivers/net/dsa/microchip/ksz8795.c
+@@ -1269,7 +1269,7 @@ static int ksz8795_switch_init(struct ks
+       }
+       /* set the real number of ports */
+-      dev->ds->num_ports = dev->port_cnt;
++      dev->ds->num_ports = dev->port_cnt + 1;
+       return 0;
+ }
diff --git a/queue-5.8/net-dsa-rtl8366-properly-clear-member-config.patch b/queue-5.8/net-dsa-rtl8366-properly-clear-member-config.patch
new file mode 100644 (file)
index 0000000..515793f
--- /dev/null
@@ -0,0 +1,55 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Linus Walleij <linus.walleij@linaro.org>
+Date: Sat, 5 Sep 2020 12:32:33 +0200
+Subject: net: dsa: rtl8366: Properly clear member config
+
+From: Linus Walleij <linus.walleij@linaro.org>
+
+[ Upstream commit 4ddcaf1ebb5e4e99240f29d531ee69d4244fe416 ]
+
+When removing a port from a VLAN we are just erasing the
+member config for the VLAN, which is wrong: other ports
+can be using it.
+
+Just mask off the port and only zero out the rest of the
+member config once ports using of the VLAN are removed
+from it.
+
+Reported-by: Florian Fainelli <f.fainelli@gmail.com>
+Fixes: d8652956cf37 ("net: dsa: realtek-smi: Add Realtek SMI driver")
+Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/rtl8366.c |   20 +++++++++++++-------
+ 1 file changed, 13 insertions(+), 7 deletions(-)
+
+--- a/drivers/net/dsa/rtl8366.c
++++ b/drivers/net/dsa/rtl8366.c
+@@ -452,13 +452,19 @@ int rtl8366_vlan_del(struct dsa_switch *
+                               return ret;
+                       if (vid == vlanmc.vid) {
+-                              /* clear VLAN member configurations */
+-                              vlanmc.vid = 0;
+-                              vlanmc.priority = 0;
+-                              vlanmc.member = 0;
+-                              vlanmc.untag = 0;
+-                              vlanmc.fid = 0;
+-
++                              /* Remove this port from the VLAN */
++                              vlanmc.member &= ~BIT(port);
++                              vlanmc.untag &= ~BIT(port);
++                              /*
++                               * If no ports are members of this VLAN
++                               * anymore then clear the whole member
++                               * config so it can be reused.
++                               */
++                              if (!vlanmc.member && vlanmc.untag) {
++                                      vlanmc.vid = 0;
++                                      vlanmc.priority = 0;
++                                      vlanmc.fid = 0;
++                              }
+                               ret = smi->ops->set_vlan_mc(smi, i, &vlanmc);
+                               if (ret) {
+                                       dev_err(smi->dev,
diff --git a/queue-5.8/net-ethernet-ti-cpsw_new-fix-suspend-resume.patch b/queue-5.8/net-ethernet-ti-cpsw_new-fix-suspend-resume.patch
new file mode 100644 (file)
index 0000000..c4af8dd
--- /dev/null
@@ -0,0 +1,92 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Grygorii Strashko <grygorii.strashko@ti.com>
+Date: Thu, 10 Sep 2020 23:52:29 +0300
+Subject: net: ethernet: ti: cpsw_new: fix suspend/resume
+
+From: Grygorii Strashko <grygorii.strashko@ti.com>
+
+[ Upstream commit 5760d9acbe9514eec68eb70821d6fa5764f57042 ]
+
+Add missed suspend/resume callbacks to properly restore networking after
+suspend/resume cycle.
+
+Fixes: ed3525eda4c4 ("net: ethernet: ti: introduce cpsw switchdev based driver part 1 - dual-emac")
+Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/ti/cpsw_new.c |   53 +++++++++++++++++++++++++++++++++++++
+ 1 file changed, 53 insertions(+)
+
+--- a/drivers/net/ethernet/ti/cpsw_new.c
++++ b/drivers/net/ethernet/ti/cpsw_new.c
+@@ -17,6 +17,7 @@
+ #include <linux/phy.h>
+ #include <linux/phy/phy.h>
+ #include <linux/delay.h>
++#include <linux/pinctrl/consumer.h>
+ #include <linux/pm_runtime.h>
+ #include <linux/gpio/consumer.h>
+ #include <linux/of.h>
+@@ -2070,9 +2071,61 @@ static int cpsw_remove(struct platform_d
+       return 0;
+ }
++static int __maybe_unused cpsw_suspend(struct device *dev)
++{
++      struct cpsw_common *cpsw = dev_get_drvdata(dev);
++      int i;
++
++      rtnl_lock();
++
++      for (i = 0; i < cpsw->data.slaves; i++) {
++              struct net_device *ndev = cpsw->slaves[i].ndev;
++
++              if (!(ndev && netif_running(ndev)))
++                      continue;
++
++              cpsw_ndo_stop(ndev);
++      }
++
++      rtnl_unlock();
++
++      /* Select sleep pin state */
++      pinctrl_pm_select_sleep_state(dev);
++
++      return 0;
++}
++
++static int __maybe_unused cpsw_resume(struct device *dev)
++{
++      struct cpsw_common *cpsw = dev_get_drvdata(dev);
++      int i;
++
++      /* Select default pin state */
++      pinctrl_pm_select_default_state(dev);
++
++      /* shut up ASSERT_RTNL() warning in netif_set_real_num_tx/rx_queues */
++      rtnl_lock();
++
++      for (i = 0; i < cpsw->data.slaves; i++) {
++              struct net_device *ndev = cpsw->slaves[i].ndev;
++
++              if (!(ndev && netif_running(ndev)))
++                      continue;
++
++              cpsw_ndo_open(ndev);
++      }
++
++      rtnl_unlock();
++
++      return 0;
++}
++
++static SIMPLE_DEV_PM_OPS(cpsw_pm_ops, cpsw_suspend, cpsw_resume);
++
+ static struct platform_driver cpsw_driver = {
+       .driver = {
+               .name    = "cpsw-switch",
++              .pm      = &cpsw_pm_ops,
+               .of_match_table = cpsw_of_mtable,
+       },
+       .probe = cpsw_probe,
diff --git a/queue-5.8/net-fix-bridge-enslavement-failure.patch b/queue-5.8/net-fix-bridge-enslavement-failure.patch
new file mode 100644 (file)
index 0000000..340d6aa
--- /dev/null
@@ -0,0 +1,43 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Ido Schimmel <idosch@nvidia.com>
+Date: Thu, 10 Sep 2020 14:01:26 +0300
+Subject: net: Fix bridge enslavement failure
+
+From: Ido Schimmel <idosch@nvidia.com>
+
+[ Upstream commit e1b9efe6baebe79019a2183176686a0e709388ae ]
+
+When a netdev is enslaved to a bridge, its parent identifier is queried.
+This is done so that packets that were already forwarded in hardware
+will not be forwarded again by the bridge device between netdevs
+belonging to the same hardware instance.
+
+The operation fails when the netdev is an upper of netdevs with
+different parent identifiers.
+
+Instead of failing the enslavement, have dev_get_port_parent_id() return
+'-EOPNOTSUPP' which will signal the bridge to skip the query operation.
+Other callers of the function are not affected by this change.
+
+Fixes: 7e1146e8c10c ("net: devlink: introduce devlink_compat_switch_id_get() helper")
+Signed-off-by: Ido Schimmel <idosch@nvidia.com>
+Reported-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+Reviewed-by: Jiri Pirko <jiri@nvidia.com>
+Reviewed-by: Nikolay Aleksandrov <nikolay@nvidia.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -8641,7 +8641,7 @@ int dev_get_port_parent_id(struct net_de
+               if (!first.id_len)
+                       first = *ppid;
+               else if (memcmp(&first, ppid, sizeof(*ppid)))
+-                      return -ENODATA;
++                      return -EOPNOTSUPP;
+       }
+       return err;
diff --git a/queue-5.8/net-ipa-fix-u32_replace_bits-by-u32p_xxx-version.patch b/queue-5.8/net-ipa-fix-u32_replace_bits-by-u32p_xxx-version.patch
new file mode 100644 (file)
index 0000000..0b675d6
--- /dev/null
@@ -0,0 +1,42 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Vadym Kochan <vadym.kochan@plvision.eu>
+Date: Thu, 10 Sep 2020 18:41:52 +0300
+Subject: net: ipa: fix u32_replace_bits by u32p_xxx version
+
+From: Vadym Kochan <vadym.kochan@plvision.eu>
+
+[ Upstream commit c047dc1d260f2593035d63747d616c3512f9d6b6 ]
+
+u32p_replace_bits() should be used instead of u32_replace_bits(),
+which does not modify the value in place but returns the modified
+version.
+
+Fixes: 2b9feef2b6c2 ("soc: qcom: ipa: filter and routing tables")
+Signed-off-by: Vadym Kochan <vadym.kochan@plvision.eu>
+Reviewed-by: Alex Elder <elder@linaro.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ipa/ipa_table.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ipa/ipa_table.c
++++ b/drivers/net/ipa/ipa_table.c
+@@ -521,7 +521,7 @@ static void ipa_filter_tuple_zero(struct
+       val = ioread32(endpoint->ipa->reg_virt + offset);
+       /* Zero all filter-related fields, preserving the rest */
+-      u32_replace_bits(val, 0, IPA_REG_ENDP_FILTER_HASH_MSK_ALL);
++      u32p_replace_bits(&val, 0, IPA_REG_ENDP_FILTER_HASH_MSK_ALL);
+       iowrite32(val, endpoint->ipa->reg_virt + offset);
+ }
+@@ -572,7 +572,7 @@ static void ipa_route_tuple_zero(struct
+       val = ioread32(ipa->reg_virt + offset);
+       /* Zero all route-related fields, preserving the rest */
+-      u32_replace_bits(val, 0, IPA_REG_ENDP_ROUTER_HASH_MSK_ALL);
++      u32p_replace_bits(&val, 0, IPA_REG_ENDP_ROUTER_HASH_MSK_ALL);
+       iowrite32(val, ipa->reg_virt + offset);
+ }
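
The semantics at issue, modeled in plain C with a fixed mask (the real
helpers live in include/linux/bitfield.h and take the mask as an
argument): the value form returns the updated word, so calling it as a
statement discards the update, while the pointer form writes back.

  #include <stdio.h>
  #include <stdint.h>

  #define MASK 0x0000ff00u   /* example field within the register word */

  /* value form: caller must consume the return value */
  static uint32_t replace_bits(uint32_t old, uint32_t val)
  {
          return (old & ~MASK) | ((val << 8) & MASK);
  }

  /* pointer form: updates the word in place */
  static void p_replace_bits(uint32_t *p, uint32_t val)
  {
          *p = replace_bits(*p, val);
  }

  int main(void)
  {
          uint32_t reg = 0xdeadbeef;

          replace_bits(reg, 0);      /* the bug's shape: result discarded */
          printf("value form:   0x%08x\n", reg);  /* still 0xdeadbeef */

          p_replace_bits(&reg, 0);   /* the fix's shape: field cleared */
          printf("pointer form: 0x%08x\n", reg);  /* 0xdead00ef */
          return 0;
  }
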
diff --git a/queue-5.8/net-ipv6-fix-kconfig-dependency-warning-for-ipv6_seg6_hmac.patch b/queue-5.8/net-ipv6-fix-kconfig-dependency-warning-for-ipv6_seg6_hmac.patch
new file mode 100644 (file)
index 0000000..316d42d
--- /dev/null
@@ -0,0 +1,51 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Necip Fazil Yildiran <fazilyildiran@gmail.com>
+Date: Thu, 17 Sep 2020 19:46:43 +0300
+Subject: net: ipv6: fix kconfig dependency warning for IPV6_SEG6_HMAC
+
+From: Necip Fazil Yildiran <fazilyildiran@gmail.com>
+
+[ Upstream commit db7cd91a4be15e1485d6b58c6afc8761c59c4efb ]
+
+When IPV6_SEG6_HMAC is enabled and CRYPTO is disabled, it results in the
+following Kbuild warning:
+
+WARNING: unmet direct dependencies detected for CRYPTO_HMAC
+  Depends on [n]: CRYPTO [=n]
+  Selected by [y]:
+  - IPV6_SEG6_HMAC [=y] && NET [=y] && INET [=y] && IPV6 [=y]
+
+WARNING: unmet direct dependencies detected for CRYPTO_SHA1
+  Depends on [n]: CRYPTO [=n]
+  Selected by [y]:
+  - IPV6_SEG6_HMAC [=y] && NET [=y] && INET [=y] && IPV6 [=y]
+
+WARNING: unmet direct dependencies detected for CRYPTO_SHA256
+  Depends on [n]: CRYPTO [=n]
+  Selected by [y]:
+  - IPV6_SEG6_HMAC [=y] && NET [=y] && INET [=y] && IPV6 [=y]
+
+The reason is that IPV6_SEG6_HMAC selects CRYPTO_HMAC, CRYPTO_SHA1, and
+CRYPTO_SHA256 without depending on or selecting CRYPTO while those configs
+are subordinate to CRYPTO.
+
+Honor the kconfig menu hierarchy to remove kconfig dependency warnings.
+
+Fixes: bf355b8d2c30 ("ipv6: sr: add core files for SR HMAC support")
+Signed-off-by: Necip Fazil Yildiran <fazilyildiran@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/Kconfig |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/ipv6/Kconfig
++++ b/net/ipv6/Kconfig
+@@ -303,6 +303,7 @@ config IPV6_SEG6_LWTUNNEL
+ config IPV6_SEG6_HMAC
+       bool "IPv6: Segment Routing HMAC support"
+       depends on IPV6
++      select CRYPTO
+       select CRYPTO_HMAC
+       select CRYPTO_SHA1
+       select CRYPTO_SHA256
diff --git a/queue-5.8/net-lantiq-disable-irqs-only-if-napi-gets-scheduled.patch b/queue-5.8/net-lantiq-disable-irqs-only-if-napi-gets-scheduled.patch
new file mode 100644 (file)
index 0000000..4969091
--- /dev/null
@@ -0,0 +1,39 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Hauke Mehrtens <hauke@hauke-m.de>
+Date: Sat, 12 Sep 2020 21:36:29 +0200
+Subject: net: lantiq: Disable IRQs only if NAPI gets scheduled
+
+From: Hauke Mehrtens <hauke@hauke-m.de>
+
+[ Upstream commit 9423361da52356cb68642db5b2729b6b85aad330 ]
+
+The napi_schedule() call will only schedule the NAPI if it is not
+already running. To make sure that we do not deactivate interrupts
+without scheduling NAPI, only deactivate the interrupts when NAPI
+actually gets scheduled.
+
+Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/lantiq_xrx200.c |    8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/lantiq_xrx200.c
++++ b/drivers/net/ethernet/lantiq_xrx200.c
+@@ -345,10 +345,12 @@ static irqreturn_t xrx200_dma_irq(int ir
+ {
+       struct xrx200_chan *ch = ptr;
+-      ltq_dma_disable_irq(&ch->dma);
+-      ltq_dma_ack_irq(&ch->dma);
++      if (napi_schedule_prep(&ch->napi)) {
++              __napi_schedule(&ch->napi);
++              ltq_dma_disable_irq(&ch->dma);
++      }
+-      napi_schedule(&ch->napi);
++      ltq_dma_ack_irq(&ch->dma);
+       return IRQ_HANDLED;
+ }
diff --git a/queue-5.8/net-lantiq-use-napi_complete_done.patch b/queue-5.8/net-lantiq-use-napi_complete_done.patch
new file mode 100644 (file)
index 0000000..83400e8
--- /dev/null
@@ -0,0 +1,44 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Hauke Mehrtens <hauke@hauke-m.de>
+Date: Sat, 12 Sep 2020 21:36:28 +0200
+Subject: net: lantiq: Use napi_complete_done()
+
+From: Hauke Mehrtens <hauke@hauke-m.de>
+
+[ Upstream commit c582a7fea9dad4d309437d1a7e22e6d2cb380e2e ]
+
+Use napi_complete_done() and activate the interrupts when this function
+returns true. This way the generic NAPI code can take care of activating
+the interrupts.
+
+Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/lantiq_xrx200.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/ethernet/lantiq_xrx200.c
++++ b/drivers/net/ethernet/lantiq_xrx200.c
+@@ -230,8 +230,8 @@ static int xrx200_poll_rx(struct napi_st
+       }
+       if (rx < budget) {
+-              napi_complete(&ch->napi);
+-              ltq_dma_enable_irq(&ch->dma);
++              if (napi_complete_done(&ch->napi, rx))
++                      ltq_dma_enable_irq(&ch->dma);
+       }
+       return rx;
+@@ -272,8 +272,8 @@ static int xrx200_tx_housekeeping(struct
+               netif_wake_queue(net_dev);
+       if (pkts < budget) {
+-              napi_complete(&ch->napi);
+-              ltq_dma_enable_irq(&ch->dma);
++              if (napi_complete_done(&ch->napi, pkts))
++                      ltq_dma_enable_irq(&ch->dma);
+       }
+       return pkts;
diff --git a/queue-5.8/net-lantiq-use-netif_tx_napi_add-for-tx-napi.patch b/queue-5.8/net-lantiq-use-netif_tx_napi_add-for-tx-napi.patch
new file mode 100644 (file)
index 0000000..89976e0
--- /dev/null
@@ -0,0 +1,30 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Hauke Mehrtens <hauke@hauke-m.de>
+Date: Sat, 12 Sep 2020 21:36:27 +0200
+Subject: net: lantiq: use netif_tx_napi_add() for TX NAPI
+
+From: Hauke Mehrtens <hauke@hauke-m.de>
+
+[ Upstream commit 74c7b80e222b58d3cea731d31e2a31a77fea8345 ]
+
+netif_tx_napi_add() should be used for NAPI in the TX direction instead
+of the netif_napi_add() function.
+
+Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/lantiq_xrx200.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/lantiq_xrx200.c
++++ b/drivers/net/ethernet/lantiq_xrx200.c
+@@ -502,7 +502,7 @@ static int xrx200_probe(struct platform_
+       /* setup NAPI */
+       netif_napi_add(net_dev, &priv->chan_rx.napi, xrx200_poll_rx, 32);
+-      netif_napi_add(net_dev, &priv->chan_tx.napi, xrx200_tx_housekeeping, 32);
++      netif_tx_napi_add(net_dev, &priv->chan_tx.napi, xrx200_tx_housekeeping, 32);
+       platform_set_drvdata(pdev, priv);
diff --git a/queue-5.8/net-lantiq-wake-tx-queue-again.patch b/queue-5.8/net-lantiq-wake-tx-queue-again.patch
new file mode 100644 (file)
index 0000000..f1055b8
--- /dev/null
@@ -0,0 +1,34 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Hauke Mehrtens <hauke@hauke-m.de>
+Date: Sat, 12 Sep 2020 21:36:26 +0200
+Subject: net: lantiq: Wake TX queue again
+
+From: Hauke Mehrtens <hauke@hauke-m.de>
+
+[ Upstream commit dea36631e6f186d4b853af67a4aef2e35cfa8bb7 ]
+
+The call to netif_wake_queue() when the TX descriptors were freed was
+missing. When there are no TX buffers available, the TX queue will be
+stopped, but it was not started again once buffers became available;
+this patch fixes that.
+
+Fixes: fe1a56420cf2 ("net: lantiq: Add Lantiq / Intel VRX200 Ethernet driver")
+Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/lantiq_xrx200.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/net/ethernet/lantiq_xrx200.c
++++ b/drivers/net/ethernet/lantiq_xrx200.c
+@@ -268,6 +268,9 @@ static int xrx200_tx_housekeeping(struct
+       net_dev->stats.tx_bytes += bytes;
+       netdev_completed_queue(ch->priv->net_dev, pkts, bytes);
++      if (netif_queue_stopped(net_dev))
++              netif_wake_queue(net_dev);
++
+       if (pkts < budget) {
+               napi_complete(&ch->napi);
+               ltq_dma_enable_irq(&ch->dma);
diff --git a/queue-5.8/net-macb-fix-for-pause-frame-receive-enable-bit.patch b/queue-5.8/net-macb-fix-for-pause-frame-receive-enable-bit.patch
new file mode 100644 (file)
index 0000000..12702c5
--- /dev/null
@@ -0,0 +1,32 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Parshuram Thombare <pthombar@cadence.com>
+Date: Sat, 5 Sep 2020 10:21:33 +0200
+Subject: net: macb: fix for pause frame receive enable bit
+
+From: Parshuram Thombare <pthombar@cadence.com>
+
+[ Upstream commit d7739b0b6d15ef9ad5c79424736b8ded5ed3e913 ]
+
+The PAE bit of the NCFGR register, when set, pauses transmission
+if a non-zero 802.3 classic pause frame is received.
+
+Fixes: 7897b071ac3b ("net: macb: convert to phylink")
+Signed-off-by: Parshuram Thombare <pthombar@cadence.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/cadence/macb_main.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/cadence/macb_main.c
++++ b/drivers/net/ethernet/cadence/macb_main.c
+@@ -647,8 +647,7 @@ static void macb_mac_link_up(struct phyl
+                               ctrl |= GEM_BIT(GBE);
+               }
+-              /* We do not support MLO_PAUSE_RX yet */
+-              if (tx_pause)
++              if (rx_pause)
+                       ctrl |= MACB_BIT(PAE);
+               macb_set_tx_clk(bp->tx_clk, speed, ndev);
diff --git a/queue-5.8/net-mlx5-fix-fte-cleanup.patch b/queue-5.8/net-mlx5-fix-fte-cleanup.patch
new file mode 100644 (file)
index 0000000..f77a8d9
--- /dev/null
@@ -0,0 +1,103 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Maor Gottlieb <maorg@nvidia.com>
+Date: Mon, 31 Aug 2020 20:50:42 +0300
+Subject: net/mlx5: Fix FTE cleanup
+
+From: Maor Gottlieb <maorg@nvidia.com>
+
+[ Upstream commit cefc23554fc259114e78a7b0908aac4610ee18eb ]
+
+Currently, when an FTE is allocated, its refcount is decreased to 0
+so that it is not a stand-alone steering object; every rule
+(destination) of the FTE then increases the refcount.
+When mlx5_cleanup_fs is called while the steering users have not yet
+deleted all rules, the FTE refcount underflows once clean_tree calls
+tree_remove_node, because the deleted rules have already decreased
+the refcount to 0.
+
+The FTE is no longer destroyed implicitly when the last rule
+(destination) is deleted: mlx5_del_flow_rules avoids that by
+increasing the refcount on the FTE and destroying it explicitly after
+all rules were deleted. So we can avoid the refcount underflow by
+making the FTE a stand-alone object. In addition, set del_hw_func on
+the FTE so the HW object is destroyed when the FTE is deleted from
+the clean_tree flow.
+
+refcount_t: underflow; use-after-free.
+WARNING: CPU: 2 PID: 15715 at lib/refcount.c:28 refcount_warn_saturate+0xd9/0xe0
+Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
+Call Trace:
+ tree_put_node+0xf2/0x140 [mlx5_core]
+ clean_tree+0x4e/0xf0 [mlx5_core]
+ clean_tree+0x4e/0xf0 [mlx5_core]
+ clean_tree+0x4e/0xf0 [mlx5_core]
+ clean_tree+0x5f/0xf0 [mlx5_core]
+ clean_tree+0x4e/0xf0 [mlx5_core]
+ clean_tree+0x5f/0xf0 [mlx5_core]
+ mlx5_cleanup_fs+0x26/0x270 [mlx5_core]
+ mlx5_unload+0x2e/0xa0 [mlx5_core]
+ mlx5_unload_one+0x51/0x120 [mlx5_core]
+ mlx5_devlink_reload_down+0x51/0x90 [mlx5_core]
+ devlink_reload+0x39/0x120
+ ? devlink_nl_cmd_reload+0x43/0x220
+ genl_rcv_msg+0x1e4/0x420
+ ? genl_family_rcv_msg_attrs_parse+0x100/0x100
+ netlink_rcv_skb+0x47/0x110
+ genl_rcv+0x24/0x40
+ netlink_unicast+0x217/0x2f0
+ netlink_sendmsg+0x30f/0x430
+ sock_sendmsg+0x30/0x40
+ __sys_sendto+0x10e/0x140
+ ? handle_mm_fault+0xc4/0x1f0
+ ? do_page_fault+0x33f/0x630
+ __x64_sys_sendto+0x24/0x30
+ do_syscall_64+0x48/0x130
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+Fixes: 718ce4d601db ("net/mlx5: Consolidate update FTE for all removal changes")
+Fixes: bd71b08ec2ee ("net/mlx5: Support multiple updates of steering rules in parallel")
+Signed-off-by: Maor Gottlieb <maorg@nvidia.com>
+Reviewed-by: Mark Bloch <mbloch@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/fs_core.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+@@ -655,7 +655,7 @@ static struct fs_fte *alloc_fte(struct m
+       fte->action = *flow_act;
+       fte->flow_context = spec->flow_context;
+-      tree_init_node(&fte->node, NULL, del_sw_fte);
++      tree_init_node(&fte->node, del_hw_fte, del_sw_fte);
+       return fte;
+ }
+@@ -1792,7 +1792,6 @@ skip_search:
+               up_write_ref_node(&g->node, false);
+               rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte);
+               up_write_ref_node(&fte->node, false);
+-              tree_put_node(&fte->node, false);
+               return rule;
+       }
+       rule = ERR_PTR(-ENOENT);
+@@ -1891,7 +1890,6 @@ search_again_locked:
+       up_write_ref_node(&g->node, false);
+       rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte);
+       up_write_ref_node(&fte->node, false);
+-      tree_put_node(&fte->node, false);
+       tree_put_node(&g->node, false);
+       return rule;
+@@ -2001,7 +1999,9 @@ void mlx5_del_flow_rules(struct mlx5_flo
+               up_write_ref_node(&fte->node, false);
+       } else {
+               del_hw_fte(&fte->node);
+-              up_write(&fte->node.lock);
++              /* Avoid double call to del_hw_fte */
++              fte->node.del_hw_func = NULL;
++              up_write_ref_node(&fte->node, false);
+               tree_put_node(&fte->node, false);
+       }
+       kfree(handle);
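
A toy model of the underflow described above (refcount_t itself
saturates and warns instead of wrapping, which is what produces the
splat; the kernel API is deliberately not used here):

  #include <stdio.h>

  struct obj { int refs; };

  static void obj_get(struct obj *o) { o->refs++; }

  static void obj_put(struct obj *o)
  {
          if (o->refs == 0) {          /* the condition refcount_t traps */
                  fprintf(stderr, "refcount_t: underflow; use-after-free.\n");
                  return;
          }
          if (--o->refs == 0)
                  printf("object released\n");
  }

  int main(void)
  {
          struct obj fte = { 0 };   /* old scheme: FTE starts at 0 */

          obj_get(&fte);            /* a rule takes a reference */
          obj_put(&fte);            /* rule deleted: back to 0 */
          obj_put(&fte);            /* clean_tree's put: underflow */

          fte.refs = 1;             /* fixed scheme: stand-alone object */
          obj_get(&fte);
          obj_put(&fte);            /* rule deleted: count 1, alive */
          obj_put(&fte);            /* explicit destroy: clean release */
          return 0;
  }
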
diff --git a/queue-5.8/net-mlx5e-enable-adding-peer-miss-rules-only-if-merged-eswitch-is-supported.patch b/queue-5.8/net-mlx5e-enable-adding-peer-miss-rules-only-if-merged-eswitch-is-supported.patch
new file mode 100644 (file)
index 0000000..50d8357
--- /dev/null
@@ -0,0 +1,114 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Maor Dickman <maord@mellanox.com>
+Date: Wed, 5 Aug 2020 17:56:04 +0300
+Subject: net/mlx5e: Enable adding peer miss rules only if merged eswitch is supported
+
+From: Maor Dickman <maord@mellanox.com>
+
+[ Upstream commit 6cec0229ab1959259e71e9a5bbe47c04577950b1 ]
+
+The cited commit creates the peer miss group during switchdev mode
+initialization in order to handle miss packets correctly while in VF
+LAG mode. This is done regardless of FW support for such groups, which
+could cause rule setup failures later on.
+
+Fix by adding FW capability check before creating peer groups/rule.
+
+Fixes: ac004b832128 ("net/mlx5e: E-Switch, Add peer miss rules")
+Signed-off-by: Maor Dickman <maord@mellanox.com>
+Reviewed-by: Roi Dayan <roid@mellanox.com>
+Reviewed-by: Raed Salem <raeds@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c |   64 ++++++-------
+ 1 file changed, 34 insertions(+), 30 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+@@ -1219,35 +1219,37 @@ static int esw_create_offloads_fdb_table
+       }
+       esw->fdb_table.offloads.send_to_vport_grp = g;
+-      /* create peer esw miss group */
+-      memset(flow_group_in, 0, inlen);
+-
+-      esw_set_flow_group_source_port(esw, flow_group_in);
+-
+-      if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+-              match_criteria = MLX5_ADDR_OF(create_flow_group_in,
+-                                            flow_group_in,
+-                                            match_criteria);
+-
+-              MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+-                               misc_parameters.source_eswitch_owner_vhca_id);
+-
+-              MLX5_SET(create_flow_group_in, flow_group_in,
+-                       source_eswitch_owner_vhca_id_valid, 1);
+-      }
+-
+-      MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
+-      MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
+-               ix + esw->total_vports - 1);
+-      ix += esw->total_vports;
+-
+-      g = mlx5_create_flow_group(fdb, flow_group_in);
+-      if (IS_ERR(g)) {
+-              err = PTR_ERR(g);
+-              esw_warn(dev, "Failed to create peer miss flow group err(%d)\n", err);
+-              goto peer_miss_err;
++      if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) {
++              /* create peer esw miss group */
++              memset(flow_group_in, 0, inlen);
++
++              esw_set_flow_group_source_port(esw, flow_group_in);
++
++              if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) {
++                      match_criteria = MLX5_ADDR_OF(create_flow_group_in,
++                                                    flow_group_in,
++                                                    match_criteria);
++
++                      MLX5_SET_TO_ONES(fte_match_param, match_criteria,
++                                       misc_parameters.source_eswitch_owner_vhca_id);
++
++                      MLX5_SET(create_flow_group_in, flow_group_in,
++                               source_eswitch_owner_vhca_id_valid, 1);
++              }
++
++              MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
++              MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
++                       ix + esw->total_vports - 1);
++              ix += esw->total_vports;
++
++              g = mlx5_create_flow_group(fdb, flow_group_in);
++              if (IS_ERR(g)) {
++                      err = PTR_ERR(g);
++                      esw_warn(dev, "Failed to create peer miss flow group err(%d)\n", err);
++                      goto peer_miss_err;
++              }
++              esw->fdb_table.offloads.peer_miss_grp = g;
+       }
+-      esw->fdb_table.offloads.peer_miss_grp = g;
+       /* create miss group */
+       memset(flow_group_in, 0, inlen);
+@@ -1282,7 +1284,8 @@ static int esw_create_offloads_fdb_table
+ miss_rule_err:
+       mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
+ miss_err:
+-      mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp);
++      if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
++              mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp);
+ peer_miss_err:
+       mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
+ send_vport_err:
+@@ -1306,7 +1309,8 @@ static void esw_destroy_offloads_fdb_tab
+       mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_multi);
+       mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
+       mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
+-      mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp);
++      if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
++              mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp);
+       mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
+       mlx5_esw_chains_destroy(esw);
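
The hunks above are the heart of the change: both the creation and the destruction of the peer miss flow group are now gated behind the same MLX5_CAP_ESW(esw->dev, merged_eswitch) check, so the error-unwind path stays symmetric with the setup path. A minimal sketch of that shape, with purely illustrative names (cap_supported(), create_group(), destroy_group() and setup_rest() are assumptions, not mlx5 APIs):

    #include <stdbool.h>
    #include <stdio.h>

    struct group { int id; };
    struct dev { bool merged_eswitch; struct group *peer_miss; };

    static bool cap_supported(struct dev *d) { return d->merged_eswitch; }
    static struct group *create_group(void)  { static struct group g; return &g; }
    static void destroy_group(struct group *g) { (void)g; }
    static int setup_rest(struct dev *d)     { (void)d; return -1; /* force unwind */ }

    static int setup_fdb(struct dev *d)
    {
            int err;

            if (cap_supported(d))              /* guard the create ... */
                    d->peer_miss = create_group();

            err = setup_rest(d);
            if (err)
                    goto err_rest;
            return 0;

    err_rest:
            if (cap_supported(d))              /* ... and mirror it on unwind */
                    destroy_group(d->peer_miss);
            return err;
    }

    int main(void)
    {
            struct dev d = { .merged_eswitch = true };
            printf("setup_fdb: %d\n", setup_fdb(&d));
            return 0;
    }

Mirroring the guard is what keeps every label in the unwind chain freeing exactly what was allocated before it, whether or not the capability is present.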
diff --git a/queue-5.8/net-mlx5e-fix-endianness-when-calculating-pedit-mask-first-bit.patch b/queue-5.8/net-mlx5e-fix-endianness-when-calculating-pedit-mask-first-bit.patch
new file mode 100644 (file)
index 0000000..f01de32
--- /dev/null
@@ -0,0 +1,87 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Maor Dickman <maord@nvidia.com>
+Date: Wed, 2 Sep 2020 16:49:52 +0300
+Subject: net/mlx5e: Fix endianness when calculating pedit mask first bit
+
+From: Maor Dickman <maord@nvidia.com>
+
+[ Upstream commit 82198d8bcdeff01d19215d712aa55031e21bccbc ]
+
+The field mask value is provided in network byte order and has to
+be converted to host byte order before calculating the pedit mask's
+first bit.
+
+Fixes: 88f30bbcbaaa ("net/mlx5e: Bit sized fields rewrite support")
+Signed-off-by: Maor Dickman <maord@nvidia.com>
+Reviewed-by: Roi Dayan <roid@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_tc.c |   34 ++++++++++++++----------
+ 1 file changed, 21 insertions(+), 13 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+@@ -2731,6 +2731,22 @@ static struct mlx5_fields fields[] = {
+       OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest,   0, udp_dport),
+ };
++static unsigned long mask_to_le(unsigned long mask, int size)
++{
++      __be32 mask_be32;
++      __be16 mask_be16;
++
++      if (size == 32) {
++              mask_be32 = (__force __be32)(mask);
++              mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32));
++      } else if (size == 16) {
++              mask_be32 = (__force __be32)(mask);
++              mask_be16 = *(__be16 *)&mask_be32;
++              mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16));
++      }
++
++      return mask;
++}
+ static int offload_pedit_fields(struct mlx5e_priv *priv,
+                               int namespace,
+                               struct pedit_headers_action *hdrs,
+@@ -2744,9 +2760,7 @@ static int offload_pedit_fields(struct m
+       u32 *s_masks_p, *a_masks_p, s_mask, a_mask;
+       struct mlx5e_tc_mod_hdr_acts *mod_acts;
+       struct mlx5_fields *f;
+-      unsigned long mask;
+-      __be32 mask_be32;
+-      __be16 mask_be16;
++      unsigned long mask, field_mask;
+       int err;
+       u8 cmd;
+@@ -2812,14 +2826,7 @@ static int offload_pedit_fields(struct m
+               if (skip)
+                       continue;
+-              if (f->field_bsize == 32) {
+-                      mask_be32 = (__force __be32)(mask);
+-                      mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32));
+-              } else if (f->field_bsize == 16) {
+-                      mask_be32 = (__force __be32)(mask);
+-                      mask_be16 = *(__be16 *)&mask_be32;
+-                      mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16));
+-              }
++              mask = mask_to_le(mask, f->field_bsize);
+               first = find_first_bit(&mask, f->field_bsize);
+               next_z = find_next_zero_bit(&mask, f->field_bsize, first);
+@@ -2850,9 +2857,10 @@ static int offload_pedit_fields(struct m
+               if (cmd == MLX5_ACTION_TYPE_SET) {
+                       int start;
++                      field_mask = mask_to_le(f->field_mask, f->field_bsize);
++
+                       /* if field is bit sized it can start not from first bit */
+-                      start = find_first_bit((unsigned long *)&f->field_mask,
+-                                             f->field_bsize);
++                      start = find_first_bit(&field_mask, f->field_bsize);
+                       MLX5_SET(set_action_in, action, offset, first - start);
+                       /* length is num of bits to be written, zero means length of 32 */
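
A small userspace illustration of the byte-order shuffle mask_to_le() performs (this is not the kernel code; it assumes glibc's <endian.h> and the __builtin_ctzl compiler builtin, and the commented output is for a little-endian host). The mask arrives in network byte order, but find_first_bit() scans host memory, so scanning the raw value finds the wrong first bit:

    #include <stdio.h>
    #include <stdint.h>
    #include <endian.h>

    int main(void)
    {
            /* a 16-bit field mask 0xffff as it arrives off the wire */
            uint32_t wire = htobe32(0x0000ffff);

            unsigned long raw   = wire;                      /* before the fix */
            unsigned long fixed = htole32(be32toh(wire));    /* mask_to_le() */

            printf("first set bit, raw:   %d\n", __builtin_ctzl(raw));   /* 16 */
            printf("first set bit, fixed: %d\n", __builtin_ctzl(fixed)); /*  0 */
            return 0;
    }

With the wrong first-bit value, the offset and length programmed into the modify-header action no longer match the field, which is what the patch prevents by routing both mask and field_mask through the one helper.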
diff --git a/queue-5.8/net-mlx5e-fix-memory-leak-of-tunnel-info-when-rule-under-multipath-not-ready.patch b/queue-5.8/net-mlx5e-fix-memory-leak-of-tunnel-info-when-rule-under-multipath-not-ready.patch
new file mode 100644 (file)
index 0000000..2f08937
--- /dev/null
@@ -0,0 +1,36 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Jianbo Liu <jianbol@mellanox.com>
+Date: Tue, 7 Jul 2020 06:16:24 +0000
+Subject: net/mlx5e: Fix memory leak of tunnel info when rule under multipath not ready
+
+From: Jianbo Liu <jianbol@mellanox.com>
+
+[ Upstream commit 12a240a41427d37b5e70570700704e84c827452f ]
+
+When deleting a vxlan flow rule under multipath, the tun_info in parse_attr
+is not freed when the rule is not ready.
+
+Fixes: ef06c9ee8933 ("net/mlx5e: Allow one failure when offloading tc encap rules under multipath")
+Signed-off-by: Jianbo Liu <jianbol@mellanox.com>
+Reviewed-by: Roi Dayan <roid@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_tc.c |    5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+@@ -1399,11 +1399,8 @@ static void mlx5e_tc_del_fdb_flow(struct
+       mlx5e_put_flow_tunnel_id(flow);
+-      if (flow_flag_test(flow, NOT_READY)) {
++      if (flow_flag_test(flow, NOT_READY))
+               remove_unready_flow(flow);
+-              kvfree(attr->parse_attr);
+-              return;
+-      }
+       if (mlx5e_is_offloaded_flow(flow)) {
+               if (flow_flag_test(flow, SLOW))
diff --git a/queue-5.8/net-mlx5e-tls-do-not-expose-fpga-tls-counter-if-not-supported.patch b/queue-5.8/net-mlx5e-tls-do-not-expose-fpga-tls-counter-if-not-supported.patch
new file mode 100644 (file)
index 0000000..09bc5a2
--- /dev/null
@@ -0,0 +1,71 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Tariq Toukan <tariqt@mellanox.com>
+Date: Sun, 28 Jun 2020 13:06:06 +0300
+Subject: net/mlx5e: TLS, Do not expose FPGA TLS counter if not supported
+
+From: Tariq Toukan <tariqt@mellanox.com>
+
+[ Upstream commit 8f0bcd19b1da3f264223abea985b9462e85a3718 ]
+
+The set of TLS TX global SW counters in mlx5e_tls_sw_stats_desc
+is updated from all rings by using atomic ops.
+This set of stats is used only in the FPGA TLS use case, not in
+the Connect-X TLS one, where regular per-ring counters are used.
+
+Do not expose them in the Connect-X use case, as this would cause
+counter duplication. For example, tx_tls_drop_no_sync_data would
+appear twice in the ethtool stats.
+
+Fixes: d2ead1f360e8 ("net/mlx5e: Add kTLS TX HW offload support")
+Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
+Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c |   12 +++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c
+@@ -35,7 +35,6 @@
+ #include <net/sock.h>
+ #include "en.h"
+-#include "accel/tls.h"
+ #include "fpga/sdk.h"
+ #include "en_accel/tls.h"
+@@ -51,9 +50,14 @@ static const struct counter_desc mlx5e_t
+ #define NUM_TLS_SW_COUNTERS ARRAY_SIZE(mlx5e_tls_sw_stats_desc)
++static bool is_tls_atomic_stats(struct mlx5e_priv *priv)
++{
++      return priv->tls && !mlx5_accel_is_ktls_device(priv->mdev);
++}
++
+ int mlx5e_tls_get_count(struct mlx5e_priv *priv)
+ {
+-      if (!priv->tls)
++      if (!is_tls_atomic_stats(priv))
+               return 0;
+       return NUM_TLS_SW_COUNTERS;
+@@ -63,7 +67,7 @@ int mlx5e_tls_get_strings(struct mlx5e_p
+ {
+       unsigned int i, idx = 0;
+-      if (!priv->tls)
++      if (!is_tls_atomic_stats(priv))
+               return 0;
+       for (i = 0; i < NUM_TLS_SW_COUNTERS; i++)
+@@ -77,7 +81,7 @@ int mlx5e_tls_get_stats(struct mlx5e_pri
+ {
+       int i, idx = 0;
+-      if (!priv->tls)
++      if (!is_tls_atomic_stats(priv))
+               return 0;
+       for (i = 0; i < NUM_TLS_SW_COUNTERS; i++)
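
The reason all three callbacks change together: ethtool assembles its statistics output from get_count(), get_strings() and get_stats(), so all of them must gate on the same predicate or names and values drift apart. A compact sketch of that invariant (stats_supported() and the counter names are illustrative stand-ins, not the driver's API):

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    #define N_COUNTERS 2

    struct priv { bool tls; bool ktls; };

    /* one predicate shared by all callbacks, like is_tls_atomic_stats() */
    static bool stats_supported(const struct priv *p)
    {
            return p->tls && !p->ktls;    /* FPGA TLS only */
    }

    static int get_count(const struct priv *p)
    {
            return stats_supported(p) ? N_COUNTERS : 0;
    }

    static int get_strings(const struct priv *p, char names[][32])
    {
            if (!stats_supported(p))
                    return 0;
            strcpy(names[0], "tx_tls_ctx");
            strcpy(names[1], "tx_tls_drop_no_sync_data");
            return N_COUNTERS;
    }

    int main(void)
    {
            struct priv ktls_dev = { .tls = true, .ktls = true };
            char names[N_COUNTERS][32];

            /* 0 for both: a ConnectX/kTLS device exposes neither names nor
             * values, so nothing is duplicated in ethtool -S */
            printf("count = %d, strings = %d\n",
                   get_count(&ktls_dev), get_strings(&ktls_dev, names));
            return 0;
    }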
diff --git a/queue-5.8/net-mlx5e-use-rcu-to-protect-rq-xdp_prog.patch b/queue-5.8/net-mlx5e-use-rcu-to-protect-rq-xdp_prog.patch
new file mode 100644 (file)
index 0000000..c101964
--- /dev/null
@@ -0,0 +1,159 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Maxim Mikityanskiy <maximmi@mellanox.com>
+Date: Thu, 11 Jun 2020 13:55:19 +0300
+Subject: net/mlx5e: Use RCU to protect rq->xdp_prog
+
+From: Maxim Mikityanskiy <maximmi@mellanox.com>
+
+[ Upstream commit fe45386a208277cae4648106133c08246eecd012 ]
+
+Currently, the RQs are temporarily deactivated while hot-replacing the
+XDP program, and napi_synchronize is used to make sure rq->xdp_prog is
+not in use. However, napi_synchronize is not ideal: instead of waiting
+till the end of a NAPI cycle, it polls and waits until NAPI is not
+running, sleeping for 1ms between the periodic checks. Under heavy
+workloads, this loop will never end, which may even lead to a kernel
+panic if the kernel detects the hangup. Such workloads include XSK TX
+and possibly also heavy RX (XSK or normal).
+
+The fix is inspired by commit 326fe02d1ed6 ("net/mlx4_en: protect
+ring->xdp_prog with rcu_read_lock"). As mlx5e_xdp_handle is already
+protected by rcu_read_lock, and bpf_prog_put uses call_rcu to free the
+program, there is no need for additional synchronization if proper RCU
+functions are used to access the pointer. This patch converts all
+accesses to rq->xdp_prog to use RCU functions.
+
+Fixes: 86994156c736 ("net/mlx5e: XDP fast RX drop bpf programs support")
+Fixes: db05815b36cb ("net/mlx5e: Add XSK zero-copy support")
+Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
+Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en.h      |    2 +-
+ drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c  |    2 +-
+ drivers/net/ethernet/mellanox/mlx5/core/en_main.c |   53 ++++++++++------------
+ 3 files changed, 27 insertions(+), 30 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
+@@ -604,7 +604,7 @@ struct mlx5e_rq {
+       struct dim         dim; /* Dynamic Interrupt Moderation */
+       /* XDP */
+-      struct bpf_prog       *xdp_prog;
++      struct bpf_prog __rcu *xdp_prog;
+       struct mlx5e_xdpsq    *xdpsq;
+       DECLARE_BITMAP(flags, 8);
+       struct page_pool      *page_pool;
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+@@ -121,7 +121,7 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *
+ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
+                     u32 *len, struct xdp_buff *xdp)
+ {
+-      struct bpf_prog *prog = READ_ONCE(rq->xdp_prog);
++      struct bpf_prog *prog = rcu_dereference(rq->xdp_prog);
+       u32 act;
+       int err;
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+@@ -401,7 +401,7 @@ static int mlx5e_alloc_rq(struct mlx5e_c
+       if (params->xdp_prog)
+               bpf_prog_inc(params->xdp_prog);
+-      rq->xdp_prog = params->xdp_prog;
++      RCU_INIT_POINTER(rq->xdp_prog, params->xdp_prog);
+       rq_xdp_ix = rq->ix;
+       if (xsk)
+@@ -410,7 +410,7 @@ static int mlx5e_alloc_rq(struct mlx5e_c
+       if (err < 0)
+               goto err_rq_wq_destroy;
+-      rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
++      rq->buff.map_dir = params->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
+       rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, xsk);
+       pool_size = 1 << params->log_rq_mtu_frames;
+@@ -605,8 +605,8 @@ err_free:
+       }
+ err_rq_wq_destroy:
+-      if (rq->xdp_prog)
+-              bpf_prog_put(rq->xdp_prog);
++      if (params->xdp_prog)
++              bpf_prog_put(params->xdp_prog);
+       xdp_rxq_info_unreg(&rq->xdp_rxq);
+       page_pool_destroy(rq->page_pool);
+       mlx5_wq_destroy(&rq->wq_ctrl);
+@@ -616,10 +616,16 @@ err_rq_wq_destroy:
+ static void mlx5e_free_rq(struct mlx5e_rq *rq)
+ {
++      struct mlx5e_channel *c = rq->channel;
++      struct bpf_prog *old_prog = NULL;
+       int i;
+-      if (rq->xdp_prog)
+-              bpf_prog_put(rq->xdp_prog);
++      /* drop_rq has neither channel nor xdp_prog. */
++      if (c)
++              old_prog = rcu_dereference_protected(rq->xdp_prog,
++                                                   lockdep_is_held(&c->priv->state_lock));
++      if (old_prog)
++              bpf_prog_put(old_prog);
+       switch (rq->wq_type) {
+       case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+@@ -4423,6 +4429,16 @@ static int mlx5e_xdp_allowed(struct mlx5
+       return 0;
+ }
++static void mlx5e_rq_replace_xdp_prog(struct mlx5e_rq *rq, struct bpf_prog *prog)
++{
++      struct bpf_prog *old_prog;
++
++      old_prog = rcu_replace_pointer(rq->xdp_prog, prog,
++                                     lockdep_is_held(&rq->channel->priv->state_lock));
++      if (old_prog)
++              bpf_prog_put(old_prog);
++}
++
+ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
+ {
+       struct mlx5e_priv *priv = netdev_priv(netdev);
+@@ -4481,29 +4497,10 @@ static int mlx5e_xdp_set(struct net_devi
+        */
+       for (i = 0; i < priv->channels.num; i++) {
+               struct mlx5e_channel *c = priv->channels.c[i];
+-              bool xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
+-
+-              clear_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state);
+-              if (xsk_open)
+-                      clear_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
+-              napi_synchronize(&c->napi);
+-              /* prevent mlx5e_poll_rx_cq from accessing rq->xdp_prog */
+-
+-              old_prog = xchg(&c->rq.xdp_prog, prog);
+-              if (old_prog)
+-                      bpf_prog_put(old_prog);
+-
+-              if (xsk_open) {
+-                      old_prog = xchg(&c->xskrq.xdp_prog, prog);
+-                      if (old_prog)
+-                              bpf_prog_put(old_prog);
+-              }
+-              set_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state);
+-              if (xsk_open)
+-                      set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
+-              /* napi_schedule in case we have missed anything */
+-              napi_schedule(&c->napi);
++              mlx5e_rq_replace_xdp_prog(&c->rq, prog);
++              if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
++                      mlx5e_rq_replace_xdp_prog(&c->xskrq, prog);
+       }
+ unlock:
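
The shape of the conversion, as a hedged userspace sketch using liburcu (assuming the classic liburcu API, built with -lurcu; this is not the kernel code). The reader dereferences the program pointer inside an RCU read-side section; the updater swaps the pointer and defers the free past a grace period. In the kernel, bpf_prog_put() already defers via call_rcu, which is why mlx5e_xdp_set() no longer needs to stop the queues at all:

    #include <stdio.h>
    #include <stdlib.h>
    #include <urcu.h>    /* rcu_read_lock(), rcu_xchg_pointer(), ... */

    struct prog { int id; };
    static struct prog *xdp_prog;           /* plays the role of rq->xdp_prog */

    static void napi_poll(void)             /* reader side */
    {
            rcu_read_lock();
            struct prog *p = rcu_dereference(xdp_prog);
            if (p)
                    printf("running prog %d\n", p->id);
            rcu_read_unlock();
    }

    static void replace_prog(struct prog *new)   /* updater side */
    {
            struct prog *old = rcu_xchg_pointer(&xdp_prog, new);
            synchronize_rcu();   /* wait out readers still using 'old' */
            free(old);           /* kernel: bpf_prog_put() -> call_rcu */
    }

    int main(void)
    {
            rcu_register_thread();
            struct prog *p = malloc(sizeof(*p));
            p->id = 1;
            rcu_assign_pointer(xdp_prog, p);
            napi_poll();
            replace_prog(NULL);  /* hot-swap; the queues never stop */
            napi_poll();
            rcu_unregister_thread();
            return 0;
    }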
diff --git a/queue-5.8/net-mlx5e-use-synchronize_rcu-to-sync-with-napi.patch b/queue-5.8/net-mlx5e-use-synchronize_rcu-to-sync-with-napi.patch
new file mode 100644 (file)
index 0000000..37bb723
--- /dev/null
@@ -0,0 +1,241 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Maxim Mikityanskiy <maximmi@mellanox.com>
+Date: Thu, 11 Jun 2020 14:25:19 +0300
+Subject: net/mlx5e: Use synchronize_rcu to sync with NAPI
+
+From: Maxim Mikityanskiy <maximmi@mellanox.com>
+
+[ Upstream commit 9c25a22dfb00270372224721fed646965420323a ]
+
+As described in the previous commit, napi_synchronize doesn't quite fit
+the purpose when we just need to wait until the currently running NAPI
+quits. Its implementation waits until NAPI is not running by polling and
+waiting for 1ms in between. In cases where we need to deactivate one
+queue (e.g., recovery flows) or where we deactivate them one-by-one
+(deactivate channel flow), we may get stuck in napi_synchronize forever
+if other queues keep NAPI active, causing a soft lockup. Depending on
+kernel configuration (CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC), it may result
+in a kernel panic.
+
+To fix the issue, use synchronize_rcu to wait for NAPI to quit, and wrap
+the whole NAPI in rcu_read_lock.
+
+Fixes: acc6c5953af1 ("net/mlx5e: Split open/close channels to stages")
+Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
+Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c    |   14 ++------------
+ drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c |    3 +--
+ drivers/net/ethernet/mellanox/mlx5/core/en_main.c      |   12 ++++--------
+ drivers/net/ethernet/mellanox/mlx5/core/en_rx.c        |   12 ++----------
+ drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c      |   17 +++++++++++++----
+ 5 files changed, 22 insertions(+), 36 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
+@@ -31,7 +31,6 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_m
+ {
+       struct xdp_buff *xdp = wi->umr.dma_info[page_idx].xsk;
+       u32 cqe_bcnt32 = cqe_bcnt;
+-      bool consumed;
+       /* Check packet size. Note LRO doesn't use linear SKB */
+       if (unlikely(cqe_bcnt > rq->hw_mtu)) {
+@@ -51,10 +50,6 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_m
+       xsk_buff_dma_sync_for_cpu(xdp);
+       prefetch(xdp->data);
+-      rcu_read_lock();
+-      consumed = mlx5e_xdp_handle(rq, NULL, &cqe_bcnt32, xdp);
+-      rcu_read_unlock();
+-
+       /* Possible flows:
+        * - XDP_REDIRECT to XSKMAP:
+        *   The page is owned by the userspace from now.
+@@ -70,7 +65,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_m
+        * allocated first from the Reuse Ring, so it has enough space.
+        */
+-      if (likely(consumed)) {
++      if (likely(mlx5e_xdp_handle(rq, NULL, &cqe_bcnt32, xdp))) {
+               if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)))
+                       __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
+               return NULL; /* page/packet was consumed by XDP */
+@@ -88,7 +83,6 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_l
+                                             u32 cqe_bcnt)
+ {
+       struct xdp_buff *xdp = wi->di->xsk;
+-      bool consumed;
+       /* wi->offset is not used in this function, because xdp->data and the
+        * DMA address point directly to the necessary place. Furthermore, the
+@@ -107,11 +101,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_l
+               return NULL;
+       }
+-      rcu_read_lock();
+-      consumed = mlx5e_xdp_handle(rq, NULL, &cqe_bcnt, xdp);
+-      rcu_read_unlock();
+-
+-      if (likely(consumed))
++      if (likely(mlx5e_xdp_handle(rq, NULL, &cqe_bcnt, xdp)))
+               return NULL; /* page/packet was consumed by XDP */
+       /* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
+@@ -143,8 +143,7 @@ err_free_cparam:
+ void mlx5e_close_xsk(struct mlx5e_channel *c)
+ {
+       clear_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
+-      napi_synchronize(&c->napi);
+-      synchronize_rcu(); /* Sync with the XSK wakeup. */
++      synchronize_rcu(); /* Sync with the XSK wakeup and with NAPI. */
+       mlx5e_close_rq(&c->xskrq);
+       mlx5e_close_cq(&c->xskrq.cq);
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+@@ -911,7 +911,7 @@ void mlx5e_activate_rq(struct mlx5e_rq *
+ void mlx5e_deactivate_rq(struct mlx5e_rq *rq)
+ {
+       clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
+-      napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */
++      synchronize_rcu(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. */
+ }
+ void mlx5e_close_rq(struct mlx5e_rq *rq)
+@@ -1356,12 +1356,10 @@ void mlx5e_tx_disable_queue(struct netde
+ static void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq)
+ {
+-      struct mlx5e_channel *c = sq->channel;
+       struct mlx5_wq_cyc *wq = &sq->wq;
+       clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+-      /* prevent netif_tx_wake_queue */
+-      napi_synchronize(&c->napi);
++      synchronize_rcu(); /* Sync with NAPI to prevent netif_tx_wake_queue. */
+       mlx5e_tx_disable_queue(sq->txq);
+@@ -1436,10 +1434,8 @@ void mlx5e_activate_icosq(struct mlx5e_i
+ void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq)
+ {
+-      struct mlx5e_channel *c = icosq->channel;
+-
+       clear_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state);
+-      napi_synchronize(&c->napi);
++      synchronize_rcu(); /* Sync with NAPI. */
+ }
+ void mlx5e_close_icosq(struct mlx5e_icosq *sq)
+@@ -1517,7 +1513,7 @@ void mlx5e_close_xdpsq(struct mlx5e_xdps
+       struct mlx5e_channel *c = sq->channel;
+       clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+-      napi_synchronize(&c->napi);
++      synchronize_rcu(); /* Sync with NAPI. */
+       mlx5e_destroy_sq(c->mdev, sq->sqn);
+       mlx5e_free_xdpsq_descs(sq);
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+@@ -1072,7 +1072,6 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_r
+       struct xdp_buff xdp;
+       struct sk_buff *skb;
+       void *va, *data;
+-      bool consumed;
+       u32 frag_size;
+       va             = page_address(di->page) + wi->offset;
+@@ -1084,11 +1083,8 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_r
+       prefetchw(va); /* xdp_frame data area */
+       prefetch(data);
+-      rcu_read_lock();
+       mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp);
+-      consumed = mlx5e_xdp_handle(rq, di, &cqe_bcnt, &xdp);
+-      rcu_read_unlock();
+-      if (consumed)
++      if (mlx5e_xdp_handle(rq, di, &cqe_bcnt, &xdp))
+               return NULL; /* page/packet was consumed by XDP */
+       rx_headroom = xdp.data - xdp.data_hard_start;
+@@ -1369,7 +1365,6 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct m
+       struct sk_buff *skb;
+       void *va, *data;
+       u32 frag_size;
+-      bool consumed;
+       /* Check packet size. Note LRO doesn't use linear SKB */
+       if (unlikely(cqe_bcnt > rq->hw_mtu)) {
+@@ -1386,11 +1381,8 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct m
+       prefetchw(va); /* xdp_frame data area */
+       prefetch(data);
+-      rcu_read_lock();
+       mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt32, &xdp);
+-      consumed = mlx5e_xdp_handle(rq, di, &cqe_bcnt32, &xdp);
+-      rcu_read_unlock();
+-      if (consumed) {
++      if (mlx5e_xdp_handle(rq, di, &cqe_bcnt32, &xdp)) {
+               if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
+                       __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
+               return NULL; /* page/packet was consumed by XDP */
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+@@ -121,13 +121,17 @@ int mlx5e_napi_poll(struct napi_struct *
+       struct mlx5e_xdpsq *xsksq = &c->xsksq;
+       struct mlx5e_rq *xskrq = &c->xskrq;
+       struct mlx5e_rq *rq = &c->rq;
+-      bool xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
+       bool aff_change = false;
+       bool busy_xsk = false;
+       bool busy = false;
+       int work_done = 0;
++      bool xsk_open;
+       int i;
++      rcu_read_lock();
++
++      xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
++
+       ch_stats->poll++;
+       for (i = 0; i < c->num_tc; i++)
+@@ -167,8 +171,10 @@ int mlx5e_napi_poll(struct napi_struct *
+       busy |= busy_xsk;
+       if (busy) {
+-              if (likely(mlx5e_channel_no_affinity_change(c)))
+-                      return budget;
++              if (likely(mlx5e_channel_no_affinity_change(c))) {
++                      work_done = budget;
++                      goto out;
++              }
+               ch_stats->aff_change++;
+               aff_change = true;
+               if (budget && work_done == budget)
+@@ -176,7 +182,7 @@ int mlx5e_napi_poll(struct napi_struct *
+       }
+       if (unlikely(!napi_complete_done(napi, work_done)))
+-              return work_done;
++              goto out;
+       ch_stats->arm++;
+@@ -203,6 +209,9 @@ int mlx5e_napi_poll(struct napi_struct *
+               ch_stats->force_irq++;
+       }
++out:
++      rcu_read_unlock();
++
+       return work_done;
+ }
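
Because the whole poll handler now runs inside one RCU read-side section, deactivation reduces to clearing the enabled bit and waiting one grace period. A toy model of that contract (same liburcu assumption as the previous sketch; the real code uses test_bit()/clear_bit() with proper once-semantics, elided here for brevity):

    #include <stdio.h>
    #include <urcu.h>

    static int rq_enabled = 1;

    static void napi_poll(void)
    {
            rcu_read_lock();                 /* mlx5e_napi_poll() entry */
            if (rq_enabled)
                    printf("posting RX WQEs\n");
            rcu_read_unlock();               /* ... and exit */
    }

    static void deactivate_rq(void)
    {
            rq_enabled = 0;                  /* clear_bit(MLX5E_RQ_STATE_ENABLED) */
            synchronize_rcu();               /* waits only for in-flight polls,
                                                never spins like napi_synchronize */
            /* safe to tear the queue down here */
    }

    int main(void)
    {
            rcu_register_thread();
            napi_poll();
            deactivate_rq();
            napi_poll();                     /* sees the queue disabled */
            rcu_unregister_thread();
            return 0;
    }

Unlike napi_synchronize(), the grace period completes as soon as the in-flight polls return, even if other queues keep NAPI busy indefinitely.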
diff --git a/queue-5.8/net-phy-avoid-npd-upon-phy_detach-when-driver-is-unbound.patch b/queue-5.8/net-phy-avoid-npd-upon-phy_detach-when-driver-is-unbound.patch
new file mode 100644 (file)
index 0000000..aa3ff0c
--- /dev/null
@@ -0,0 +1,37 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Wed, 16 Sep 2020 20:43:09 -0700
+Subject: net: phy: Avoid NPD upon phy_detach() when driver is unbound
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+[ Upstream commit c2b727df7caa33876e7066bde090f40001b6d643 ]
+
+If we have unbound the PHY driver prior to calling phy_detach() (often
+via phy_disconnect()) then we can cause a NULL pointer de-reference
+accessing the driver owner member. The steps to reproduce are:
+
+echo unimac-mdio-0:01 > /sys/class/net/eth0/phydev/driver/unbind
+ip link set eth0 down
+
+Fixes: cafe8df8b9bc ("net: phy: Fix lack of reference count on PHY driver")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/phy_device.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/phy/phy_device.c
++++ b/drivers/net/phy/phy_device.c
+@@ -1631,7 +1631,8 @@ void phy_detach(struct phy_device *phyde
+       phy_led_triggers_unregister(phydev);
+-      module_put(phydev->mdio.dev.driver->owner);
++      if (phydev->mdio.dev.driver)
++              module_put(phydev->mdio.dev.driver->owner);
+       /* If the device had no specific driver before (i.e. - it
+        * was using the generic driver), we unbind the device
diff --git a/queue-5.8/net-phy-call-phy_disable_interrupts-in-phy_attach_direct-instead.patch b/queue-5.8/net-phy-call-phy_disable_interrupts-in-phy_attach_direct-instead.patch
new file mode 100644 (file)
index 0000000..15bf2d7
--- /dev/null
@@ -0,0 +1,51 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
+Date: Wed, 9 Sep 2020 14:43:14 +0900
+Subject: net: phy: call phy_disable_interrupts() in phy_attach_direct() instead
+
+From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
+
+[ Upstream commit 7d3ba9360c6dac7c077fbd6631e08f32ea2bcd53 ]
+
+Since the micrel phy driver calls phy_init_hw() as a workaround, commit
+9886a4dbd2aa ("net: phy: call phy_disable_interrupts() in phy_init_hw()")
+disables interrupts unexpectedly. So, call phy_disable_interrupts() in
+phy_attach_direct() instead. Otherwise, the phy cannot link up after the
+ethernet cable is disconnected.
+
+Note that other drivers (like at803x.c) also call phy_init_hw(), so they
+may have been affected by a similar issue.
+
+Fixes: 9886a4dbd2aa ("net: phy: call phy_disable_interrupts() in phy_init_hw()")
+Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/phy_device.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/phy/phy_device.c
++++ b/drivers/net/phy/phy_device.c
+@@ -1092,10 +1092,6 @@ int phy_init_hw(struct phy_device *phyde
+       if (ret < 0)
+               return ret;
+-      ret = phy_disable_interrupts(phydev);
+-      if (ret)
+-              return ret;
+-
+       if (phydev->drv->config_init)
+               ret = phydev->drv->config_init(phydev);
+@@ -1372,6 +1368,10 @@ int phy_attach_direct(struct net_device
+       if (err)
+               goto error;
++      err = phy_disable_interrupts(phydev);
++      if (err)
++              return err;
++
+       phy_resume(phydev);
+       phy_led_triggers_register(phydev);
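
A toy model of the ordering problem the patch fixes (all names are illustrative): once drivers are allowed to re-invoke init_hw() as a workaround, any one-shot side effect inside it fires again, so the one-shot work belongs in attach():

    #include <stdio.h>
    #include <stdbool.h>

    static bool irq_enabled = true;

    static void init_hw(void)     { /* pure hw config after the fix */ }
    static void disable_irq(void) { irq_enabled = false; }

    static void attach(void)
    {
            init_hw();
            disable_irq();        /* done once, moved out of init_hw() */
            irq_enabled = true;   /* interrupt mode configured later */
    }

    int main(void)
    {
            attach();
            init_hw();            /* micrel-style workaround call: no longer
                                     leaves interrupts disabled behind our back */
            printf("irq_enabled = %d\n", irq_enabled);
            return 0;
    }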
diff --git a/queue-5.8/net-phy-do-not-warn-in-phy_stop-on-phy_down.patch b/queue-5.8/net-phy-do-not-warn-in-phy_stop-on-phy_down.patch
new file mode 100644 (file)
index 0000000..fac87a9
--- /dev/null
@@ -0,0 +1,34 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Wed, 16 Sep 2020 20:43:10 -0700
+Subject: net: phy: Do not warn in phy_stop() on PHY_DOWN
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+[ Upstream commit 5116a8ade333b6c2e180782139c9c516a437b21c ]
+
+When phy_is_started() was added to catch incorrect PHY states,
+phy_stop() would not be qualified against PHY_DOWN. It is possible to
+reach that state when the PHY driver has been unbound and the network
+device is then brought down.
+
+Fixes: 2b3e88ea6528 ("net: phy: improve phy state checking")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/phy.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/phy/phy.c
++++ b/drivers/net/phy/phy.c
+@@ -948,7 +948,7 @@ void phy_stop(struct phy_device *phydev)
+ {
+       struct net_device *dev = phydev->attached_dev;
+-      if (!phy_is_started(phydev)) {
++      if (!phy_is_started(phydev) && phydev->state != PHY_DOWN) {
+               WARN(1, "called from state %s\n",
+                    phy_state_to_str(phydev->state));
+               return;
diff --git a/queue-5.8/net-qrtr-check-skb_put_padto-return-value.patch b/queue-5.8/net-qrtr-check-skb_put_padto-return-value.patch
new file mode 100644 (file)
index 0000000..d86ffb3
--- /dev/null
@@ -0,0 +1,166 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 9 Sep 2020 01:27:39 -0700
+Subject: net: qrtr: check skb_put_padto() return value
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 3ca1a42a52ca4b4f02061683851692ad65fefac8 ]
+
+If skb_put_padto() returns an error, the skb has been freed.
+Better not touch it anymore, as reported by syzbot [1].
+
+Note to qrtr maintainers: this suggests qrtr_sendmsg()
+should adjust sock_alloc_send_skb()'s second parameter
+to account for the potential added alignment, to avoid
+reallocation.
+
+[1]
+
+BUG: KASAN: use-after-free in __skb_insert include/linux/skbuff.h:1907 [inline]
+BUG: KASAN: use-after-free in __skb_queue_before include/linux/skbuff.h:2016 [inline]
+BUG: KASAN: use-after-free in __skb_queue_tail include/linux/skbuff.h:2049 [inline]
+BUG: KASAN: use-after-free in skb_queue_tail+0x6b/0x120 net/core/skbuff.c:3146
+Write of size 8 at addr ffff88804d8ab3c0 by task syz-executor.4/4316
+
+CPU: 1 PID: 4316 Comm: syz-executor.4 Not tainted 5.9.0-rc4-syzkaller #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Call Trace:
+ __dump_stack lib/dump_stack.c:77 [inline]
+ dump_stack+0x1d6/0x29e lib/dump_stack.c:118
+ print_address_description+0x66/0x620 mm/kasan/report.c:383
+ __kasan_report mm/kasan/report.c:513 [inline]
+ kasan_report+0x132/0x1d0 mm/kasan/report.c:530
+ __skb_insert include/linux/skbuff.h:1907 [inline]
+ __skb_queue_before include/linux/skbuff.h:2016 [inline]
+ __skb_queue_tail include/linux/skbuff.h:2049 [inline]
+ skb_queue_tail+0x6b/0x120 net/core/skbuff.c:3146
+ qrtr_tun_send+0x1a/0x40 net/qrtr/tun.c:23
+ qrtr_node_enqueue+0x44f/0xc00 net/qrtr/qrtr.c:364
+ qrtr_bcast_enqueue+0xbe/0x140 net/qrtr/qrtr.c:861
+ qrtr_sendmsg+0x680/0x9c0 net/qrtr/qrtr.c:960
+ sock_sendmsg_nosec net/socket.c:651 [inline]
+ sock_sendmsg net/socket.c:671 [inline]
+ sock_write_iter+0x317/0x470 net/socket.c:998
+ call_write_iter include/linux/fs.h:1882 [inline]
+ new_sync_write fs/read_write.c:503 [inline]
+ vfs_write+0xa96/0xd10 fs/read_write.c:578
+ ksys_write+0x11b/0x220 fs/read_write.c:631
+ do_syscall_64+0x31/0x70 arch/x86/entry/common.c:46
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+RIP: 0033:0x45d5b9
+Code: 5d b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 2b b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00
+RSP: 002b:00007f84b5b81c78 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
+RAX: ffffffffffffffda RBX: 0000000000038b40 RCX: 000000000045d5b9
+RDX: 0000000000000055 RSI: 0000000020001240 RDI: 0000000000000003
+RBP: 00007f84b5b81ca0 R08: 0000000000000000 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000246 R12: 000000000000000f
+R13: 00007ffcbbf86daf R14: 00007f84b5b829c0 R15: 000000000118cf4c
+
+Allocated by task 4316:
+ kasan_save_stack mm/kasan/common.c:48 [inline]
+ kasan_set_track mm/kasan/common.c:56 [inline]
+ __kasan_kmalloc+0x100/0x130 mm/kasan/common.c:461
+ slab_post_alloc_hook+0x3e/0x290 mm/slab.h:518
+ slab_alloc mm/slab.c:3312 [inline]
+ kmem_cache_alloc+0x1c1/0x2d0 mm/slab.c:3482
+ skb_clone+0x1b2/0x370 net/core/skbuff.c:1449
+ qrtr_bcast_enqueue+0x6d/0x140 net/qrtr/qrtr.c:857
+ qrtr_sendmsg+0x680/0x9c0 net/qrtr/qrtr.c:960
+ sock_sendmsg_nosec net/socket.c:651 [inline]
+ sock_sendmsg net/socket.c:671 [inline]
+ sock_write_iter+0x317/0x470 net/socket.c:998
+ call_write_iter include/linux/fs.h:1882 [inline]
+ new_sync_write fs/read_write.c:503 [inline]
+ vfs_write+0xa96/0xd10 fs/read_write.c:578
+ ksys_write+0x11b/0x220 fs/read_write.c:631
+ do_syscall_64+0x31/0x70 arch/x86/entry/common.c:46
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+Freed by task 4316:
+ kasan_save_stack mm/kasan/common.c:48 [inline]
+ kasan_set_track+0x3d/0x70 mm/kasan/common.c:56
+ kasan_set_free_info+0x17/0x30 mm/kasan/generic.c:355
+ __kasan_slab_free+0xdd/0x110 mm/kasan/common.c:422
+ __cache_free mm/slab.c:3418 [inline]
+ kmem_cache_free+0x82/0xf0 mm/slab.c:3693
+ __skb_pad+0x3f5/0x5a0 net/core/skbuff.c:1823
+ __skb_put_padto include/linux/skbuff.h:3233 [inline]
+ skb_put_padto include/linux/skbuff.h:3252 [inline]
+ qrtr_node_enqueue+0x62f/0xc00 net/qrtr/qrtr.c:360
+ qrtr_bcast_enqueue+0xbe/0x140 net/qrtr/qrtr.c:861
+ qrtr_sendmsg+0x680/0x9c0 net/qrtr/qrtr.c:960
+ sock_sendmsg_nosec net/socket.c:651 [inline]
+ sock_sendmsg net/socket.c:671 [inline]
+ sock_write_iter+0x317/0x470 net/socket.c:998
+ call_write_iter include/linux/fs.h:1882 [inline]
+ new_sync_write fs/read_write.c:503 [inline]
+ vfs_write+0xa96/0xd10 fs/read_write.c:578
+ ksys_write+0x11b/0x220 fs/read_write.c:631
+ do_syscall_64+0x31/0x70 arch/x86/entry/common.c:46
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+The buggy address belongs to the object at ffff88804d8ab3c0
+ which belongs to the cache skbuff_head_cache of size 224
+The buggy address is located 0 bytes inside of
+ 224-byte region [ffff88804d8ab3c0, ffff88804d8ab4a0)
+The buggy address belongs to the page:
+page:00000000ea8cccfb refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff88804d8abb40 pfn:0x4d8ab
+flags: 0xfffe0000000200(slab)
+raw: 00fffe0000000200 ffffea0002237ec8 ffffea00029b3388 ffff88821bb66800
+raw: ffff88804d8abb40 ffff88804d8ab000 000000010000000b 0000000000000000
+page dumped because: kasan: bad access detected
+
+Fixes: ce57785bf91b ("net: qrtr: fix len of skb_put_padto in qrtr_node_enqueue")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Cc: Carl Huang <cjhuang@codeaurora.org>
+Cc: Wen Gong <wgong@codeaurora.org>
+Cc: Bjorn Andersson <bjorn.andersson@linaro.org>
+Cc: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Acked-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/qrtr/qrtr.c |   21 +++++++++++----------
+ 1 file changed, 11 insertions(+), 10 deletions(-)
+
+--- a/net/qrtr/qrtr.c
++++ b/net/qrtr/qrtr.c
+@@ -332,8 +332,7 @@ static int qrtr_node_enqueue(struct qrtr
+ {
+       struct qrtr_hdr_v1 *hdr;
+       size_t len = skb->len;
+-      int rc = -ENODEV;
+-      int confirm_rx;
++      int rc, confirm_rx;
+       confirm_rx = qrtr_tx_wait(node, to->sq_node, to->sq_port, type);
+       if (confirm_rx < 0) {
+@@ -357,15 +356,17 @@ static int qrtr_node_enqueue(struct qrtr
+       hdr->size = cpu_to_le32(len);
+       hdr->confirm_rx = !!confirm_rx;
+-      skb_put_padto(skb, ALIGN(len, 4) + sizeof(*hdr));
+-
+-      mutex_lock(&node->ep_lock);
+-      if (node->ep)
+-              rc = node->ep->xmit(node->ep, skb);
+-      else
+-              kfree_skb(skb);
+-      mutex_unlock(&node->ep_lock);
++      rc = skb_put_padto(skb, ALIGN(len, 4) + sizeof(*hdr));
++      if (!rc) {
++              mutex_lock(&node->ep_lock);
++              rc = -ENODEV;
++              if (node->ep)
++                      rc = node->ep->xmit(node->ep, skb);
++              else
++                      kfree_skb(skb);
++              mutex_unlock(&node->ep_lock);
++      }
+       /* Need to ensure that a subsequent message carries the otherwise lost
+        * confirm_rx flag if we dropped this one */
+       if (rc && confirm_rx)
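
The contract that bit qrtr here: skb_put_padto() frees the skb when it fails, so after a non-zero return the pointer is dead and must not be queued, freed again, or written through. A userspace analogue of that consume-on-error ownership rule (all names are illustrative):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct buf { size_t len; char *data; };

    /* pads to 'want' bytes; on failure the buffer is freed,
     * the same way skb_put_padto() frees the skb */
    static int pad_buffer(struct buf *b, size_t want)
    {
            char *p = realloc(b->data, want);
            if (!p) {
                    free(b->data);
                    free(b);
                    return -1;
            }
            memset(p + b->len, 0, want - b->len);
            b->data = p;
            b->len = want;
            return 0;
    }

    static int enqueue(struct buf *b, size_t want)
    {
            if (pad_buffer(b, want))
                    return -1;     /* b is gone: returning is all we may do */
            printf("queued %zu bytes\n", b->len);
            free(b->data);
            free(b);
            return 0;
    }

    int main(void)
    {
            struct buf *b = calloc(1, sizeof(*b));
            b->data = calloc(1, 8);
            b->len = 8;
            return enqueue(b, 16);
    }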
diff --git a/queue-5.8/net-sch_generic-aviod-concurrent-reset-and-enqueue-op-for-lockless-qdisc.patch b/queue-5.8/net-sch_generic-aviod-concurrent-reset-and-enqueue-op-for-lockless-qdisc.patch
new file mode 100644 (file)
index 0000000..71da2f6
--- /dev/null
@@ -0,0 +1,111 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Yunsheng Lin <linyunsheng@huawei.com>
+Date: Tue, 8 Sep 2020 19:02:34 +0800
+Subject: net: sch_generic: aviod concurrent reset and enqueue op for lockless qdisc
+
+From: Yunsheng Lin <linyunsheng@huawei.com>
+
+[ Upstream commit 2fb541c862c987d02dfdf28f1545016deecfa0d5 ]
+
+Currently there are concurrent reset and enqueue operations on the
+same lockless qdisc, because there is no lock to synchronize the
+q->enqueue() in __dev_xmit_skb() with the qdisc reset operation in
+qdisc_deactivate() called by dev_deactivate_queue(). This may cause
+an out-of-bounds access to priv->ring[] in the hns3 driver if the user
+has requested a smaller queue num while __dev_xmit_skb() still enqueues
+an skb with a larger queue_mapping after the corresponding qdisc is
+reset, and hns3_nic_net_xmit() is called with that skb later.
+
+Reuse the existing synchronize_net() in dev_deactivate_many() to
+make sure an skb with a larger queue_mapping enqueued to the old qdisc
+(which is saved in dev_queue->qdisc_sleeping) will always be reset when
+dev_reset_queue() is called.
+
+Fixes: 6b3ba9146fe6 ("net: sched: allow qdiscs to handle locking")
+Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_generic.c |   48 +++++++++++++++++++++++++++++++++---------------
+ 1 file changed, 33 insertions(+), 15 deletions(-)
+
+--- a/net/sched/sch_generic.c
++++ b/net/sched/sch_generic.c
+@@ -1131,24 +1131,10 @@ EXPORT_SYMBOL(dev_activate);
+ static void qdisc_deactivate(struct Qdisc *qdisc)
+ {
+-      bool nolock = qdisc->flags & TCQ_F_NOLOCK;
+-
+       if (qdisc->flags & TCQ_F_BUILTIN)
+               return;
+-      if (test_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state))
+-              return;
+-
+-      if (nolock)
+-              spin_lock_bh(&qdisc->seqlock);
+-      spin_lock_bh(qdisc_lock(qdisc));
+       set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);
+-
+-      qdisc_reset(qdisc);
+-
+-      spin_unlock_bh(qdisc_lock(qdisc));
+-      if (nolock)
+-              spin_unlock_bh(&qdisc->seqlock);
+ }
+ static void dev_deactivate_queue(struct net_device *dev,
+@@ -1165,6 +1151,30 @@ static void dev_deactivate_queue(struct
+       }
+ }
++static void dev_reset_queue(struct net_device *dev,
++                          struct netdev_queue *dev_queue,
++                          void *_unused)
++{
++      struct Qdisc *qdisc;
++      bool nolock;
++
++      qdisc = dev_queue->qdisc_sleeping;
++      if (!qdisc)
++              return;
++
++      nolock = qdisc->flags & TCQ_F_NOLOCK;
++
++      if (nolock)
++              spin_lock_bh(&qdisc->seqlock);
++      spin_lock_bh(qdisc_lock(qdisc));
++
++      qdisc_reset(qdisc);
++
++      spin_unlock_bh(qdisc_lock(qdisc));
++      if (nolock)
++              spin_unlock_bh(&qdisc->seqlock);
++}
++
+ static bool some_qdisc_is_busy(struct net_device *dev)
+ {
+       unsigned int i;
+@@ -1213,12 +1223,20 @@ void dev_deactivate_many(struct list_hea
+               dev_watchdog_down(dev);
+       }
+-      /* Wait for outstanding qdisc-less dev_queue_xmit calls.
++      /* Wait for outstanding qdisc-less dev_queue_xmit calls or
++       * outstanding qdisc enqueuing calls.
+        * This is avoided if all devices are in dismantle phase :
+        * Caller will call synchronize_net() for us
+        */
+       synchronize_net();
++      list_for_each_entry(dev, head, close_list) {
++              netdev_for_each_tx_queue(dev, dev_reset_queue, NULL);
++
++              if (dev_ingress_queue(dev))
++                      dev_reset_queue(dev, dev_ingress_queue(dev), NULL);
++      }
++
+       /* Wait for outstanding qdisc_run calls. */
+       list_for_each_entry(dev, head, close_list) {
+               while (some_qdisc_is_busy(dev)) {
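
The fix turns teardown into three ordered phases: mark every qdisc deactivated, let one grace period flush out racing enqueues, and only then reset. A single-threaded toy model of that ordering (the real synchronization is RCU, via synchronize_net(); names here are illustrative):

    #include <stdio.h>
    #include <stdbool.h>

    struct qdisc { bool deactivated; int qlen; };

    static void enqueue(struct qdisc *q)
    {
            if (q->deactivated)
                    return;           /* __dev_xmit_skb() bails out */
            q->qlen++;
    }

    static void grace_period(void)
    {
            /* stands in for synchronize_net(): every enqueue that saw
             * deactivated == false has completed once this returns */
    }

    static void reset(struct qdisc *q)
    {
            q->qlen = 0;              /* qdisc_reset() under the locks */
    }

    int main(void)
    {
            struct qdisc q = { 0 };
            enqueue(&q);
            q.deactivated = true;     /* step 1: dev_deactivate_queue() */
            grace_period();           /* step 2: synchronize_net() */
            reset(&q);                /* step 3: dev_reset_queue() */
            printf("qlen after teardown: %d\n", q.qlen);
            return 0;
    }

Resetting before the grace period is exactly the old bug: a late enqueue could land after the reset and reference queues that no longer exist.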
diff --git a/queue-5.8/net-sched-initialize-with-0-before-setting-erspan-md-u.patch b/queue-5.8/net-sched-initialize-with-0-before-setting-erspan-md-u.patch
new file mode 100644 (file)
index 0000000..260de22
--- /dev/null
@@ -0,0 +1,37 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Xin Long <lucien.xin@gmail.com>
+Date: Sun, 13 Sep 2020 19:43:03 +0800
+Subject: net: sched: initialize with 0 before setting erspan md->u
+
+From: Xin Long <lucien.xin@gmail.com>
+
+[ Upstream commit 8e1b3ac4786680c2d2b5a24e38a2d714c3bcd1ef ]
+
+In fl_set_erspan_opt(), all bits of the erspan md were set to 1, as this
+function is also used to set the opt MASK. However, when setting
+md->u.index for the opt VALUE, the remaining bits of the union md->u
+were left as 1. This caused the match on the whole md to fail when the
+version is 1 and only the index is set.
+
+This patch fixes it by zero-initializing the erspan md->u before
+setting it.
+
+Reported-by: Shuang Li <shuali@redhat.com>
+Fixes: 79b1011cb33d ("net: sched: allow flower to match erspan options")
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/cls_flower.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/sched/cls_flower.c
++++ b/net/sched/cls_flower.c
+@@ -1215,6 +1215,7 @@ static int fl_set_erspan_opt(const struc
+               }
+               if (tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_INDEX]) {
+                       nla = tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_INDEX];
++                      memset(&md->u, 0x00, sizeof(md->u));
+                       md->u.index = nla_get_be32(nla);
+               }
+       } else if (md->version == 2) {
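
Why the memset matters: erspan metadata keeps the v1 index and the larger v2 md2 struct in one union, and assigning only the 4-byte index leaves the union's remaining bytes at whatever the MASK path wrote there. A standalone demonstration with an illustrative layout (not the exact kernel struct):

    #include <stdio.h>
    #include <string.h>
    #include <stdint.h>

    union erspan_u {
            uint32_t index;        /* version 1 */
            uint8_t  md2[8];       /* version 2, the larger member */
    };

    int main(void)
    {
            union erspan_u u;

            memset(&u, 0xff, sizeof(u));   /* MASK path sets every bit */
            u.index = 42;                  /* VALUE path, v1: only 4 bytes */
            printf("stale byte: 0x%02x\n", u.md2[7]);  /* still 0xff */

            memset(&u, 0, sizeof(u));      /* the fix: zero first */
            u.index = 42;
            printf("clean byte: 0x%02x\n", u.md2[7]);  /* now 0x00 */
            return 0;
    }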
diff --git a/queue-5.8/net-sctp-fix-ipv6-ancestor_size-calc-in-sctp_copy_descendant.patch b/queue-5.8/net-sctp-fix-ipv6-ancestor_size-calc-in-sctp_copy_descendant.patch
new file mode 100644 (file)
index 0000000..1ef3252
--- /dev/null
@@ -0,0 +1,67 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Henry Ptasinski <hptasinski@google.com>
+Date: Sat, 19 Sep 2020 00:12:11 +0000
+Subject: net: sctp: Fix IPv6 ancestor_size calc in sctp_copy_descendant
+
+From: Henry Ptasinski <hptasinski@google.com>
+
+[ Upstream commit fe81d9f6182d1160e625894eecb3d7ff0222cac5 ]
+
+When calculating ancestor_size with IPv6 enabled, simply using
+sizeof(struct ipv6_pinfo) doesn't account for extra bytes needed for
+alignment in the struct sctp6_sock. On x86, there aren't any extra
+bytes, but on ARM the ipv6_pinfo structure is aligned on an 8-byte
+boundary so there were 4 pad bytes that were omitted from the
+ancestor_size calculation.  This would lead to corruption of the
+pd_lobby pointers, causing an oops when trying to free the sctp
+structure on socket close.
+
+Fixes: 636d25d557d1 ("sctp: not copy sctp_sock pd_lobby in sctp_copy_descendant")
+Signed-off-by: Henry Ptasinski <hptasinski@google.com>
+Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/sctp/structs.h |    8 +++++---
+ net/sctp/socket.c          |    9 +++------
+ 2 files changed, 8 insertions(+), 9 deletions(-)
+
+--- a/include/net/sctp/structs.h
++++ b/include/net/sctp/structs.h
+@@ -226,12 +226,14 @@ struct sctp_sock {
+               data_ready_signalled:1;
+       atomic_t pd_mode;
++
++      /* Fields after this point will be skipped on copies, like on accept
++       * and peeloff operations
++       */
++
+       /* Receive to here while partial delivery is in effect. */
+       struct sk_buff_head pd_lobby;
+-      /* These must be the last fields, as they will skipped on copies,
+-       * like on accept and peeloff operations
+-       */
+       struct list_head auto_asconf_list;
+       int do_auto_asconf;
+ };
+--- a/net/sctp/socket.c
++++ b/net/sctp/socket.c
+@@ -9457,13 +9457,10 @@ void sctp_copy_sock(struct sock *newsk,
+ static inline void sctp_copy_descendant(struct sock *sk_to,
+                                       const struct sock *sk_from)
+ {
+-      int ancestor_size = sizeof(struct inet_sock) +
+-                          sizeof(struct sctp_sock) -
+-                          offsetof(struct sctp_sock, pd_lobby);
+-
+-      if (sk_from->sk_family == PF_INET6)
+-              ancestor_size += sizeof(struct ipv6_pinfo);
++      size_t ancestor_size = sizeof(struct inet_sock);
++      ancestor_size += sk_from->sk_prot->obj_size;
++      ancestor_size -= offsetof(struct sctp_sock, pd_lobby);
+       __inet_sk_copy_descendant(sk_to, sk_from, ancestor_size);
+ }
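
The arithmetic bug generalizes: sizeof(a) + sizeof(b) undercounts whenever the second member forces alignment padding, which is why the fix switches to the protocol's real object size (sk_prot->obj_size) and an offsetof()-based subtraction. A standalone demonstration with illustrative structs (not the kernel layout):

    #include <stdio.h>
    #include <stddef.h>

    struct first  { char c[12]; };               /* ends unaligned */
    struct second { _Alignas(8) long long v; };  /* 8-byte aligned, like
                                                    ipv6_pinfo on ARM */
    struct combined { struct first f; struct second s; };

    int main(void)
    {
            /* 12 + 8 = 20, but 4 pad bytes sit between f and s */
            printf("sum of sizes: %zu\n",
                   sizeof(struct first) + sizeof(struct second));
            printf("real size:    %zu\n", sizeof(struct combined));
            printf("offset of s:  %zu\n", offsetof(struct combined, s));
            return 0;
    }

On a typical build this prints 20, 24 and 16: a region sized from the sum misses the pad bytes, and in sctp's case that off-by-four corrupted the pd_lobby pointers.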
diff --git a/queue-5.8/nfp-use-correct-define-to-return-none-fec.patch b/queue-5.8/nfp-use-correct-define-to-return-none-fec.patch
new file mode 100644 (file)
index 0000000..4267188
--- /dev/null
@@ -0,0 +1,35 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Jakub Kicinski <kuba@kernel.org>
+Date: Thu, 17 Sep 2020 10:52:57 -0700
+Subject: nfp: use correct define to return NONE fec
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit 5f6857e808a8bd078296575b417c4b9d160b9779 ]
+
+struct ethtool_fecparam carries bitmasks not bit numbers.
+We want to return 1 (NONE), not 0.
+
+Fixes: 0d0870938337 ("nfp: implement ethtool FEC mode settings")
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Reviewed-by: Simon Horman <simon.horman@netronome.com>
+Reviewed-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
++++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
+@@ -829,8 +829,8 @@ nfp_port_get_fecparam(struct net_device
+       struct nfp_eth_table_port *eth_port;
+       struct nfp_port *port;
+-      param->active_fec = ETHTOOL_FEC_NONE_BIT;
+-      param->fec = ETHTOOL_FEC_NONE_BIT;
++      param->active_fec = ETHTOOL_FEC_NONE;
++      param->fec = ETHTOOL_FEC_NONE;
+       port = nfp_port_from_netdev(netdev);
+       eth_port = nfp_port_get_eth_port(port);
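
The distinction, spelled out: the ethtool uapi defines bit numbers (an enum starting at 0) and derives the masks from them, and struct ethtool_fecparam carries the masks. A minimal rendering of that relationship (local names; the pattern mirrors include/uapi/linux/ethtool.h):

    #include <stdio.h>

    enum { FEC_NONE_BIT, FEC_AUTO_BIT, FEC_OFF_BIT };  /* bit numbers */
    #define FEC_NONE (1 << FEC_NONE_BIT)               /* mask */

    int main(void)
    {
            printf("FEC_NONE_BIT = %d\n", FEC_NONE_BIT); /* 0: reads as "no bits set" */
            printf("FEC_NONE     = %d\n", FEC_NONE);     /* 1: what fecparam expects */
            return 0;
    }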
diff --git a/queue-5.8/revert-netns-don-t-disable-bhs-when-locking-nsid_lock.patch b/queue-5.8/revert-netns-don-t-disable-bhs-when-locking-nsid_lock.patch
new file mode 100644 (file)
index 0000000..80221fa
--- /dev/null
@@ -0,0 +1,181 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Taehee Yoo <ap420073@gmail.com>
+Date: Mon, 7 Sep 2020 15:54:41 +0000
+Subject: Revert "netns: don't disable BHs when locking "nsid_lock""
+
+From: Taehee Yoo <ap420073@gmail.com>
+
+[ Upstream commit e1f469cd5866499ac40bfdca87411e1c525a10c7 ]
+
+This reverts commit 8d7e5dee972f1cde2ba96c621f1541fa36e7d4f4.
+
+To protect the netns id, the nsid_lock is used when a netns id is being
+allocated or removed by peernet2id_alloc() and unhash_nsid().
+The nsid_lock can be taken in BH context, but only spin_lock() is used
+in this code.
+Using spin_lock() instead of spin_lock_bh() can result in a deadlock in
+the following scenario reported by lockdep.
+In order to avoid the deadlock, spin_lock_bh() should be used instead
+of spin_lock() to acquire the nsid_lock.
+
+Test commands:
+    ip netns del nst
+    ip netns add nst
+    ip link add veth1 type veth peer name veth2
+    ip link set veth1 netns nst
+    ip netns exec nst ip link add name br1 type bridge vlan_filtering 1
+    ip netns exec nst ip link set dev br1 up
+    ip netns exec nst ip link set dev veth1 master br1
+    ip netns exec nst ip link set dev veth1 up
+    ip netns exec nst ip link add macvlan0 link br1 up type macvlan
+
+Splat looks like:
+[   33.615860][  T607] WARNING: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected
+[   33.617194][  T607] 5.9.0-rc1+ #665 Not tainted
+[ ... ]
+[   33.670615][  T607] Chain exists of:
+[   33.670615][  T607]   &mc->mca_lock --> &bridge_netdev_addr_lock_key --> &net->nsid_lock
+[   33.670615][  T607]
+[   33.673118][  T607]  Possible interrupt unsafe locking scenario:
+[   33.673118][  T607]
+[   33.674599][  T607]        CPU0                    CPU1
+[   33.675557][  T607]        ----                    ----
+[   33.676516][  T607]   lock(&net->nsid_lock);
+[   33.677306][  T607]                                local_irq_disable();
+[   33.678517][  T607]                                lock(&mc->mca_lock);
+[   33.679725][  T607]                                lock(&bridge_netdev_addr_lock_key);
+[   33.681166][  T607]   <Interrupt>
+[   33.681791][  T607]     lock(&mc->mca_lock);
+[   33.682579][  T607]
+[   33.682579][  T607]  *** DEADLOCK ***
+[ ... ]
+[   33.922046][  T607] stack backtrace:
+[   33.922999][  T607] CPU: 3 PID: 607 Comm: ip Not tainted 5.9.0-rc1+ #665
+[   33.924099][  T607] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014
+[   33.925714][  T607] Call Trace:
+[   33.926238][  T607]  dump_stack+0x78/0xab
+[   33.926905][  T607]  check_irq_usage+0x70b/0x720
+[   33.927708][  T607]  ? iterate_chain_key+0x60/0x60
+[   33.928507][  T607]  ? check_path+0x22/0x40
+[   33.929201][  T607]  ? check_noncircular+0xcf/0x180
+[   33.930024][  T607]  ? __lock_acquire+0x1952/0x1f20
+[   33.930860][  T607]  __lock_acquire+0x1952/0x1f20
+[   33.931667][  T607]  lock_acquire+0xaf/0x3a0
+[   33.932366][  T607]  ? peernet2id_alloc+0x3a/0x170
+[   33.933147][  T607]  ? br_port_fill_attrs+0x54c/0x6b0 [bridge]
+[   33.934140][  T607]  ? br_port_fill_attrs+0x5de/0x6b0 [bridge]
+[   33.935113][  T607]  ? kvm_sched_clock_read+0x14/0x30
+[   33.935974][  T607]  _raw_spin_lock+0x30/0x70
+[   33.936728][  T607]  ? peernet2id_alloc+0x3a/0x170
+[   33.937523][  T607]  peernet2id_alloc+0x3a/0x170
+[   33.938313][  T607]  rtnl_fill_ifinfo+0xb5e/0x1400
+[   33.939091][  T607]  rtmsg_ifinfo_build_skb+0x8a/0xf0
+[   33.939953][  T607]  rtmsg_ifinfo_event.part.39+0x17/0x50
+[   33.940863][  T607]  rtmsg_ifinfo+0x1f/0x30
+[   33.941571][  T607]  __dev_notify_flags+0xa5/0xf0
+[   33.942376][  T607]  ? __irq_work_queue_local+0x49/0x50
+[   33.943249][  T607]  ? irq_work_queue+0x1d/0x30
+[   33.943993][  T607]  ? __dev_set_promiscuity+0x7b/0x1a0
+[   33.944878][  T607]  __dev_set_promiscuity+0x7b/0x1a0
+[   33.945758][  T607]  dev_set_promiscuity+0x1e/0x50
+[   33.946582][  T607]  br_port_set_promisc+0x1f/0x40 [bridge]
+[   33.947487][  T607]  br_manage_promisc+0x8b/0xe0 [bridge]
+[   33.948388][  T607]  __dev_set_promiscuity+0x123/0x1a0
+[   33.949244][  T607]  __dev_set_rx_mode+0x68/0x90
+[   33.950021][  T607]  dev_uc_add+0x50/0x60
+[   33.950720][  T607]  macvlan_open+0x18e/0x1f0 [macvlan]
+[   33.951601][  T607]  __dev_open+0xd6/0x170
+[   33.952269][  T607]  __dev_change_flags+0x181/0x1d0
+[   33.953056][  T607]  rtnl_configure_link+0x2f/0xa0
+[   33.953884][  T607]  __rtnl_newlink+0x6b9/0x8e0
+[   33.954665][  T607]  ? __lock_acquire+0x95d/0x1f20
+[   33.955450][  T607]  ? lock_acquire+0xaf/0x3a0
+[   33.956193][  T607]  ? is_bpf_text_address+0x5/0xe0
+[   33.956999][  T607]  rtnl_newlink+0x47/0x70
+
+Acked-by: Guillaume Nault <gnault@redhat.com>
+Fixes: 8d7e5dee972f ("netns: don't disable BHs when locking "nsid_lock"")
+Reported-by: syzbot+3f960c64a104eaa2c813@syzkaller.appspotmail.com
+Signed-off-by: Taehee Yoo <ap420073@gmail.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/net_namespace.c |   22 +++++++++++-----------
+ 1 file changed, 11 insertions(+), 11 deletions(-)
+
+--- a/net/core/net_namespace.c
++++ b/net/core/net_namespace.c
+@@ -251,10 +251,10 @@ int peernet2id_alloc(struct net *net, st
+       if (refcount_read(&net->count) == 0)
+               return NETNSA_NSID_NOT_ASSIGNED;
+-      spin_lock(&net->nsid_lock);
++      spin_lock_bh(&net->nsid_lock);
+       id = __peernet2id(net, peer);
+       if (id >= 0) {
+-              spin_unlock(&net->nsid_lock);
++              spin_unlock_bh(&net->nsid_lock);
+               return id;
+       }
+@@ -264,12 +264,12 @@ int peernet2id_alloc(struct net *net, st
+        * just been idr_remove()'d from there in cleanup_net().
+        */
+       if (!maybe_get_net(peer)) {
+-              spin_unlock(&net->nsid_lock);
++              spin_unlock_bh(&net->nsid_lock);
+               return NETNSA_NSID_NOT_ASSIGNED;
+       }
+       id = alloc_netid(net, peer, -1);
+-      spin_unlock(&net->nsid_lock);
++      spin_unlock_bh(&net->nsid_lock);
+       put_net(peer);
+       if (id < 0)
+@@ -534,20 +534,20 @@ static void unhash_nsid(struct net *net,
+       for_each_net(tmp) {
+               int id;
+-              spin_lock(&tmp->nsid_lock);
++              spin_lock_bh(&tmp->nsid_lock);
+               id = __peernet2id(tmp, net);
+               if (id >= 0)
+                       idr_remove(&tmp->netns_ids, id);
+-              spin_unlock(&tmp->nsid_lock);
++              spin_unlock_bh(&tmp->nsid_lock);
+               if (id >= 0)
+                       rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0, NULL,
+                                         GFP_KERNEL);
+               if (tmp == last)
+                       break;
+       }
+-      spin_lock(&net->nsid_lock);
++      spin_lock_bh(&net->nsid_lock);
+       idr_destroy(&net->netns_ids);
+-      spin_unlock(&net->nsid_lock);
++      spin_unlock_bh(&net->nsid_lock);
+ }
+ static LLIST_HEAD(cleanup_list);
+@@ -760,9 +760,9 @@ static int rtnl_net_newid(struct sk_buff
+               return PTR_ERR(peer);
+       }
+-      spin_lock(&net->nsid_lock);
++      spin_lock_bh(&net->nsid_lock);
+       if (__peernet2id(net, peer) >= 0) {
+-              spin_unlock(&net->nsid_lock);
++              spin_unlock_bh(&net->nsid_lock);
+               err = -EEXIST;
+               NL_SET_BAD_ATTR(extack, nla);
+               NL_SET_ERR_MSG(extack,
+@@ -771,7 +771,7 @@ static int rtnl_net_newid(struct sk_buff
+       }
+       err = alloc_netid(net, peer, nsid);
+-      spin_unlock(&net->nsid_lock);
++      spin_unlock_bh(&net->nsid_lock);
+       if (err >= 0) {
+               rtnl_net_notifyid(net, RTM_NEWNSID, err, NETLINK_CB(skb).portid,
+                                 nlh, GFP_KERNEL);
diff --git a/queue-5.8/series b/queue-5.8/series
index 92f80e8b24ab501442f3bc3ee50af08bb67caeb3..c88da4e45fa5e68cc26d98b7e7ac8599c66606ec 100644 (file)
@@ -1,2 +1,56 @@
 ibmvnic-fix-null-tx_pools-and-rx_tools-issue-at-do_r.patch
 ibmvnic-add-missing-parenthesis-in-do_reset.patch
+act_ife-load-meta-modules-before-tcf_idr_check_alloc.patch
+bnxt_en-avoid-sending-firmware-messages-when-aer-error-is-detected.patch
+bnxt_en-fix-null-ptr-dereference-crash-in-bnxt_fw_reset_task.patch
+cxgb4-fix-memory-leak-during-module-unload.patch
+cxgb4-fix-offset-when-clearing-filter-byte-counters.patch
+geneve-add-transport-ports-in-route-lookup-for-geneve.patch
+hdlc_ppp-add-range-checks-in-ppp_cp_parse_cr.patch
+hinic-bump-up-the-timeout-of-set_func_state-cmd.patch
+ip-fix-tos-reflection-in-ack-and-reset-packets.patch
+ipv4-initialize-flowi4_multipath_hash-in-data-path.patch
+ipv4-update-exception-handling-for-multipath-routes-via-same-device.patch
+ipv6-avoid-lockdep-issue-in-fib6_del.patch
+net-bridge-br_vlan_get_pvid_rcu-should-dereference-the-vlan-group-under-rcu.patch
+net-dcb-validate-dcb_attr_dcb_buffer-argument.patch
+net-dsa-rtl8366-properly-clear-member-config.patch
+net-fix-bridge-enslavement-failure.patch
+net-ipv6-fix-kconfig-dependency-warning-for-ipv6_seg6_hmac.patch
+net-mlx5-fix-fte-cleanup.patch
+net-phy-call-phy_disable_interrupts-in-phy_attach_direct-instead.patch
+net-sched-initialize-with-0-before-setting-erspan-md-u.patch
+net-sch_generic-aviod-concurrent-reset-and-enqueue-op-for-lockless-qdisc.patch
+net-sctp-fix-ipv6-ancestor_size-calc-in-sctp_copy_descendant.patch
+nfp-use-correct-define-to-return-none-fec.patch
+taprio-fix-allowing-too-small-intervals.patch
+tipc-fix-memory-leak-in-tipc_group_create_member.patch
+tipc-fix-shutdown-of-connection-oriented-socket.patch
+tipc-use-skb_unshare-instead-in-tipc_buf_append.patch
+net-mlx5e-enable-adding-peer-miss-rules-only-if-merged-eswitch-is-supported.patch
+net-mlx5e-tls-do-not-expose-fpga-tls-counter-if-not-supported.patch
+bnxt_en-use-memcpy-to-copy-vpd-field-info.patch
+bnxt_en-return-proper-error-codes-in-bnxt_show_temp.patch
+bnxt_en-protect-bnxt_set_eee-and-bnxt_set_pauseparam-with-mutex.patch
+net-lantiq-wake-tx-queue-again.patch
+net-lantiq-use-netif_tx_napi_add-for-tx-napi.patch
+net-lantiq-use-napi_complete_done.patch
+net-lantiq-disable-irqs-only-if-napi-gets-scheduled.patch
+net-phy-avoid-npd-upon-phy_detach-when-driver-is-unbound.patch
+net-phy-do-not-warn-in-phy_stop-on-phy_down.patch
+net-qrtr-check-skb_put_padto-return-value.patch
+net-add-__must_check-to-skb_put_padto.patch
+net-ethernet-ti-cpsw_new-fix-suspend-resume.patch
+wireguard-noise-take-lock-when-removing-handshake-entry-from-table.patch
+wireguard-peerlookup-take-lock-before-checking-hash-in-replace-operation.patch
+net-ipa-fix-u32_replace_bits-by-u32p_xxx-version.patch
+net-mlx5e-fix-memory-leak-of-tunnel-info-when-rule-under-multipath-not-ready.patch
+hinic-fix-rewaking-txq-after-netif_tx_disable.patch
+hv_netvsc-fix-hibernation-for-mlx5-vf-driver.patch
+net-dsa-link-interfaces-with-the-dsa-master-to-get-rid-of-lockdep-warnings.patch
+net-dsa-microchip-ksz8795-really-set-the-correct-number-of-ports.patch
+net-macb-fix-for-pause-frame-receive-enable-bit.patch
+revert-netns-don-t-disable-bhs-when-locking-nsid_lock.patch
+net-mlx5e-use-rcu-to-protect-rq-xdp_prog.patch
+net-mlx5e-use-synchronize_rcu-to-sync-with-napi.patch
+net-mlx5e-fix-endianness-when-calculating-pedit-mask-first-bit.patch
diff --git a/queue-5.8/taprio-fix-allowing-too-small-intervals.patch b/queue-5.8/taprio-fix-allowing-too-small-intervals.patch
new file mode 100644 (file)
index 0000000..07c88e1
--- /dev/null
@@ -0,0 +1,116 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Vinicius Costa Gomes <vinicius.gomes@intel.com>
+Date: Wed, 9 Sep 2020 17:03:11 -0700
+Subject: taprio: Fix allowing too small intervals
+
+From: Vinicius Costa Gomes <vinicius.gomes@intel.com>
+
+[ Upstream commit b5b73b26b3ca34574124ed7ae9c5ba8391a7f176 ]
+
+It's possible for the user to specify an interval too small to allow
+any packet to be transmitted. Rejecting such intervals also avoids
+the hrtimer handler starving the other threads by firing too
+often.
+
+The solution is to reject intervals that, at the current link speed,
+would not allow even a minimum-size Ethernet frame to be transmitted.
+
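+For intuition, here is a minimal userspace sketch of the arithmetic
+behind the new check (hypothetical and simplified: the kernel's
+length_to_duration() works in picoseconds per byte and tracks link
+speed changes; the names and formula below are illustrative only):
+
+    #include <stdint.h>
+    #include <stdio.h>
+
+    #define ETH_ZLEN 60 /* minimum Ethernet frame length, in bytes */
+
+    /* Nanoseconds needed to transmit len bytes at speed_mbps. */
+    static uint64_t length_to_duration_ns(int len, unsigned int speed_mbps)
+    {
+            return ((uint64_t)len * 8 * 1000) / speed_mbps;
+    }
+
+    int main(void)
+    {
+            uint32_t interval_ns = 300; /* candidate schedule entry */
+            uint64_t min_ns = length_to_duration_ns(ETH_ZLEN, 1000);
+
+            /* mirrors the patch's "interval < min_duration" test:
+             * at 1 Gb/s a 60-byte frame needs 480 ns on the wire */
+            if (interval_ns < min_ns)
+                    printf("rejected: %u ns < %llu ns\n",
+                           (unsigned)interval_ns,
+                           (unsigned long long)min_ns);
+            return 0;
+    }
+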
+Reported-by: syzbot+8267241609ae8c23b248@syzkaller.appspotmail.com
+Fixes: 5a781ccbd19e ("tc: Add support for configuring the taprio scheduler")
+Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_taprio.c |   28 +++++++++++++++++-----------
+ 1 file changed, 17 insertions(+), 11 deletions(-)
+
+--- a/net/sched/sch_taprio.c
++++ b/net/sched/sch_taprio.c
+@@ -777,9 +777,11 @@ static const struct nla_policy taprio_po
+       [TCA_TAPRIO_ATTR_TXTIME_DELAY]               = { .type = NLA_U32 },
+ };
+-static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry,
++static int fill_sched_entry(struct taprio_sched *q, struct nlattr **tb,
++                          struct sched_entry *entry,
+                           struct netlink_ext_ack *extack)
+ {
++      int min_duration = length_to_duration(q, ETH_ZLEN);
+       u32 interval = 0;
+       if (tb[TCA_TAPRIO_SCHED_ENTRY_CMD])
+@@ -794,7 +796,10 @@ static int fill_sched_entry(struct nlatt
+               interval = nla_get_u32(
+                       tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]);
+-      if (interval == 0) {
++      /* The interval should allow at least the minimum ethernet
++       * frame to go out.
++       */
++      if (interval < min_duration) {
+               NL_SET_ERR_MSG(extack, "Invalid interval for schedule entry");
+               return -EINVAL;
+       }
+@@ -804,8 +809,9 @@ static int fill_sched_entry(struct nlatt
+       return 0;
+ }
+-static int parse_sched_entry(struct nlattr *n, struct sched_entry *entry,
+-                           int index, struct netlink_ext_ack *extack)
++static int parse_sched_entry(struct taprio_sched *q, struct nlattr *n,
++                           struct sched_entry *entry, int index,
++                           struct netlink_ext_ack *extack)
+ {
+       struct nlattr *tb[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { };
+       int err;
+@@ -819,10 +825,10 @@ static int parse_sched_entry(struct nlat
+       entry->index = index;
+-      return fill_sched_entry(tb, entry, extack);
++      return fill_sched_entry(q, tb, entry, extack);
+ }
+-static int parse_sched_list(struct nlattr *list,
++static int parse_sched_list(struct taprio_sched *q, struct nlattr *list,
+                           struct sched_gate_list *sched,
+                           struct netlink_ext_ack *extack)
+ {
+@@ -847,7 +853,7 @@ static int parse_sched_list(struct nlatt
+                       return -ENOMEM;
+               }
+-              err = parse_sched_entry(n, entry, i, extack);
++              err = parse_sched_entry(q, n, entry, i, extack);
+               if (err < 0) {
+                       kfree(entry);
+                       return err;
+@@ -862,7 +868,7 @@ static int parse_sched_list(struct nlatt
+       return i;
+ }
+-static int parse_taprio_schedule(struct nlattr **tb,
++static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb,
+                                struct sched_gate_list *new,
+                                struct netlink_ext_ack *extack)
+ {
+@@ -883,8 +889,8 @@ static int parse_taprio_schedule(struct
+               new->cycle_time = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME]);
+       if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST])
+-              err = parse_sched_list(
+-                      tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST], new, extack);
++              err = parse_sched_list(q, tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST],
++                                     new, extack);
+       if (err < 0)
+               return err;
+@@ -1474,7 +1480,7 @@ static int taprio_change(struct Qdisc *s
+               goto free_sched;
+       }
+-      err = parse_taprio_schedule(tb, new_admin, extack);
++      err = parse_taprio_schedule(q, tb, new_admin, extack);
+       if (err < 0)
+               goto free_sched;
diff --git a/queue-5.8/tipc-fix-memory-leak-in-tipc_group_create_member.patch b/queue-5.8/tipc-fix-memory-leak-in-tipc_group_create_member.patch
new file mode 100644 (file)
index 0000000..4767f61
--- /dev/null
@@ -0,0 +1,73 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Peilin Ye <yepeilin.cs@gmail.com>
+Date: Sun, 13 Sep 2020 04:06:05 -0400
+Subject: tipc: Fix memory leak in tipc_group_create_member()
+
+From: Peilin Ye <yepeilin.cs@gmail.com>
+
+[ Upstream commit bb3a420d47ab00d7e1e5083286cab15235a96680 ]
+
+tipc_group_add_to_tree() returns silently if `key` matches `nkey` of an
+existing node, causing tipc_group_create_member() to leak memory. Let
+tipc_group_add_to_tree() return an error in such a case, so that
+tipc_group_create_member() can handle it properly.
+
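+As a sketch of the ownership rule the fix establishes (hypothetical,
+simplified types; the real code inserts into an rbtree keyed on
+node/port), the insert helper now reports a duplicate key instead of
+returning silently, and the caller frees what it allocated:
+
+    #include <errno.h>
+    #include <stdlib.h>
+
+    struct member { unsigned long long key; struct member *next; };
+
+    static int add_member(struct member **head, struct member *m)
+    {
+            for (struct member *it = *head; it; it = it->next)
+                    if (it->key == m->key)
+                            return -EEXIST; /* was: silent return */
+            m->next = *head;
+            *head = m;
+            return 0;
+    }
+
+    static struct member *create_member(struct member **head,
+                                        unsigned long long key)
+    {
+            struct member *m = calloc(1, sizeof(*m));
+
+            if (!m)
+                    return NULL;
+            m->key = key;
+            if (add_member(head, m) < 0) {
+                    free(m); /* no longer leaked on duplicate keys */
+                    return NULL;
+            }
+            return m;
+    }
+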
+Fixes: 75da2163dbb6 ("tipc: introduce communication groups")
+Reported-and-tested-by: syzbot+f95d90c454864b3b5bc9@syzkaller.appspotmail.com
+Cc: Hillf Danton <hdanton@sina.com>
+Link: https://syzkaller.appspot.com/bug?id=048390604fe1b60df34150265479202f10e13aff
+Signed-off-by: Peilin Ye <yepeilin.cs@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/tipc/group.c |   14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+--- a/net/tipc/group.c
++++ b/net/tipc/group.c
+@@ -273,8 +273,8 @@ static struct tipc_member *tipc_group_fi
+       return NULL;
+ }
+-static void tipc_group_add_to_tree(struct tipc_group *grp,
+-                                 struct tipc_member *m)
++static int tipc_group_add_to_tree(struct tipc_group *grp,
++                                struct tipc_member *m)
+ {
+       u64 nkey, key = (u64)m->node << 32 | m->port;
+       struct rb_node **n, *parent = NULL;
+@@ -291,10 +291,11 @@ static void tipc_group_add_to_tree(struc
+               else if (key > nkey)
+                       n = &(*n)->rb_right;
+               else
+-                      return;
++                      return -EEXIST;
+       }
+       rb_link_node(&m->tree_node, parent, n);
+       rb_insert_color(&m->tree_node, &grp->members);
++      return 0;
+ }
+ static struct tipc_member *tipc_group_create_member(struct tipc_group *grp,
+@@ -302,6 +303,7 @@ static struct tipc_member *tipc_group_cr
+                                                   u32 instance, int state)
+ {
+       struct tipc_member *m;
++      int ret;
+       m = kzalloc(sizeof(*m), GFP_ATOMIC);
+       if (!m)
+@@ -314,8 +316,12 @@ static struct tipc_member *tipc_group_cr
+       m->port = port;
+       m->instance = instance;
+       m->bc_acked = grp->bc_snd_nxt - 1;
++      ret = tipc_group_add_to_tree(grp, m);
++      if (ret < 0) {
++              kfree(m);
++              return NULL;
++      }
+       grp->member_cnt++;
+-      tipc_group_add_to_tree(grp, m);
+       tipc_nlist_add(&grp->dests, m->node);
+       m->state = state;
+       return m;
diff --git a/queue-5.8/tipc-fix-shutdown-of-connection-oriented-socket.patch b/queue-5.8/tipc-fix-shutdown-of-connection-oriented-socket.patch
new file mode 100644 (file)
index 0000000..2adf57a
--- /dev/null
@@ -0,0 +1,55 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Date: Sat, 5 Sep 2020 15:14:47 +0900
+Subject: tipc: fix shutdown() of connection oriented socket
+
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+
+[ Upstream commit a4b5cc9e10803ecba64a7d54c0f47e4564b4a980 ]
+
+I confirmed that the problem fixed by commit 2a63866c8b51a3f7 ("tipc: fix
+shutdown() of connectionless socket") also applies to stream sockets.
+
+----------
+#include <sys/socket.h>
+#include <unistd.h>
+#include <sys/wait.h>
+
+int main(int argc, char *argv[])
+{
+        int fds[2] = { -1, -1 };
+        socketpair(PF_TIPC, SOCK_STREAM /* or SOCK_DGRAM */, 0, fds);
+        if (fork() == 0)
+                _exit(read(fds[0], NULL, 1));
+        shutdown(fds[0], SHUT_RDWR); /* This must make read() return. */
+        wait(NULL); /* To be woken up by _exit(). */
+        return 0;
+}
+----------
+
+Since shutdown(SHUT_RDWR) should affect all processes sharing that socket,
+unconditionally setting sk->sk_shutdown to SHUTDOWN_MASK is the right
+behavior.
+
+Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Acked-by: Ying Xue <ying.xue@windriver.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/tipc/socket.c |    5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+--- a/net/tipc/socket.c
++++ b/net/tipc/socket.c
+@@ -2773,10 +2773,7 @@ static int tipc_shutdown(struct socket *
+       trace_tipc_sk_shutdown(sk, NULL, TIPC_DUMP_ALL, " ");
+       __tipc_shutdown(sock, TIPC_CONN_SHUTDOWN);
+-      if (tipc_sk_type_connectionless(sk))
+-              sk->sk_shutdown = SHUTDOWN_MASK;
+-      else
+-              sk->sk_shutdown = SEND_SHUTDOWN;
++      sk->sk_shutdown = SHUTDOWN_MASK;
+       if (sk->sk_state == TIPC_DISCONNECTING) {
+               /* Discard any unreceived messages */
diff --git a/queue-5.8/tipc-use-skb_unshare-instead-in-tipc_buf_append.patch b/queue-5.8/tipc-use-skb_unshare-instead-in-tipc_buf_append.patch
new file mode 100644 (file)
index 0000000..8bbabbb
--- /dev/null
@@ -0,0 +1,67 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: Xin Long <lucien.xin@gmail.com>
+Date: Sun, 13 Sep 2020 19:37:31 +0800
+Subject: tipc: use skb_unshare() instead in tipc_buf_append()
+
+From: Xin Long <lucien.xin@gmail.com>
+
+[ Upstream commit ff48b6222e65ebdba5a403ef1deba6214e749193 ]
+
+tipc_buf_append() may change an skb's frag_list, which causes
+problems when that skb is cloned: skb_unclone() doesn't really
+make the skb's frag_list safe to change.
+
+Shuang Li has reported a use-after-free issue caused by this
+when creating quite a few macvlan devices over the same device,
+where the broadcast packets are cloned and go up the stack:
+
+ [ ] BUG: KASAN: use-after-free in pskb_expand_head+0x86d/0xea0
+ [ ] Call Trace:
+ [ ]  dump_stack+0x7c/0xb0
+ [ ]  print_address_description.constprop.7+0x1a/0x220
+ [ ]  kasan_report.cold.10+0x37/0x7c
+ [ ]  check_memory_region+0x183/0x1e0
+ [ ]  pskb_expand_head+0x86d/0xea0
+ [ ]  process_backlog+0x1df/0x660
+ [ ]  net_rx_action+0x3b4/0xc90
+ [ ]
+ [ ] Allocated by task 1786:
+ [ ]  kmem_cache_alloc+0xbf/0x220
+ [ ]  skb_clone+0x10a/0x300
+ [ ]  macvlan_broadcast+0x2f6/0x590 [macvlan]
+ [ ]  macvlan_process_broadcast+0x37c/0x516 [macvlan]
+ [ ]  process_one_work+0x66a/0x1060
+ [ ]  worker_thread+0x87/0xb10
+ [ ]
+ [ ] Freed by task 3253:
+ [ ]  kmem_cache_free+0x82/0x2a0
+ [ ]  skb_release_data+0x2c3/0x6e0
+ [ ]  kfree_skb+0x78/0x1d0
+ [ ]  tipc_recvmsg+0x3be/0xa40 [tipc]
+
+So fix it by using skb_unshare() instead, which creates a new skb
+for the cloned frag, making its frag_list safe to change. A similar
+approach is taken in sctp_make_reassembled_event(), which uses
+skb_copy().
+
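+A userspace analogy of the difference (hypothetical names, simplified:
+skb_unclone() only privatizes the skb header, while skb_unshare()
+copies the whole skb when it is cloned, so shared data is never
+mutated in place):
+
+    #include <stdlib.h>
+    #include <string.h>
+
+    struct buf {
+            int shared;    /* stands in for skb_cloned() */
+            char data[64]; /* stands in for the frag_list we edit */
+    };
+
+    /* Return a buffer that is safe to modify: the original if it is
+     * exclusive, otherwise a fresh copy, leaving the shared one
+     * untouched for its other users. */
+    static struct buf *buf_unshare(struct buf *b)
+    {
+            struct buf *copy;
+
+            if (!b->shared)
+                    return b;
+            copy = malloc(sizeof(*copy));
+            if (!copy)
+                    return NULL; /* callers must handle this, as the patch does */
+            memcpy(copy, b, sizeof(*copy));
+            copy->shared = 0;
+            return copy;
+    }
+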
+Reported-by: Shuang Li <shuali@redhat.com>
+Fixes: 37e22164a8a3 ("tipc: rename and move message reassembly function")
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/tipc/msg.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/tipc/msg.c
++++ b/net/tipc/msg.c
+@@ -150,7 +150,8 @@ int tipc_buf_append(struct sk_buff **hea
+       if (fragid == FIRST_FRAGMENT) {
+               if (unlikely(head))
+                       goto err;
+-              if (unlikely(skb_unclone(frag, GFP_ATOMIC)))
++              frag = skb_unshare(frag, GFP_ATOMIC);
++              if (unlikely(!frag))
+                       goto err;
+               head = *headbuf = frag;
+               *buf = NULL;
diff --git a/queue-5.8/wireguard-noise-take-lock-when-removing-handshake-entry-from-table.patch b/queue-5.8/wireguard-noise-take-lock-when-removing-handshake-entry-from-table.patch
new file mode 100644 (file)
index 0000000..a9aa0d5
--- /dev/null
@@ -0,0 +1,128 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: "Jason A. Donenfeld" <Jason@zx2c4.com>
+Date: Wed, 9 Sep 2020 13:58:14 +0200
+Subject: wireguard: noise: take lock when removing handshake entry from table
+
+From: "Jason A. Donenfeld" <Jason@zx2c4.com>
+
+[ Upstream commit 9179ba31367bcf481c3c79b5f028c94faad9f30a ]
+
+Eric reported that syzkaller found a race of this variety:
+
+CPU 1                                       CPU 2
+-------------------------------------------|---------------------------------------
+wg_index_hashtable_replace(old, ...)       |
+  if (hlist_unhashed(&old->index_hash))    |
+                                           | wg_index_hashtable_remove(old)
+                                           |   hlist_del_init_rcu(&old->index_hash)
+                                           |     old->index_hash.pprev = NULL
+  hlist_replace_rcu(&old->index_hash, ...) |
+    *old->index_hash.pprev                 |
+
+Syzbot wasn't actually able to reproduce this more than once or create a
+reproducer, because the race window between checking "hlist_unhashed" and
+calling "hlist_replace_rcu" is just so small. Adding an mdelay(5) or
+similar there helps make this demonstrable using this simple script:
+
+    #!/bin/bash
+    set -ex
+    trap 'kill $pid1; kill $pid2; ip link del wg0; ip link del wg1' EXIT
+    ip link add wg0 type wireguard
+    ip link add wg1 type wireguard
+    wg set wg0 private-key <(wg genkey) listen-port 9999
+    wg set wg1 private-key <(wg genkey) peer $(wg show wg0 public-key) endpoint 127.0.0.1:9999 persistent-keepalive 1
+    wg set wg0 peer $(wg show wg1 public-key)
+    ip link set wg0 up
+    yes link set wg1 up | ip -force -batch - &
+    pid1=$!
+    yes link set wg1 down | ip -force -batch - &
+    pid2=$!
+    wait
+
+The fundamental underlying problem is that we permit calls to
+wg_index_hashtable_remove(handshake.entry) without requiring the caller
+to take the handshake mutex that is intended to protect members of
+handshake during mutations. This is consistently the case with calls to
+wg_index_hashtable_insert(handshake.entry) and
+wg_index_hashtable_replace(handshake.entry), but it's missing from a
+pertinent callsite of wg_index_hashtable_remove(handshake.entry). So,
+this patch makes sure that mutex is taken.
+
+The original code was a little bit funky though, in the form of:
+
+    remove(handshake.entry)
+    lock(), memzero(handshake.some_members), unlock()
+    remove(handshake.entry)
+
+The original intention of that double removal pattern outside the lock
+appears to be some attempt to prevent insertions that might happen while
+locks are dropped during expensive crypto operations, but actually, all
+callers of wg_index_hashtable_insert(handshake.entry) take the write
+lock and then explicitly check handshake.state, as they should, which
+the aforementioned memzero clears, which means an insertion should
+already be impossible. And regardless, the original intention was
+necessarily racy, since it wasn't guaranteed that something else would
+run after the unlock() instead of after the remove(). So, from a
+soundness perspective, it seems positive to remove what looks like a
+hack at best.
+
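+The shape of the resulting critical section, as a pthread sketch
+(hypothetical and simplified: the real code uses an rwsem and an RCU
+hash table), with the removal joining the zeroing under one lock so
+that no half-cleared window remains:
+
+    #include <pthread.h>
+    #include <string.h>
+
+    struct handshake {
+            pthread_rwlock_t lock;
+            int in_table;            /* stands in for the index_hash entry */
+            unsigned char state[32]; /* members that handshake_zero() clears */
+    };
+
+    static void handshake_clear(struct handshake *hs)
+    {
+            pthread_rwlock_wrlock(&hs->lock);
+            hs->in_table = 0; /* previously done outside the lock, twice */
+            memset(hs->state, 0, sizeof(hs->state));
+            pthread_rwlock_unlock(&hs->lock);
+    }
+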
+The crash from both syzbot and from the script above is as follows:
+
+  general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] PREEMPT SMP KASAN
+  KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007]
+  CPU: 0 PID: 7395 Comm: kworker/0:3 Not tainted 5.9.0-rc4-syzkaller #0
+  Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+  Workqueue: wg-kex-wg1 wg_packet_handshake_receive_worker
+  RIP: 0010:hlist_replace_rcu include/linux/rculist.h:505 [inline]
+  RIP: 0010:wg_index_hashtable_replace+0x176/0x330 drivers/net/wireguard/peerlookup.c:174
+  Code: 00 fc ff df 48 89 f9 48 c1 e9 03 80 3c 01 00 0f 85 44 01 00 00 48 b9 00 00 00 00 00 fc ff df 48 8b 45 10 48 89 c6 48 c1 ee 03 <80> 3c 0e 00 0f 85 06 01 00 00 48 85 d2 4c 89 28 74 47 e8 a3 4f b5
+  RSP: 0018:ffffc90006a97bf8 EFLAGS: 00010246
+  RAX: 0000000000000000 RBX: ffff888050ffc4f8 RCX: dffffc0000000000
+  RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88808e04e010
+  RBP: ffff88808e04e000 R08: 0000000000000001 R09: ffff8880543d0000
+  R10: ffffed100a87a000 R11: 000000000000016e R12: ffff8880543d0000
+  R13: ffff88808e04e008 R14: ffff888050ffc508 R15: ffff888050ffc500
+  FS:  0000000000000000(0000) GS:ffff8880ae600000(0000) knlGS:0000000000000000
+  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+  CR2: 00000000f5505db0 CR3: 0000000097cf7000 CR4: 00000000001526f0
+  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+  DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+  Call Trace:
+  wg_noise_handshake_begin_session+0x752/0xc9a drivers/net/wireguard/noise.c:820
+  wg_receive_handshake_packet drivers/net/wireguard/receive.c:183 [inline]
+  wg_packet_handshake_receive_worker+0x33b/0x730 drivers/net/wireguard/receive.c:220
+  process_one_work+0x94c/0x1670 kernel/workqueue.c:2269
+  worker_thread+0x64c/0x1120 kernel/workqueue.c:2415
+  kthread+0x3b5/0x4a0 kernel/kthread.c:292
+  ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:294
+
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Reported-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/wireguard/20200908145911.4090480-1-edumazet@google.com/
+Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
+Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/wireguard/noise.c |    5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+--- a/drivers/net/wireguard/noise.c
++++ b/drivers/net/wireguard/noise.c
+@@ -87,15 +87,12 @@ static void handshake_zero(struct noise_
+ void wg_noise_handshake_clear(struct noise_handshake *handshake)
+ {
++      down_write(&handshake->lock);
+       wg_index_hashtable_remove(
+                       handshake->entry.peer->device->index_hashtable,
+                       &handshake->entry);
+-      down_write(&handshake->lock);
+       handshake_zero(handshake);
+       up_write(&handshake->lock);
+-      wg_index_hashtable_remove(
+-                      handshake->entry.peer->device->index_hashtable,
+-                      &handshake->entry);
+ }
+ static struct noise_keypair *keypair_create(struct wg_peer *peer)
diff --git a/queue-5.8/wireguard-peerlookup-take-lock-before-checking-hash-in-replace-operation.patch b/queue-5.8/wireguard-peerlookup-take-lock-before-checking-hash-in-replace-operation.patch
new file mode 100644 (file)
index 0000000..285d419
--- /dev/null
@@ -0,0 +1,63 @@
+From foo@baz Fri Sep 25 09:54:34 AM CEST 2020
+From: "Jason A. Donenfeld" <Jason@zx2c4.com>
+Date: Wed, 9 Sep 2020 13:58:15 +0200
+Subject: wireguard: peerlookup: take lock before checking hash in replace operation
+
+From: "Jason A. Donenfeld" <Jason@zx2c4.com>
+
+[ Upstream commit 6147f7b1e90ff09bd52afc8b9206a7fcd133daf7 ]
+
+Eric's suggested fix for the previous commit's mentioned race condition
+was to simply take the table->lock in wg_index_hashtable_replace(). The
+table->lock of the hash table is supposed to protect the bucket heads,
+not the entries, but actually, since all the mutator functions are
+already taking it, it makes sense to take it for the hlist_unhashed
+test too, as a defense-in-depth measure, so that it no longer
+races with deletions, regardless of what other locks are protecting
+individual entries. This is sensible from a performance perspective
+because, as Eric pointed out, the case of being unhashed is already the
+unlikely case, so this won't add common contention. And comparing
+instructions, this basically doesn't make much of a difference other
+than pushing and popping %r13, used by the new `bool ret`. More
+generally, I like the idea of locking consistency across table mutator
+functions, and this might let me rest slightly easier at night.
+
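+The shape of the fix, as a pthread sketch (hypothetical and simplified:
+the real code uses a spinlock and RCU hlists). The unhashed check moves
+inside the same critical section as the mutation, so it can no longer
+race with a concurrent removal:
+
+    #include <pthread.h>
+    #include <stdbool.h>
+
+    static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
+
+    struct entry { bool hashed; int index; };
+
+    static bool replace(struct entry *old, struct entry *new)
+    {
+            bool ret;
+
+            pthread_mutex_lock(&table_lock);
+            ret = old->hashed; /* checked under the lock, not before it */
+            if (ret) {
+                    new->index = old->index;
+                    new->hashed = true;
+                    old->hashed = false; /* stands in for hlist_replace_rcu() */
+            }
+            pthread_mutex_unlock(&table_lock);
+            return ret;
+    }
+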
+Suggested-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/wireguard/20200908145911.4090480-1-edumazet@google.com/
+Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
+Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/wireguard/peerlookup.c |   11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/wireguard/peerlookup.c
++++ b/drivers/net/wireguard/peerlookup.c
+@@ -167,9 +167,13 @@ bool wg_index_hashtable_replace(struct i
+                               struct index_hashtable_entry *old,
+                               struct index_hashtable_entry *new)
+ {
+-      if (unlikely(hlist_unhashed(&old->index_hash)))
+-              return false;
++      bool ret;
++
+       spin_lock_bh(&table->lock);
++      ret = !hlist_unhashed(&old->index_hash);
++      if (unlikely(!ret))
++              goto out;
++
+       new->index = old->index;
+       hlist_replace_rcu(&old->index_hash, &new->index_hash);
+@@ -180,8 +184,9 @@ bool wg_index_hashtable_replace(struct i
+        * simply gets dropped, which isn't terrible.
+        */
+       INIT_HLIST_NODE(&old->index_hash);
++out:
+       spin_unlock_bh(&table->lock);
+-      return true;
++      return ret;
+ }
+ void wg_index_hashtable_remove(struct index_hashtable *table,