--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Jonathan Lemon <jonathan.lemon@gmail.com>
+Date: Tue, 10 Dec 2019 08:39:46 -0800
+Subject: bnxt: apply computed clamp value for coalesce parameter
+
+From: Jonathan Lemon <jonathan.lemon@gmail.com>
+
+[ Upstream commit 6adc4601c2a1ac87b4ab8ed0cb55db6efd0264e8 ]
+
+After executing "ethtool -C eth0 rx-usecs-irq 0", the box becomes
+unresponsive, likely due to interrupt livelock. It appears that
+a minimum clamp value for the irq timer is computed, but is never
+applied.
+
+Fix by applying the corrected clamp value.
+
+Fixes: 74706afa712d ("bnxt_en: Update interrupt coalescing logic.")
+Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -6178,7 +6178,7 @@ static void bnxt_hwrm_set_coal_params(st
+ tmr = bnxt_usec_to_coal_tmr(bp, hw_coal->coal_ticks_irq);
+ val = clamp_t(u16, tmr, 1,
+ coal_cap->cmpl_aggr_dma_tmr_during_int_max);
+- req->cmpl_aggr_dma_tmr_during_int = cpu_to_le16(tmr);
++ req->cmpl_aggr_dma_tmr_during_int = cpu_to_le16(val);
+ req->enables |=
+ cpu_to_le16(BNXT_COAL_CMPL_AGGR_TMR_DURING_INT_ENABLE);
+ }
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+Date: Tue, 10 Dec 2019 02:49:13 -0500
+Subject: bnxt_en: Add missing devlink health reporters for VFs.
+
+From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+
+[ Upstream commit 7e334fc8003c7a38372cc98e7be6082670a47d29 ]
+
+The VF driver also needs to create the health reporters since
+VFs are also involved in firmware reset and recovery. Modify
+bnxt_dl_register() and bnxt_dl_unregister() so that they can
+be called by the VFs to register/unregister devlink. Only the PF
+will register the devlink parameters. With devlink registered,
+we can now create the health reporters on the VFs.
+
+Fixes: 6763c779c2d8 ("bnxt_en: Add new FW devlink_health_reporter")
+Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 13 ++++--------
+ drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c | 23 ++++++++++++++++------
+ 2 files changed, 22 insertions(+), 14 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -11343,12 +11343,11 @@ static void bnxt_remove_one(struct pci_d
+ struct net_device *dev = pci_get_drvdata(pdev);
+ struct bnxt *bp = netdev_priv(dev);
+
+- if (BNXT_PF(bp)) {
++ if (BNXT_PF(bp))
+ bnxt_sriov_disable(bp);
+- bnxt_dl_fw_reporters_destroy(bp, true);
+- bnxt_dl_unregister(bp);
+- }
+
++ bnxt_dl_fw_reporters_destroy(bp, true);
++ bnxt_dl_unregister(bp);
+ pci_disable_pcie_error_reporting(pdev);
+ unregister_netdev(dev);
+ bnxt_shutdown_tc(bp);
+@@ -11844,10 +11843,8 @@ static int bnxt_init_one(struct pci_dev
+ if (rc)
+ goto init_err_cleanup_tc;
+
+- if (BNXT_PF(bp)) {
+- bnxt_dl_register(bp);
+- bnxt_dl_fw_reporters_create(bp);
+- }
++ bnxt_dl_register(bp);
++ bnxt_dl_fw_reporters_create(bp);
+
+ netdev_info(dev, "%s found at mem %lx, node addr %pM\n",
+ board_info[ent->driver_data].name,
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
+@@ -226,6 +226,8 @@ static const struct devlink_ops bnxt_dl_
+ #endif /* CONFIG_BNXT_SRIOV */
+ };
+
++static const struct devlink_ops bnxt_vf_dl_ops;
++
+ enum bnxt_dl_param_id {
+ BNXT_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
+ BNXT_DEVLINK_PARAM_ID_GRE_VER_CHECK,
+@@ -439,7 +441,10 @@ int bnxt_dl_register(struct bnxt *bp)
+ return -ENOTSUPP;
+ }
+
+- dl = devlink_alloc(&bnxt_dl_ops, sizeof(struct bnxt_dl));
++ if (BNXT_PF(bp))
++ dl = devlink_alloc(&bnxt_dl_ops, sizeof(struct bnxt_dl));
++ else
++ dl = devlink_alloc(&bnxt_vf_dl_ops, sizeof(struct bnxt_dl));
+ if (!dl) {
+ netdev_warn(bp->dev, "devlink_alloc failed");
+ return -ENOMEM;
+@@ -458,6 +463,9 @@ int bnxt_dl_register(struct bnxt *bp)
+ goto err_dl_free;
+ }
+
++ if (!BNXT_PF(bp))
++ return 0;
++
+ rc = devlink_params_register(dl, bnxt_dl_params,
+ ARRAY_SIZE(bnxt_dl_params));
+ if (rc) {
+@@ -507,11 +515,14 @@ void bnxt_dl_unregister(struct bnxt *bp)
+ if (!dl)
+ return;
+
+- devlink_port_params_unregister(&bp->dl_port, bnxt_dl_port_params,
+- ARRAY_SIZE(bnxt_dl_port_params));
+- devlink_port_unregister(&bp->dl_port);
+- devlink_params_unregister(dl, bnxt_dl_params,
+- ARRAY_SIZE(bnxt_dl_params));
++ if (BNXT_PF(bp)) {
++ devlink_port_params_unregister(&bp->dl_port,
++ bnxt_dl_port_params,
++ ARRAY_SIZE(bnxt_dl_port_params));
++ devlink_port_unregister(&bp->dl_port);
++ devlink_params_unregister(dl, bnxt_dl_params,
++ ARRAY_SIZE(bnxt_dl_params));
++ }
+ devlink_unregister(dl);
+ devlink_free(dl);
+ }
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+Date: Tue, 10 Dec 2019 02:49:10 -0500
+Subject: bnxt_en: Fix bp->fw_health allocation and free logic.
+
+From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+
+[ Upstream commit 8280b38e01f71e0f89389ccad3fa43b79e57c604 ]
+
+bp->fw_health needs to be allocated for either the firmware initiated
+reset feature or the driver initiated error recovery feature. The
+current code is not allocating bp->fw_health for all the necessary cases.
+This patch corrects the logic to allocate bp->fw_health correctly when
+needed. If allocation fails, we clear the feature flags.
+
+We also add the missing kfree(bp->fw_health) when the driver is
+unloaded. If we get an async reset message from the firmware, we also
+need to make sure that we have a valid bp->fw_health before proceeding.
+
+Fixes: 07f83d72d238 ("bnxt_en: Discover firmware error recovery capabilities.")
+Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 36 +++++++++++++++++++++---------
+ drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1
+ 2 files changed, 27 insertions(+), 10 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -1995,6 +1995,9 @@ static int bnxt_async_event_process(stru
+ case ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY: {
+ u32 data1 = le32_to_cpu(cmpl->event_data1);
+
++ if (!bp->fw_health)
++ goto async_event_process_exit;
++
+ bp->fw_reset_timestamp = jiffies;
+ bp->fw_reset_min_dsecs = cmpl->timestamp_lo;
+ if (!bp->fw_reset_min_dsecs)
+@@ -4438,8 +4441,9 @@ static int bnxt_hwrm_func_drv_rgtr(struc
+ FUNC_DRV_RGTR_REQ_ENABLES_VER);
+
+ req.os_type = cpu_to_le16(FUNC_DRV_RGTR_REQ_OS_TYPE_LINUX);
+- flags = FUNC_DRV_RGTR_REQ_FLAGS_16BIT_VER_MODE |
+- FUNC_DRV_RGTR_REQ_FLAGS_HOT_RESET_SUPPORT;
++ flags = FUNC_DRV_RGTR_REQ_FLAGS_16BIT_VER_MODE;
++ if (bp->fw_cap & BNXT_FW_CAP_HOT_RESET)
++ flags |= FUNC_DRV_RGTR_REQ_FLAGS_HOT_RESET_SUPPORT;
+ if (bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY)
+ flags |= FUNC_DRV_RGTR_REQ_FLAGS_ERROR_RECOVERY_SUPPORT;
+ req.flags = cpu_to_le32(flags);
+@@ -7096,14 +7100,6 @@ static int bnxt_hwrm_error_recovery_qcfg
+ rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ if (rc)
+ goto err_recovery_out;
+- if (!fw_health) {
+- fw_health = kzalloc(sizeof(*fw_health), GFP_KERNEL);
+- bp->fw_health = fw_health;
+- if (!fw_health) {
+- rc = -ENOMEM;
+- goto err_recovery_out;
+- }
+- }
+ fw_health->flags = le32_to_cpu(resp->flags);
+ if ((fw_health->flags & ERROR_RECOVERY_QCFG_RESP_FLAGS_CO_CPU) &&
+ !(bp->fw_cap & BNXT_FW_CAP_KONG_MB_CHNL)) {
+@@ -10419,6 +10415,23 @@ static void bnxt_init_dflt_coal(struct b
+ bp->stats_coal_ticks = BNXT_DEF_STATS_COAL_TICKS;
+ }
+
++static void bnxt_alloc_fw_health(struct bnxt *bp)
++{
++ if (bp->fw_health)
++ return;
++
++ if (!(bp->fw_cap & BNXT_FW_CAP_HOT_RESET) &&
++ !(bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY))
++ return;
++
++ bp->fw_health = kzalloc(sizeof(*bp->fw_health), GFP_KERNEL);
++ if (!bp->fw_health) {
++ netdev_warn(bp->dev, "Failed to allocate fw_health\n");
++ bp->fw_cap &= ~BNXT_FW_CAP_HOT_RESET;
++ bp->fw_cap &= ~BNXT_FW_CAP_ERROR_RECOVERY;
++ }
++}
++
+ static int bnxt_fw_init_one_p1(struct bnxt *bp)
+ {
+ int rc;
+@@ -10465,6 +10478,7 @@ static int bnxt_fw_init_one_p2(struct bn
+ netdev_warn(bp->dev, "hwrm query adv flow mgnt failure rc: %d\n",
+ rc);
+
++ bnxt_alloc_fw_health(bp);
+ rc = bnxt_hwrm_error_recovery_qcfg(bp);
+ if (rc)
+ netdev_warn(bp->dev, "hwrm query error recovery failure rc: %d\n",
+@@ -11344,6 +11358,8 @@ static void bnxt_remove_one(struct pci_d
+ bnxt_dcb_free(bp);
+ kfree(bp->edev);
+ bp->edev = NULL;
++ kfree(bp->fw_health);
++ bp->fw_health = NULL;
+ bnxt_cleanup_pci(bp);
+ bnxt_free_ctx_mem(bp);
+ kfree(bp->ctx);
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+@@ -1658,6 +1658,7 @@ struct bnxt {
+ #define BNXT_FW_CAP_PCIE_STATS_SUPPORTED 0x00020000
+ #define BNXT_FW_CAP_EXT_STATS_SUPPORTED 0x00040000
+ #define BNXT_FW_CAP_ERR_RECOVER_RELOAD 0x00100000
++ #define BNXT_FW_CAP_HOT_RESET 0x00200000
+
+ #define BNXT_NEW_RM(bp) ((bp)->fw_cap & BNXT_FW_CAP_NEW_RM)
+ u32 hwrm_spec_code;
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Michael Chan <michael.chan@broadcom.com>
+Date: Tue, 10 Dec 2019 02:49:07 -0500
+Subject: bnxt_en: Fix MSIX request logic for RDMA driver.
+
+From: Michael Chan <michael.chan@broadcom.com>
+
+[ Upstream commit 0c722ec0a289c7f6b53f89bad1cfb7c4db3f7a62 ]
+
+The logic needs to check both bp->total_irqs and the reserved IRQs in
+hw_resc->resv_irqs if applicable and see if both are enough to cover
+the L2 and RDMA requested vectors. The current code is only checking
+bp->total_irqs and can fail in some code paths, such as the TX timeout
+code path with the RDMA driver requesting vectors after recovery. In
+this code path, we have not reserved enough MSIX resources for the
+RDMA driver yet.
+
+Fixes: 75720e6323a1 ("bnxt_en: Keep track of reserved IRQs.")
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
+@@ -113,8 +113,10 @@ static int bnxt_req_msix_vecs(struct bnx
+ {
+ struct net_device *dev = edev->net;
+ struct bnxt *bp = netdev_priv(dev);
++ struct bnxt_hw_resc *hw_resc;
+ int max_idx, max_cp_rings;
+ int avail_msix, idx;
++ int total_vecs;
+ int rc = 0;
+
+ ASSERT_RTNL();
+@@ -142,7 +144,10 @@ static int bnxt_req_msix_vecs(struct bnx
+ }
+ edev->ulp_tbl[ulp_id].msix_base = idx;
+ edev->ulp_tbl[ulp_id].msix_requested = avail_msix;
+- if (bp->total_irqs < (idx + avail_msix)) {
++ hw_resc = &bp->hw_resc;
++ total_vecs = idx + avail_msix;
++ if (bp->total_irqs < total_vecs ||
++ (BNXT_NEW_RM(bp) && hw_resc->resv_irqs < total_vecs)) {
+ if (netif_running(dev)) {
+ bnxt_close_nic(bp, true, false);
+ rc = bnxt_open_nic(bp, true, false);
+@@ -156,7 +161,6 @@ static int bnxt_req_msix_vecs(struct bnx
+ }
+
+ if (BNXT_NEW_RM(bp)) {
+- struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
+ int resv_msix;
+
+ resv_msix = hw_resc->resv_irqs - bp->cp_nr_rings;
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+Date: Tue, 10 Dec 2019 02:49:12 -0500
+Subject: bnxt_en: Fix the logic that creates the health reporters.
+
+From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+
+[ Upstream commit 937f188c1f4f89b3fa93ba31fc8587dc1fb14a22 ]
+
+Fix the logic to properly check the fw capabilities and create the
+devlink health reporters only when needed. The current code creates
+the reporters unconditionally as long as bp->fw_health is valid, and
+that's not correct.
+
+Call bnxt_dl_fw_reporters_create() directly from the init and reset
+code path instead of from bnxt_dl_register(). This allows the
+reporters to be adjusted when capabilities change. The same
+applies to bnxt_dl_fw_reporters_destroy().
+
+Fixes: 6763c779c2d8 ("bnxt_en: Add new FW devlink_health_reporter")
+Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 11 +++
+ drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c | 64 +++++++++++++++-------
+ drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h | 2
+ 3 files changed, 56 insertions(+), 21 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -10563,6 +10563,12 @@ static int bnxt_fw_init_one(struct bnxt
+ rc = bnxt_approve_mac(bp, bp->dev->dev_addr, false);
+ if (rc)
+ return rc;
++
++ /* In case fw capabilities have changed, destroy the unneeded
++ * reporters and create newly capable ones.
++ */
++ bnxt_dl_fw_reporters_destroy(bp, false);
++ bnxt_dl_fw_reporters_create(bp);
+ bnxt_fw_init_one_p3(bp);
+ return 0;
+ }
+@@ -11339,6 +11345,7 @@ static void bnxt_remove_one(struct pci_d
+
+ if (BNXT_PF(bp)) {
+ bnxt_sriov_disable(bp);
++ bnxt_dl_fw_reporters_destroy(bp, true);
+ bnxt_dl_unregister(bp);
+ }
+
+@@ -11837,8 +11844,10 @@ static int bnxt_init_one(struct pci_dev
+ if (rc)
+ goto init_err_cleanup_tc;
+
+- if (BNXT_PF(bp))
++ if (BNXT_PF(bp)) {
+ bnxt_dl_register(bp);
++ bnxt_dl_fw_reporters_create(bp);
++ }
+
+ netdev_info(dev, "%s found at mem %lx, node addr %pM\n",
+ board_info[ent->driver_data].name,
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
+@@ -102,21 +102,15 @@ struct devlink_health_reporter_ops bnxt_
+ .recover = bnxt_fw_fatal_recover,
+ };
+
+-static void bnxt_dl_fw_reporters_create(struct bnxt *bp)
++void bnxt_dl_fw_reporters_create(struct bnxt *bp)
+ {
+ struct bnxt_fw_health *health = bp->fw_health;
+
+- if (!health)
++ if (!bp->dl || !health)
+ return;
+
+- health->fw_reporter =
+- devlink_health_reporter_create(bp->dl, &bnxt_dl_fw_reporter_ops,
+- 0, false, bp);
+- if (IS_ERR(health->fw_reporter)) {
+- netdev_warn(bp->dev, "Failed to create FW health reporter, rc = %ld\n",
+- PTR_ERR(health->fw_reporter));
+- health->fw_reporter = NULL;
+- }
++ if (!(bp->fw_cap & BNXT_FW_CAP_HOT_RESET) || health->fw_reset_reporter)
++ goto err_recovery;
+
+ health->fw_reset_reporter =
+ devlink_health_reporter_create(bp->dl,
+@@ -126,8 +120,30 @@ static void bnxt_dl_fw_reporters_create(
+ netdev_warn(bp->dev, "Failed to create FW fatal health reporter, rc = %ld\n",
+ PTR_ERR(health->fw_reset_reporter));
+ health->fw_reset_reporter = NULL;
++ bp->fw_cap &= ~BNXT_FW_CAP_HOT_RESET;
+ }
+
++err_recovery:
++ if (!(bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY))
++ return;
++
++ if (!health->fw_reporter) {
++ health->fw_reporter =
++ devlink_health_reporter_create(bp->dl,
++ &bnxt_dl_fw_reporter_ops,
++ 0, false, bp);
++ if (IS_ERR(health->fw_reporter)) {
++ netdev_warn(bp->dev, "Failed to create FW health reporter, rc = %ld\n",
++ PTR_ERR(health->fw_reporter));
++ health->fw_reporter = NULL;
++ bp->fw_cap &= ~BNXT_FW_CAP_ERROR_RECOVERY;
++ return;
++ }
++ }
++
++ if (health->fw_fatal_reporter)
++ return;
++
+ health->fw_fatal_reporter =
+ devlink_health_reporter_create(bp->dl,
+ &bnxt_dl_fw_fatal_reporter_ops,
+@@ -136,24 +152,35 @@ static void bnxt_dl_fw_reporters_create(
+ netdev_warn(bp->dev, "Failed to create FW fatal health reporter, rc = %ld\n",
+ PTR_ERR(health->fw_fatal_reporter));
+ health->fw_fatal_reporter = NULL;
++ bp->fw_cap &= ~BNXT_FW_CAP_ERROR_RECOVERY;
+ }
+ }
+
+-static void bnxt_dl_fw_reporters_destroy(struct bnxt *bp)
++void bnxt_dl_fw_reporters_destroy(struct bnxt *bp, bool all)
+ {
+ struct bnxt_fw_health *health = bp->fw_health;
+
+- if (!health)
++ if (!bp->dl || !health)
+ return;
+
+- if (health->fw_reporter)
+- devlink_health_reporter_destroy(health->fw_reporter);
+-
+- if (health->fw_reset_reporter)
++ if ((all || !(bp->fw_cap & BNXT_FW_CAP_HOT_RESET)) &&
++ health->fw_reset_reporter) {
+ devlink_health_reporter_destroy(health->fw_reset_reporter);
++ health->fw_reset_reporter = NULL;
++ }
+
+- if (health->fw_fatal_reporter)
++ if ((bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY) && !all)
++ return;
++
++ if (health->fw_reporter) {
++ devlink_health_reporter_destroy(health->fw_reporter);
++ health->fw_reporter = NULL;
++ }
++
++ if (health->fw_fatal_reporter) {
+ devlink_health_reporter_destroy(health->fw_fatal_reporter);
++ health->fw_fatal_reporter = NULL;
++ }
+ }
+
+ void bnxt_devlink_health_report(struct bnxt *bp, unsigned long event)
+@@ -458,8 +485,6 @@ int bnxt_dl_register(struct bnxt *bp)
+
+ devlink_params_publish(dl);
+
+- bnxt_dl_fw_reporters_create(bp);
+-
+ return 0;
+
+ err_dl_port_unreg:
+@@ -482,7 +507,6 @@ void bnxt_dl_unregister(struct bnxt *bp)
+ if (!dl)
+ return;
+
+- bnxt_dl_fw_reporters_destroy(bp);
+ devlink_port_params_unregister(&bp->dl_port, bnxt_dl_port_params,
+ ARRAY_SIZE(bnxt_dl_port_params));
+ devlink_port_unregister(&bp->dl_port);
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h
+@@ -57,6 +57,8 @@ struct bnxt_dl_nvm_param {
+ };
+
+ void bnxt_devlink_health_report(struct bnxt *bp, unsigned long event);
++void bnxt_dl_fw_reporters_create(struct bnxt *bp);
++void bnxt_dl_fw_reporters_destroy(struct bnxt *bp, bool all);
+ int bnxt_dl_register(struct bnxt *bp);
+ void bnxt_dl_unregister(struct bnxt *bp);
+
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Michael Chan <michael.chan@broadcom.com>
+Date: Tue, 10 Dec 2019 02:49:08 -0500
+Subject: bnxt_en: Free context memory in the open path if firmware has been reset.
+
+From: Michael Chan <michael.chan@broadcom.com>
+
+[ Upstream commit 325f85f37e5b35807d86185bdf2c64d2980c44ba ]
+
+This will trigger new context memory to be rediscovered and allocated
+during the re-probe process after a firmware reset. Without this, the
+newly reset firmware does not have valid context memory and the driver
+will eventually fail to allocate some resources.
+
+Fixes: ec5d31e3c15d ("bnxt_en: Handle firmware reset status during IF_UP.")
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -8766,6 +8766,9 @@ static int bnxt_hwrm_if_change(struct bn
+ }
+ if (resc_reinit || fw_reset) {
+ if (fw_reset) {
++ bnxt_free_ctx_mem(bp);
++ kfree(bp->ctx);
++ bp->ctx = NULL;
+ rc = bnxt_fw_init_one(bp);
+ if (rc) {
+ set_bit(BNXT_STATE_ABORT_ERR, &bp->state);
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+Date: Tue, 10 Dec 2019 02:49:11 -0500
+Subject: bnxt_en: Remove unnecessary NULL checks for fw_health
+
+From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+
+[ Upstream commit 0797c10d2d1fa0d6f14612404781b348fc757c3e ]
+
+After fixing the allocation of bp->fw_health in the previous patch,
+the driver will not go through the fw reset and recovery code paths
+if bp->fw_health allocation fails. So we can now remove the
+unnecessary NULL checks.
+
+Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 ++----
+ drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c | 6 +-----
+ 2 files changed, 3 insertions(+), 9 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -9953,8 +9953,7 @@ static void bnxt_fw_health_check(struct
+ struct bnxt_fw_health *fw_health = bp->fw_health;
+ u32 val;
+
+- if (!fw_health || !fw_health->enabled ||
+- test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
++ if (!fw_health->enabled || test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
+ return;
+
+ if (fw_health->tmr_counter) {
+@@ -10697,8 +10696,7 @@ static void bnxt_fw_reset_task(struct wo
+ bnxt_queue_fw_reset_work(bp, bp->fw_reset_min_dsecs * HZ / 10);
+ return;
+ case BNXT_FW_RESET_STATE_ENABLE_DEV:
+- if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state) &&
+- bp->fw_health) {
++ if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state)) {
+ u32 val;
+
+ val = bnxt_fw_health_readl(bp,
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
+@@ -19,11 +19,10 @@ static int bnxt_fw_reporter_diagnose(str
+ struct devlink_fmsg *fmsg)
+ {
+ struct bnxt *bp = devlink_health_reporter_priv(reporter);
+- struct bnxt_fw_health *health = bp->fw_health;
+ u32 val, health_status;
+ int rc;
+
+- if (!health || test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
++ if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
+ return 0;
+
+ val = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG);
+@@ -162,9 +161,6 @@ void bnxt_devlink_health_report(struct b
+ struct bnxt_fw_health *fw_health = bp->fw_health;
+ struct bnxt_fw_reporter_ctx fw_reporter_ctx;
+
+- if (!fw_health)
+- return;
+-
+ fw_reporter_ctx.sp_event = event;
+ switch (event) {
+ case BNXT_FW_RESET_NOTIFY_SP_EVENT:
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+Date: Tue, 10 Dec 2019 02:49:09 -0500
+Subject: bnxt_en: Return error if FW returns more data than dump length
+
+From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+
+[ Upstream commit c74751f4c39232c31214ec6a3bc1c7e62f5c728b ]
+
+If any change happened in the configuration of VF in VM while
+collecting live dump, there could be a race and firmware can return
+more data than allocated dump length. Fix it by keeping track of
+the accumulated core dump length copied so far and abort the copy
+with error code if the next chunk of core dump will exceed the
+original dump length.
+
+Fixes: 6c5657d085ae ("bnxt_en: Add support for ethtool get dump.")
+Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 38 +++++++++++++++++-----
+ drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h | 4 ++
+ 2 files changed, 34 insertions(+), 8 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+@@ -3064,8 +3064,15 @@ static int bnxt_hwrm_dbg_dma_data(struct
+ }
+ }
+
+- if (info->dest_buf)
+- memcpy(info->dest_buf + off, dma_buf, len);
++ if (info->dest_buf) {
++ if ((info->seg_start + off + len) <=
++ BNXT_COREDUMP_BUF_LEN(info->buf_len)) {
++ memcpy(info->dest_buf + off, dma_buf, len);
++ } else {
++ rc = -ENOBUFS;
++ break;
++ }
++ }
+
+ if (cmn_req->req_type ==
+ cpu_to_le16(HWRM_DBG_COREDUMP_RETRIEVE))
+@@ -3119,7 +3126,7 @@ static int bnxt_hwrm_dbg_coredump_initia
+
+ static int bnxt_hwrm_dbg_coredump_retrieve(struct bnxt *bp, u16 component_id,
+ u16 segment_id, u32 *seg_len,
+- void *buf, u32 offset)
++ void *buf, u32 buf_len, u32 offset)
+ {
+ struct hwrm_dbg_coredump_retrieve_input req = {0};
+ struct bnxt_hwrm_dbg_dma_info info = {NULL};
+@@ -3134,8 +3141,11 @@ static int bnxt_hwrm_dbg_coredump_retrie
+ seq_no);
+ info.data_len_off = offsetof(struct hwrm_dbg_coredump_retrieve_output,
+ data_len);
+- if (buf)
++ if (buf) {
+ info.dest_buf = buf + offset;
++ info.buf_len = buf_len;
++ info.seg_start = offset;
++ }
+
+ rc = bnxt_hwrm_dbg_dma_data(bp, &req, sizeof(req), &info);
+ if (!rc)
+@@ -3225,14 +3235,17 @@ bnxt_fill_coredump_record(struct bnxt *b
+ static int bnxt_get_coredump(struct bnxt *bp, void *buf, u32 *dump_len)
+ {
+ u32 ver_get_resp_len = sizeof(struct hwrm_ver_get_output);
++ u32 offset = 0, seg_hdr_len, seg_record_len, buf_len = 0;
+ struct coredump_segment_record *seg_record = NULL;
+- u32 offset = 0, seg_hdr_len, seg_record_len;
+ struct bnxt_coredump_segment_hdr seg_hdr;
+ struct bnxt_coredump coredump = {NULL};
+ time64_t start_time;
+ u16 start_utc;
+ int rc = 0, i;
+
++ if (buf)
++ buf_len = *dump_len;
++
+ start_time = ktime_get_real_seconds();
+ start_utc = sys_tz.tz_minuteswest * 60;
+ seg_hdr_len = sizeof(seg_hdr);
+@@ -3265,6 +3278,12 @@ static int bnxt_get_coredump(struct bnxt
+ u32 duration = 0, seg_len = 0;
+ unsigned long start, end;
+
++ if (buf && ((offset + seg_hdr_len) >
++ BNXT_COREDUMP_BUF_LEN(buf_len))) {
++ rc = -ENOBUFS;
++ goto err;
++ }
++
+ start = jiffies;
+
+ rc = bnxt_hwrm_dbg_coredump_initiate(bp, comp_id, seg_id);
+@@ -3277,9 +3296,11 @@ static int bnxt_get_coredump(struct bnxt
+
+ /* Write segment data into the buffer */
+ rc = bnxt_hwrm_dbg_coredump_retrieve(bp, comp_id, seg_id,
+- &seg_len, buf,
++ &seg_len, buf, buf_len,
+ offset + seg_hdr_len);
+- if (rc)
++ if (rc && rc == -ENOBUFS)
++ goto err;
++ else if (rc)
+ netdev_err(bp->dev,
+ "Failed to retrieve coredump for seg = %d\n",
+ seg_record->segment_id);
+@@ -3309,7 +3330,8 @@ err:
+ rc);
+ kfree(coredump.data);
+ *dump_len += sizeof(struct bnxt_coredump_record);
+-
++ if (rc == -ENOBUFS)
++ netdev_err(bp->dev, "Firmware returned large coredump buffer");
+ return rc;
+ }
+
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h
+@@ -31,6 +31,8 @@ struct bnxt_coredump {
+ u16 total_segs;
+ };
+
++#define BNXT_COREDUMP_BUF_LEN(len) ((len) - sizeof(struct bnxt_coredump_record))
++
+ struct bnxt_hwrm_dbg_dma_info {
+ void *dest_buf;
+ int dest_buf_size;
+@@ -38,6 +40,8 @@ struct bnxt_hwrm_dbg_dma_info {
+ u16 seq_off;
+ u16 data_len_off;
+ u16 segs;
++ u32 seg_start;
++ u32 buf_len;
+ };
+
+ struct hwrm_dbg_cmn_input {
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Mahesh Bandewar <maheshb@google.com>
+Date: Fri, 6 Dec 2019 15:44:55 -0800
+Subject: bonding: fix active-backup transition after link failure
+
+From: Mahesh Bandewar <maheshb@google.com>
+
+[ Upstream commit 5d485ed88d48f8101a2067348e267c0aaf4ed486 ]
+
+After the recent fix in commit 1899bb325149 ("bonding: fix state
+transition issue in link monitoring"), the active-backup mode with
+miimon initially come-up fine but after a link-failure, both members
+transition into backup state.
+
+Following steps to reproduce the scenario (eth1 and eth2 are the
+slaves of the bond):
+
+ ip link set eth1 up
+ ip link set eth2 down
+ sleep 1
+ ip link set eth2 up
+ ip link set eth1 down
+ cat /sys/class/net/eth1/bonding_slave/state
+ cat /sys/class/net/eth2/bonding_slave/state
+
+Fixes: 1899bb325149 ("bonding: fix state transition issue in link monitoring")
+CC: Jay Vosburgh <jay.vosburgh@canonical.com>
+Signed-off-by: Mahesh Bandewar <maheshb@google.com>
+Acked-by: Jay Vosburgh <jay.vosburgh@canonical.com>
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/bonding/bond_main.c | 3 ---
+ 1 file changed, 3 deletions(-)
+
+--- a/drivers/net/bonding/bond_main.c
++++ b/drivers/net/bonding/bond_main.c
+@@ -2225,9 +2225,6 @@ static void bond_miimon_commit(struct bo
+ } else if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {
+ /* make it immediately active */
+ bond_set_active_slave(slave);
+- } else if (slave != primary) {
+- /* prevent it from being the active one */
+- bond_set_backup_slave(slave);
+ }
+
+ slave_info(bond->dev, slave->dev, "link status definitely up, %u Mbps %s duplex\n",
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
+Date: Mon, 30 Dec 2019 18:14:08 +0530
+Subject: cxgb4/cxgb4vf: fix flow control display for auto negotiation
+
+From: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
+
+[ Upstream commit 0caeaf6ad532f9be5a768a158627cb31921cc8b7 ]
+
+As per 802.3-2005, Section Two, Annex 28B, Table 28B-2 [1], when
+_only_ Rx pause is enabled, both symmetric and asymmetric pause
+towards local device must be enabled. Also, firmware returns the local
+device's flow control pause params as part of advertised capabilities
+and negotiated params as part of current link attributes. So, fix up
+ethtool's flow control pause params fetch logic to read from acaps,
+instead of linkattr.
+
+[1] https://standards.ieee.org/standard/802_3-2005.html
+
+Fixes: c3168cabe1af ("cxgb4/cxgbvf: Handle 32-bit fw port capabilities")
+Signed-off-by: Surendra Mobiya <surendra@chelsio.com>
+Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 1
+ drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c | 4 +--
+ drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 21 ++++++++++++--------
+ drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c | 4 +--
+ drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h | 1
+ drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c | 18 ++++++++++-------
+ 6 files changed, 30 insertions(+), 19 deletions(-)
+
+--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
++++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+@@ -503,6 +503,7 @@ struct link_config {
+
+ enum cc_pause requested_fc; /* flow control user has requested */
+ enum cc_pause fc; /* actual link flow control */
++ enum cc_pause advertised_fc; /* actual advertised flow control */
+
+ enum cc_fec requested_fec; /* Forward Error Correction: */
+ enum cc_fec fec; /* requested and actual in use */
+--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
++++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+@@ -793,8 +793,8 @@ static void get_pauseparam(struct net_de
+ struct port_info *p = netdev_priv(dev);
+
+ epause->autoneg = (p->link_cfg.requested_fc & PAUSE_AUTONEG) != 0;
+- epause->rx_pause = (p->link_cfg.fc & PAUSE_RX) != 0;
+- epause->tx_pause = (p->link_cfg.fc & PAUSE_TX) != 0;
++ epause->rx_pause = (p->link_cfg.advertised_fc & PAUSE_RX) != 0;
++ epause->tx_pause = (p->link_cfg.advertised_fc & PAUSE_TX) != 0;
+ }
+
+ static int set_pauseparam(struct net_device *dev,
+--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
++++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+@@ -4089,7 +4089,8 @@ static inline fw_port_cap32_t cc_to_fwca
+ if (cc_pause & PAUSE_TX)
+ fw_pause |= FW_PORT_CAP32_802_3_PAUSE;
+ else
+- fw_pause |= FW_PORT_CAP32_802_3_ASM_DIR;
++ fw_pause |= FW_PORT_CAP32_802_3_ASM_DIR |
++ FW_PORT_CAP32_802_3_PAUSE;
+ } else if (cc_pause & PAUSE_TX) {
+ fw_pause |= FW_PORT_CAP32_802_3_ASM_DIR;
+ }
+@@ -8563,17 +8564,17 @@ static fw_port_cap32_t lstatus_to_fwcap(
+ void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl)
+ {
+ const struct fw_port_cmd *cmd = (const void *)rpl;
+- int action = FW_PORT_CMD_ACTION_G(be32_to_cpu(cmd->action_to_len16));
+- struct adapter *adapter = pi->adapter;
++ fw_port_cap32_t pcaps, acaps, lpacaps, linkattr;
+ struct link_config *lc = &pi->link_cfg;
+- int link_ok, linkdnrc;
+- enum fw_port_type port_type;
++ struct adapter *adapter = pi->adapter;
++ unsigned int speed, fc, fec, adv_fc;
+ enum fw_port_module_type mod_type;
+- unsigned int speed, fc, fec;
+- fw_port_cap32_t pcaps, acaps, lpacaps, linkattr;
++ int action, link_ok, linkdnrc;
++ enum fw_port_type port_type;
+
+ /* Extract the various fields from the Port Information message.
+ */
++ action = FW_PORT_CMD_ACTION_G(be32_to_cpu(cmd->action_to_len16));
+ switch (action) {
+ case FW_PORT_ACTION_GET_PORT_INFO: {
+ u32 lstatus = be32_to_cpu(cmd->u.info.lstatus_to_modtype);
+@@ -8611,6 +8612,7 @@ void t4_handle_get_port_info(struct port
+ }
+
+ fec = fwcap_to_cc_fec(acaps);
++ adv_fc = fwcap_to_cc_pause(acaps);
+ fc = fwcap_to_cc_pause(linkattr);
+ speed = fwcap_to_speed(linkattr);
+
+@@ -8667,7 +8669,9 @@ void t4_handle_get_port_info(struct port
+ }
+
+ if (link_ok != lc->link_ok || speed != lc->speed ||
+- fc != lc->fc || fec != lc->fec) { /* something changed */
++ fc != lc->fc || adv_fc != lc->advertised_fc ||
++ fec != lc->fec) {
++ /* something changed */
+ if (!link_ok && lc->link_ok) {
+ lc->link_down_rc = linkdnrc;
+ dev_warn_ratelimited(adapter->pdev_dev,
+@@ -8677,6 +8681,7 @@ void t4_handle_get_port_info(struct port
+ }
+ lc->link_ok = link_ok;
+ lc->speed = speed;
++ lc->advertised_fc = adv_fc;
+ lc->fc = fc;
+ lc->fec = fec;
+
+--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
++++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+@@ -1690,8 +1690,8 @@ static void cxgb4vf_get_pauseparam(struc
+ struct port_info *pi = netdev_priv(dev);
+
+ pauseparam->autoneg = (pi->link_cfg.requested_fc & PAUSE_AUTONEG) != 0;
+- pauseparam->rx_pause = (pi->link_cfg.fc & PAUSE_RX) != 0;
+- pauseparam->tx_pause = (pi->link_cfg.fc & PAUSE_TX) != 0;
++ pauseparam->rx_pause = (pi->link_cfg.advertised_fc & PAUSE_RX) != 0;
++ pauseparam->tx_pause = (pi->link_cfg.advertised_fc & PAUSE_TX) != 0;
+ }
+
+ /*
+--- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h
++++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h
+@@ -135,6 +135,7 @@ struct link_config {
+
+ enum cc_pause requested_fc; /* flow control user has requested */
+ enum cc_pause fc; /* actual link flow control */
++ enum cc_pause advertised_fc; /* actual advertised flow control */
+
+ enum cc_fec auto_fec; /* Forward Error Correction: */
+ enum cc_fec requested_fec; /* "automatic" (IEEE 802.3), */
+--- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
++++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
+@@ -1913,16 +1913,16 @@ static const char *t4vf_link_down_rc_str
+ static void t4vf_handle_get_port_info(struct port_info *pi,
+ const struct fw_port_cmd *cmd)
+ {
+- int action = FW_PORT_CMD_ACTION_G(be32_to_cpu(cmd->action_to_len16));
+- struct adapter *adapter = pi->adapter;
++ fw_port_cap32_t pcaps, acaps, lpacaps, linkattr;
+ struct link_config *lc = &pi->link_cfg;
+- int link_ok, linkdnrc;
+- enum fw_port_type port_type;
++ struct adapter *adapter = pi->adapter;
++ unsigned int speed, fc, fec, adv_fc;
+ enum fw_port_module_type mod_type;
+- unsigned int speed, fc, fec;
+- fw_port_cap32_t pcaps, acaps, lpacaps, linkattr;
++ int action, link_ok, linkdnrc;
++ enum fw_port_type port_type;
+
+ /* Extract the various fields from the Port Information message. */
++ action = FW_PORT_CMD_ACTION_G(be32_to_cpu(cmd->action_to_len16));
+ switch (action) {
+ case FW_PORT_ACTION_GET_PORT_INFO: {
+ u32 lstatus = be32_to_cpu(cmd->u.info.lstatus_to_modtype);
+@@ -1982,6 +1982,7 @@ static void t4vf_handle_get_port_info(st
+ }
+
+ fec = fwcap_to_cc_fec(acaps);
++ adv_fc = fwcap_to_cc_pause(acaps);
+ fc = fwcap_to_cc_pause(linkattr);
+ speed = fwcap_to_speed(linkattr);
+
+@@ -2012,7 +2013,9 @@ static void t4vf_handle_get_port_info(st
+ }
+
+ if (link_ok != lc->link_ok || speed != lc->speed ||
+- fc != lc->fc || fec != lc->fec) { /* something changed */
++ fc != lc->fc || adv_fc != lc->advertised_fc ||
++ fec != lc->fec) {
++ /* something changed */
+ if (!link_ok && lc->link_ok) {
+ lc->link_down_rc = linkdnrc;
+ dev_warn_ratelimited(adapter->pdev_dev,
+@@ -2022,6 +2025,7 @@ static void t4vf_handle_get_port_info(st
+ }
+ lc->link_ok = link_ok;
+ lc->speed = speed;
++ lc->advertised_fc = adv_fc;
+ lc->fc = fc;
+ lc->fec = fec;
+
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Taehee Yoo <ap420073@gmail.com>
+Date: Wed, 11 Dec 2019 08:23:48 +0000
+Subject: gtp: avoid zero size hashtable
+
+From: Taehee Yoo <ap420073@gmail.com>
+
+[ Upstream commit 6a902c0f31993ab02e1b6ea7085002b9c9083b6a ]
+
+GTP default hashtable size is 1024 and userspace could set specific
+hashtable size with IFLA_GTP_PDP_HASHSIZE. If hashtable size is set to 0
+from userspace, hashtable will not work and panic will occur.
+
+Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)")
+Signed-off-by: Taehee Yoo <ap420073@gmail.com>
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/gtp.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/gtp.c
++++ b/drivers/net/gtp.c
+@@ -660,10 +660,13 @@ static int gtp_newlink(struct net *src_n
+ if (err < 0)
+ return err;
+
+- if (!data[IFLA_GTP_PDP_HASHSIZE])
++ if (!data[IFLA_GTP_PDP_HASHSIZE]) {
+ hashsize = 1024;
+- else
++ } else {
+ hashsize = nla_get_u32(data[IFLA_GTP_PDP_HASHSIZE]);
++ if (!hashsize)
++ hashsize = 1024;
++ }
+
+ err = gtp_hashtable_new(gtp, hashsize);
+ if (err < 0)
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Taehee Yoo <ap420073@gmail.com>
+Date: Wed, 11 Dec 2019 08:23:00 +0000
+Subject: gtp: do not allow adding duplicate tid and ms_addr pdp context
+
+From: Taehee Yoo <ap420073@gmail.com>
+
+[ Upstream commit 6b01b1d9b2d38dc84ac398bfe9f00baff06a31e5 ]
+
+GTP RX packet path lookups pdp context with TID. If duplicate TID pdp
+contexts are existing in the list, it couldn't select correct pdp context.
+So, TID value should be unique.
+GTP TX packet path lookups pdp context with ms_addr. If duplicate ms_addr pdp
+contexts are existing in the list, it couldn't select correct pdp context.
+So, ms_addr value should be unique.
+
+Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)")
+Signed-off-by: Taehee Yoo <ap420073@gmail.com>
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/gtp.c | 32 ++++++++++++++++++++++----------
+ 1 file changed, 22 insertions(+), 10 deletions(-)
+
+--- a/drivers/net/gtp.c
++++ b/drivers/net/gtp.c
+@@ -928,24 +928,31 @@ static void ipv4_pdp_fill(struct pdp_ctx
+ }
+ }
+
+-static int ipv4_pdp_add(struct gtp_dev *gtp, struct sock *sk,
+- struct genl_info *info)
++static int gtp_pdp_add(struct gtp_dev *gtp, struct sock *sk,
++ struct genl_info *info)
+ {
++ struct pdp_ctx *pctx, *pctx_tid = NULL;
+ struct net_device *dev = gtp->dev;
+ u32 hash_ms, hash_tid = 0;
+- struct pdp_ctx *pctx;
++ unsigned int version;
+ bool found = false;
+ __be32 ms_addr;
+
+ ms_addr = nla_get_be32(info->attrs[GTPA_MS_ADDRESS]);
+ hash_ms = ipv4_hashfn(ms_addr) % gtp->hash_size;
++ version = nla_get_u32(info->attrs[GTPA_VERSION]);
+
+- hlist_for_each_entry_rcu(pctx, >p->addr_hash[hash_ms], hlist_addr) {
+- if (pctx->ms_addr_ip4.s_addr == ms_addr) {
+- found = true;
+- break;
+- }
+- }
++ pctx = ipv4_pdp_find(gtp, ms_addr);
++ if (pctx)
++ found = true;
++ if (version == GTP_V0)
++ pctx_tid = gtp0_pdp_find(gtp,
++ nla_get_u64(info->attrs[GTPA_TID]));
++ else if (version == GTP_V1)
++ pctx_tid = gtp1_pdp_find(gtp,
++ nla_get_u32(info->attrs[GTPA_I_TEI]));
++ if (pctx_tid)
++ found = true;
+
+ if (found) {
+ if (info->nlhdr->nlmsg_flags & NLM_F_EXCL)
+@@ -953,6 +960,11 @@ static int ipv4_pdp_add(struct gtp_dev *
+ if (info->nlhdr->nlmsg_flags & NLM_F_REPLACE)
+ return -EOPNOTSUPP;
+
++ if (pctx && pctx_tid)
++ return -EEXIST;
++ if (!pctx)
++ pctx = pctx_tid;
++
+ ipv4_pdp_fill(pctx, info);
+
+ if (pctx->gtp_version == GTP_V0)
+@@ -1076,7 +1088,7 @@ static int gtp_genl_new_pdp(struct sk_bu
+ goto out_unlock;
+ }
+
+- err = ipv4_pdp_add(gtp, sk, info);
++ err = gtp_pdp_add(gtp, sk, info);
+
+ out_unlock:
+ rcu_read_unlock();
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Hangbin Liu <liuhangbin@gmail.com>
+Date: Sun, 22 Dec 2019 10:51:11 +0800
+Subject: gtp: do not confirm neighbor when do pmtu update
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit 6e9105c73f8d2163d12d5dfd762fd75483ed30f5 ]
+
+When doing an IPv6 tunnel PMTU update, which calls __ip6_rt_update_pmtu() in the end,
+we should not call dst_confirm_neigh() as there is no two-way communication.
+
+Although GTP only support ipv4 right now, and __ip_rt_update_pmtu() does not
+call dst_confirm_neigh(), we still set it to false to keep consistency with
+IPv6 code.
+
+v5: No change.
+v4: No change.
+v3: Do not remove dst_confirm_neigh, but add a new bool parameter in
+ dst_ops.update_pmtu to control whether we should do neighbor confirm.
+ Also split the big patch to small ones for each area.
+v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu.
+
+Reviewed-by: Guillaume Nault <gnault@redhat.com>
+Acked-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/gtp.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/gtp.c
++++ b/drivers/net/gtp.c
+@@ -541,7 +541,7 @@ static int gtp_build_skb_ip4(struct sk_b
+ mtu = dst_mtu(&rt->dst);
+ }
+
+- rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu, true);
++ rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu, false);
+
+ if (!skb_is_gso(skb) && (iph->frag_off & htons(IP_DF)) &&
+ mtu < ntohs(iph->tot_len)) {
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Taehee Yoo <ap420073@gmail.com>
+Date: Wed, 11 Dec 2019 08:23:34 +0000
+Subject: gtp: fix an use-after-free in ipv4_pdp_find()
+
+From: Taehee Yoo <ap420073@gmail.com>
+
+[ Upstream commit 94dc550a5062030569d4aa76e10e50c8fc001930 ]
+
+ipv4_pdp_find() is called in TX packet path of GTP.
+ipv4_pdp_find() internally uses gtp->tid_hash to lookup pdp context.
+In the current code, gtp->tid_hash and gtp->addr_hash are freed by
+->dellink(), which is gtp_dellink().
+But gtp_dellink() would be called while packets are processing.
+So, gtp_dellink() should not free gtp->tid_hash and gtp->addr_hash.
+Instead, dev->priv_destructor() would be used because this callback
+is called after all packet processing safely.
+
+Test commands:
+ ip link add veth1 type veth peer name veth2
+ ip a a 172.0.0.1/24 dev veth1
+ ip link set veth1 up
+ ip a a 172.99.0.1/32 dev lo
+
+ gtp-link add gtp1 &
+
+ gtp-tunnel add gtp1 v1 200 100 172.99.0.2 172.0.0.2
+ ip r a 172.99.0.2/32 dev gtp1
+ ip link set gtp1 mtu 1500
+
+ ip netns add ns2
+ ip link set veth2 netns ns2
+ ip netns exec ns2 ip a a 172.0.0.2/24 dev veth2
+ ip netns exec ns2 ip link set veth2 up
+ ip netns exec ns2 ip a a 172.99.0.2/32 dev lo
+ ip netns exec ns2 ip link set lo up
+
+ ip netns exec ns2 gtp-link add gtp2 &
+ ip netns exec ns2 gtp-tunnel add gtp2 v1 100 200 172.99.0.1 172.0.0.1
+ ip netns exec ns2 ip r a 172.99.0.1/32 dev gtp2
+ ip netns exec ns2 ip link set gtp2 mtu 1500
+
+ hping3 172.99.0.2 -2 --flood &
+ ip link del gtp1
+
+Splat looks like:
+[ 72.568081][ T1195] BUG: KASAN: use-after-free in ipv4_pdp_find.isra.12+0x130/0x170 [gtp]
+[ 72.568916][ T1195] Read of size 8 at addr ffff8880b9a35d28 by task hping3/1195
+[ 72.569631][ T1195]
+[ 72.569861][ T1195] CPU: 2 PID: 1195 Comm: hping3 Not tainted 5.5.0-rc1 #199
+[ 72.570547][ T1195] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
+[ 72.571438][ T1195] Call Trace:
+[ 72.571764][ T1195] dump_stack+0x96/0xdb
+[ 72.572171][ T1195] ? ipv4_pdp_find.isra.12+0x130/0x170 [gtp]
+[ 72.572761][ T1195] print_address_description.constprop.5+0x1be/0x360
+[ 72.573400][ T1195] ? ipv4_pdp_find.isra.12+0x130/0x170 [gtp]
+[ 72.573971][ T1195] ? ipv4_pdp_find.isra.12+0x130/0x170 [gtp]
+[ 72.574544][ T1195] __kasan_report+0x12a/0x16f
+[ 72.575014][ T1195] ? ipv4_pdp_find.isra.12+0x130/0x170 [gtp]
+[ 72.575593][ T1195] kasan_report+0xe/0x20
+[ 72.576004][ T1195] ipv4_pdp_find.isra.12+0x130/0x170 [gtp]
+[ 72.576577][ T1195] gtp_build_skb_ip4+0x199/0x1420 [gtp]
+[ ... ]
+[ 72.647671][ T1195] BUG: unable to handle page fault for address: ffff8880b9a35d28
+[ 72.648512][ T1195] #PF: supervisor read access in kernel mode
+[ 72.649158][ T1195] #PF: error_code(0x0000) - not-present page
+[ 72.649849][ T1195] PGD a6c01067 P4D a6c01067 PUD 11fb07067 PMD 11f939067 PTE 800fffff465ca060
+[ 72.652958][ T1195] Oops: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI
+[ 72.653834][ T1195] CPU: 2 PID: 1195 Comm: hping3 Tainted: G B 5.5.0-rc1 #199
+[ 72.668062][ T1195] RIP: 0010:ipv4_pdp_find.isra.12+0x86/0x170 [gtp]
+[ ... ]
+[ 72.679168][ T1195] Call Trace:
+[ 72.679603][ T1195] gtp_build_skb_ip4+0x199/0x1420 [gtp]
+[ 72.681915][ T1195] ? ipv4_pdp_find.isra.12+0x170/0x170 [gtp]
+[ 72.682513][ T1195] ? lock_acquire+0x164/0x3b0
+[ 72.682966][ T1195] ? gtp_dev_xmit+0x35e/0x890 [gtp]
+[ 72.683481][ T1195] gtp_dev_xmit+0x3c2/0x890 [gtp]
+[ ... ]
+
+Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)")
+Signed-off-by: Taehee Yoo <ap420073@gmail.com>
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/gtp.c | 34 +++++++++++++++++-----------------
+ 1 file changed, 17 insertions(+), 17 deletions(-)
+
+--- a/drivers/net/gtp.c
++++ b/drivers/net/gtp.c
+@@ -640,9 +640,16 @@ static void gtp_link_setup(struct net_de
+ }
+
+ static int gtp_hashtable_new(struct gtp_dev *gtp, int hsize);
+-static void gtp_hashtable_free(struct gtp_dev *gtp);
+ static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[]);
+
++static void gtp_destructor(struct net_device *dev)
++{
++ struct gtp_dev *gtp = netdev_priv(dev);
++
++ kfree(gtp->addr_hash);
++ kfree(gtp->tid_hash);
++}
++
+ static int gtp_newlink(struct net *src_net, struct net_device *dev,
+ struct nlattr *tb[], struct nlattr *data[],
+ struct netlink_ext_ack *extack)
+@@ -680,13 +687,15 @@ static int gtp_newlink(struct net *src_n
+
+ gn = net_generic(dev_net(dev), gtp_net_id);
+ list_add_rcu(>p->list, &gn->gtp_dev_list);
++ dev->priv_destructor = gtp_destructor;
+
+ netdev_dbg(dev, "registered new GTP interface\n");
+
+ return 0;
+
+ out_hashtable:
+- gtp_hashtable_free(gtp);
++ kfree(gtp->addr_hash);
++ kfree(gtp->tid_hash);
+ out_encap:
+ gtp_encap_disable(gtp);
+ return err;
+@@ -695,8 +704,13 @@ out_encap:
+ static void gtp_dellink(struct net_device *dev, struct list_head *head)
+ {
+ struct gtp_dev *gtp = netdev_priv(dev);
++ struct pdp_ctx *pctx;
++ int i;
++
++ for (i = 0; i < gtp->hash_size; i++)
++ hlist_for_each_entry_rcu(pctx, >p->tid_hash[i], hlist_tid)
++ pdp_context_delete(pctx);
+
+- gtp_hashtable_free(gtp);
+ list_del_rcu(>p->list);
+ unregister_netdevice_queue(dev, head);
+ }
+@@ -774,20 +788,6 @@ err1:
+ return -ENOMEM;
+ }
+
+-static void gtp_hashtable_free(struct gtp_dev *gtp)
+-{
+- struct pdp_ctx *pctx;
+- int i;
+-
+- for (i = 0; i < gtp->hash_size; i++)
+- hlist_for_each_entry_rcu(pctx, >p->tid_hash[i], hlist_tid)
+- pdp_context_delete(pctx);
+-
+- synchronize_rcu();
+- kfree(gtp->addr_hash);
+- kfree(gtp->tid_hash);
+-}
+-
+ static struct sock *gtp_encap_enable_socket(int fd, int type,
+ struct gtp_dev *gtp)
+ {
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Taehee Yoo <ap420073@gmail.com>
+Date: Wed, 11 Dec 2019 08:23:17 +0000
+Subject: gtp: fix wrong condition in gtp_genl_dump_pdp()
+
+From: Taehee Yoo <ap420073@gmail.com>
+
+[ Upstream commit 94a6d9fb88df43f92d943c32b84ce398d50bf49f ]
+
+gtp_genl_dump_pdp() is ->dumpit() callback of GTP module and it is used
+to dump pdp contexts. It would be re-executed because of dump packet size.
+
+If dump packet size is too big, it saves current dump pointer
+(gtp interface pointer, bucket, TID value) then it restarts dump from
+last pointer.
+Current GTP code allows adding zero TID pdp context but dump code
+ignores zero TID value. So, last dump pointer will not be found.
+
+In addition, this patch adds missing rcu_read_lock() in
+gtp_genl_dump_pdp().
+
+Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)")
+Signed-off-by: Taehee Yoo <ap420073@gmail.com>
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/gtp.c | 36 +++++++++++++++++++-----------------
+ 1 file changed, 19 insertions(+), 17 deletions(-)
+
+--- a/drivers/net/gtp.c
++++ b/drivers/net/gtp.c
+@@ -38,7 +38,6 @@ struct pdp_ctx {
+ struct hlist_node hlist_addr;
+
+ union {
+- u64 tid;
+ struct {
+ u64 tid;
+ u16 flow;
+@@ -1232,43 +1231,46 @@ static int gtp_genl_dump_pdp(struct sk_b
+ struct netlink_callback *cb)
+ {
+ struct gtp_dev *last_gtp = (struct gtp_dev *)cb->args[2], *gtp;
++ int i, j, bucket = cb->args[0], skip = cb->args[1];
+ struct net *net = sock_net(skb->sk);
+- struct gtp_net *gn = net_generic(net, gtp_net_id);
+- unsigned long tid = cb->args[1];
+- int i, k = cb->args[0], ret;
+ struct pdp_ctx *pctx;
++ struct gtp_net *gn;
++
++ gn = net_generic(net, gtp_net_id);
+
+ if (cb->args[4])
+ return 0;
+
++ rcu_read_lock();
+ list_for_each_entry_rcu(gtp, &gn->gtp_dev_list, list) {
+ if (last_gtp && last_gtp != gtp)
+ continue;
+ else
+ last_gtp = NULL;
+
+- for (i = k; i < gtp->hash_size; i++) {
+- hlist_for_each_entry_rcu(pctx, >p->tid_hash[i], hlist_tid) {
+- if (tid && tid != pctx->u.tid)
+- continue;
+- else
+- tid = 0;
+-
+- ret = gtp_genl_fill_info(skb,
+- NETLINK_CB(cb->skb).portid,
+- cb->nlh->nlmsg_seq,
+- cb->nlh->nlmsg_type, pctx);
+- if (ret < 0) {
++ for (i = bucket; i < gtp->hash_size; i++) {
++ j = 0;
++ hlist_for_each_entry_rcu(pctx, >p->tid_hash[i],
++ hlist_tid) {
++ if (j >= skip &&
++ gtp_genl_fill_info(skb,
++ NETLINK_CB(cb->skb).portid,
++ cb->nlh->nlmsg_seq,
++ cb->nlh->nlmsg_type, pctx)) {
+ cb->args[0] = i;
+- cb->args[1] = pctx->u.tid;
++ cb->args[1] = j;
+ cb->args[2] = (unsigned long)gtp;
+ goto out;
+ }
++ j++;
+ }
++ skip = 0;
+ }
++ bucket = 0;
+ }
+ cb->args[4] = 1;
+ out:
++ rcu_read_unlock();
+ return skb->len;
+ }
+
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Haiyang Zhang <haiyangz@microsoft.com>
+Date: Wed, 11 Dec 2019 14:26:27 -0800
+Subject: hv_netvsc: Fix tx_table init in rndis_set_subchannel()
+
+From: Haiyang Zhang <haiyangz@microsoft.com>
+
+[ Upstream commit c39ea5cba5a2e97fc01b78c85208bf31383b399c ]
+
+Host can provide send indirection table messages anytime after RSS is
+enabled by calling rndis_filter_set_rss_param(). So the host provided
+table values may be overwritten by the initialization in
+rndis_set_subchannel().
+
+To prevent this problem, move the tx_table initialization before calling
+rndis_filter_set_rss_param().
+
+Fixes: a6fb6aa3cfa9 ("hv_netvsc: Set tx_table to equal weight after subchannels open")
+Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/rndis_filter.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/hyperv/rndis_filter.c
++++ b/drivers/net/hyperv/rndis_filter.c
+@@ -1165,6 +1165,9 @@ int rndis_set_subchannel(struct net_devi
+ wait_event(nvdev->subchan_open,
+ atomic_read(&nvdev->open_chn) == nvdev->num_chn);
+
++ for (i = 0; i < VRSS_SEND_TAB_SIZE; i++)
++ ndev_ctx->tx_table[i] = i % nvdev->num_chn;
++
+ /* ignore failures from setting rss parameters, still have channels */
+ if (dev_info)
+ rndis_filter_set_rss_param(rdev, dev_info->rss_key);
+@@ -1174,9 +1177,6 @@ int rndis_set_subchannel(struct net_devi
+ netif_set_real_num_tx_queues(ndev, nvdev->num_chn);
+ netif_set_real_num_rx_queues(ndev, nvdev->num_chn);
+
+- for (i = 0; i < VRSS_SEND_TAB_SIZE; i++)
+- ndev_ctx->tx_table[i] = i % nvdev->num_chn;
+-
+ return 0;
+ }
+
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Hangbin Liu <liuhangbin@gmail.com>
+Date: Sun, 22 Dec 2019 10:51:10 +0800
+Subject: ip6_gre: do not confirm neighbor when do pmtu update
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit 675d76ad0ad5bf41c9a129772ef0aba8f57ea9a7 ]
+
+When we do ipv6 gre pmtu update, we will also do neigh confirm currently.
+This will cause the neigh cache be refreshed and set to REACHABLE before
+xmit.
+
+But if the remote mac address changed, e.g. device is deleted and recreated,
+we will not able to notice this and still use the old mac address as the neigh
+cache is REACHABLE.
+
+Fix this by disabling neigh confirm when doing the pmtu update.
+
+v5: No change.
+v4: No change.
+v3: Do not remove dst_confirm_neigh, but add a new bool parameter in
+ dst_ops.update_pmtu to control whether we should do neighbor confirm.
+ Also split the big patch to small ones for each area.
+v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu.
+
+Reported-by: Jianlin Shi <jishi@redhat.com>
+Reviewed-by: Guillaume Nault <gnault@redhat.com>
+Acked-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_gre.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv6/ip6_gre.c
++++ b/net/ipv6/ip6_gre.c
+@@ -1040,7 +1040,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit
+
+ /* TooBig packet may have updated dst->dev's mtu */
+ if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu)
+- dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu, true);
++ dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu, false);
+
+ err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
+ NEXTHDR_GRE);
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Hangbin Liu <liuhangbin@gmail.com>
+Date: Wed, 11 Dec 2019 22:20:16 +0800
+Subject: ipv6/addrconf: only check invalid header values when NETLINK_F_STRICT_CHK is set
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit 2beb6d2901a3f73106485d560c49981144aeacb1 ]
+
+In commit 4b1373de73a3 ("net: ipv6: addr: perform strict checks also for
+doit handlers") we add strict check for inet6_rtm_getaddr(). But we did
+the invalid header values check before checking if NETLINK_F_STRICT_CHK
+is set. This may break backwards compatibility if user already set the
+ifm->ifa_prefixlen, ifm->ifa_flags, ifm->ifa_scope in their netlink code.
+
+I didn't move the nlmsg_len check because I thought it's a valid check.
+
+Reported-by: Jianlin Shi <jishi@redhat.com>
+Fixes: 4b1373de73a3 ("net: ipv6: addr: perform strict checks also for doit handlers")
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Reviewed-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/addrconf.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/net/ipv6/addrconf.c
++++ b/net/ipv6/addrconf.c
+@@ -5231,16 +5231,16 @@ static int inet6_rtm_valid_getaddr_req(s
+ return -EINVAL;
+ }
+
++ if (!netlink_strict_get_check(skb))
++ return nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
++ ifa_ipv6_policy, extack);
++
+ ifm = nlmsg_data(nlh);
+ if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get address request");
+ return -EINVAL;
+ }
+
+- if (!netlink_strict_get_check(skb))
+- return nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
+- ifa_ipv6_policy, extack);
+-
+ err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
+ ifa_ipv6_policy, extack);
+ if (err)
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Ido Schimmel <idosch@mellanox.com>
+Date: Sun, 29 Dec 2019 13:40:23 +0200
+Subject: mlxsw: spectrum: Use dedicated policer for VRRP packets
+
+From: Ido Schimmel <idosch@mellanox.com>
+
+[ Upstream commit acca789a358cc960be3937851d7de6591c79d6c2 ]
+
+Currently, VRRP packets and packets that hit exceptions during routing
+(e.g., MTU error) are policed using the same policer towards the CPU.
+This means, for example, that misconfiguration of the MTU on a routed
+interface can prevent VRRP packets from reaching the CPU, which in turn
+can cause the VRRP daemon to assume it is the Master router.
+
+Fix this by using a dedicated policer for VRRP packets.
+
+Fixes: 11566d34f895 ("mlxsw: spectrum: Add VRRP traps")
+Signed-off-by: Ido Schimmel <idosch@mellanox.com>
+Reported-by: Alex Veber <alexve@mellanox.com>
+Tested-by: Alex Veber <alexve@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlxsw/reg.h | 1 +
+ drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 9 +++++++--
+ 2 files changed, 8 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
++++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
+@@ -5421,6 +5421,7 @@ enum mlxsw_reg_htgt_trap_group {
+ MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR,
+ MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0,
+ MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1,
++ MLXSW_REG_HTGT_TRAP_GROUP_SP_VRRP,
+
+ __MLXSW_REG_HTGT_TRAP_GROUP_MAX,
+ MLXSW_REG_HTGT_TRAP_GROUP_MAX = __MLXSW_REG_HTGT_TRAP_GROUP_MAX - 1
+--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+@@ -4398,8 +4398,8 @@ static const struct mlxsw_listener mlxsw
+ MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV6, TRAP_TO_CPU, ROUTER_EXP, false),
+ MLXSW_SP_RXL_MARK(IPIP_DECAP_ERROR, TRAP_TO_CPU, ROUTER_EXP, false),
+ MLXSW_SP_RXL_MARK(DECAP_ECN0, TRAP_TO_CPU, ROUTER_EXP, false),
+- MLXSW_SP_RXL_MARK(IPV4_VRRP, TRAP_TO_CPU, ROUTER_EXP, false),
+- MLXSW_SP_RXL_MARK(IPV6_VRRP, TRAP_TO_CPU, ROUTER_EXP, false),
++ MLXSW_SP_RXL_MARK(IPV4_VRRP, TRAP_TO_CPU, VRRP, false),
++ MLXSW_SP_RXL_MARK(IPV6_VRRP, TRAP_TO_CPU, VRRP, false),
+ /* PKT Sample trap */
+ MLXSW_RXL(mlxsw_sp_rx_listener_sample_func, PKT_SAMPLE, MIRROR_TO_CPU,
+ false, SP_IP2ME, DISCARD),
+@@ -4483,6 +4483,10 @@ static int mlxsw_sp_cpu_policers_set(str
+ rate = 19 * 1024;
+ burst_size = 12;
+ break;
++ case MLXSW_REG_HTGT_TRAP_GROUP_SP_VRRP:
++ rate = 360;
++ burst_size = 7;
++ break;
+ default:
+ continue;
+ }
+@@ -4522,6 +4526,7 @@ static int mlxsw_sp_trap_groups_set(stru
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0:
++ case MLXSW_REG_HTGT_TRAP_GROUP_SP_VRRP:
+ priority = 5;
+ tc = 5;
+ break;
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Amit Cohen <amitc@mellanox.com>
+Date: Sun, 29 Dec 2019 13:40:22 +0200
+Subject: mlxsw: spectrum_router: Skip loopback RIFs during MAC validation
+
+From: Amit Cohen <amitc@mellanox.com>
+
+[ Upstream commit 314bd842d98e1035cc40b671a71e07f48420e58f ]
+
+When a router interface (RIF) is created the MAC address of the backing
+netdev is verified to have the same MSBs as existing RIFs. This is
+required in order to avoid changing existing RIF MAC addresses that all
+share the same MSBs.
+
+Loopback RIFs are special in this regard as they do not have a MAC
+address, given they are only used to loop packets from the overlay to
+the underlay.
+
+Without this change, an error is returned when trying to create a RIF
+after the creation of a GRE tunnel that is represented by a loopback
+RIF. 'rif->dev->dev_addr' points to the GRE device's local IP, which
+does not share the same MSBs as physical interfaces. Adding an IP
+address to any physical interface results in:
+
+Error: mlxsw_spectrum: All router interface MAC addresses must have the
+same prefix.
+
+Fix this by skipping loopback RIFs during MAC validation.
+
+Fixes: 74bc99397438 ("mlxsw: spectrum_router: Veto unsupported RIF MAC addresses")
+Signed-off-by: Amit Cohen <amitc@mellanox.com>
+Signed-off-by: Ido Schimmel <idosch@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+@@ -6985,6 +6985,9 @@ static int mlxsw_sp_router_port_check_ri
+
+ for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
+ rif = mlxsw_sp->router->rifs[i];
++ if (rif && rif->ops &&
++ rif->ops->type == MLXSW_SP_RIF_TYPE_IPIP_LB)
++ continue;
+ if (rif && rif->dev && rif->dev != dev &&
+ !ether_addr_equal_masked(rif->dev->dev_addr, dev_addr,
+ mlxsw_sp->mac_mask)) {
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Hangbin Liu <liuhangbin@gmail.com>
+Date: Sun, 22 Dec 2019 10:51:09 +0800
+Subject: net: add bool confirm_neigh parameter for dst_ops.update_pmtu
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit bd085ef678b2cc8c38c105673dfe8ff8f5ec0c57 ]
+
+The MTU update code is supposed to be invoked in response to real
+networking events that update the PMTU. In IPv6 PMTU update function
+__ip6_rt_update_pmtu() we called dst_confirm_neigh() to update neighbor
+confirmed time.
+
+But for tunnel code, it will call pmtu before xmit, like:
+ - tnl_update_pmtu()
+ - skb_dst_update_pmtu()
+ - ip6_rt_update_pmtu()
+ - __ip6_rt_update_pmtu()
+ - dst_confirm_neigh()
+
+If the tunnel remote dst mac address changed and we still do the neigh
+confirm, we will not be able to update neigh cache and ping6 remote
+will failed.
+
+So for this ip_tunnel_xmit() case, _EVEN_ if the MTU is changed, we
+should not be invoking dst_confirm_neigh() as we have no evidence
+of successful two-way communication at this point.
+
+On the other hand it is also important to keep the neigh reachability fresh
+for TCP flows, so we cannot remove this dst_confirm_neigh() call.
+
+To fix the issue, we have to add a new bool parameter for dst_ops.update_pmtu
+to choose whether we should do neigh update or not. I will add the parameter
+in this patch and set all the callers to true to comply with the previous
+way, and fix the tunnel code one by one on later patches.
+
+v5: No change.
+v4: No change.
+v3: Do not remove dst_confirm_neigh, but add a new bool parameter in
+ dst_ops.update_pmtu to control whether we should do neighbor confirm.
+ Also split the big patch to small ones for each area.
+v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu.
+
+Suggested-by: David Miller <davem@davemloft.net>
+Reviewed-by: Guillaume Nault <gnault@redhat.com>
+Acked-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/gtp.c | 2 +-
+ include/net/dst.h | 2 +-
+ include/net/dst_ops.h | 3 ++-
+ net/bridge/br_nf_core.c | 3 ++-
+ net/decnet/dn_route.c | 6 ++++--
+ net/ipv4/inet_connection_sock.c | 2 +-
+ net/ipv4/route.c | 9 ++++++---
+ net/ipv4/xfrm4_policy.c | 5 +++--
+ net/ipv6/inet6_connection_sock.c | 2 +-
+ net/ipv6/ip6_gre.c | 2 +-
+ net/ipv6/route.c | 22 +++++++++++++++-------
+ net/ipv6/xfrm6_policy.c | 5 +++--
+ net/netfilter/ipvs/ip_vs_xmit.c | 2 +-
+ net/sctp/transport.c | 2 +-
+ 14 files changed, 42 insertions(+), 25 deletions(-)
+
+--- a/drivers/net/gtp.c
++++ b/drivers/net/gtp.c
+@@ -541,7 +541,7 @@ static int gtp_build_skb_ip4(struct sk_b
+ mtu = dst_mtu(&rt->dst);
+ }
+
+- rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu);
++ rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu, true);
+
+ if (!skb_is_gso(skb) && (iph->frag_off & htons(IP_DF)) &&
+ mtu < ntohs(iph->tot_len)) {
+--- a/include/net/dst.h
++++ b/include/net/dst.h
+@@ -516,7 +516,7 @@ static inline void skb_dst_update_pmtu(s
+ struct dst_entry *dst = skb_dst(skb);
+
+ if (dst && dst->ops->update_pmtu)
+- dst->ops->update_pmtu(dst, NULL, skb, mtu);
++ dst->ops->update_pmtu(dst, NULL, skb, mtu, true);
+ }
+
+ static inline void skb_tunnel_check_pmtu(struct sk_buff *skb,
+--- a/include/net/dst_ops.h
++++ b/include/net/dst_ops.h
+@@ -27,7 +27,8 @@ struct dst_ops {
+ struct dst_entry * (*negative_advice)(struct dst_entry *);
+ void (*link_failure)(struct sk_buff *);
+ void (*update_pmtu)(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu);
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh);
+ void (*redirect)(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb);
+ int (*local_out)(struct net *net, struct sock *sk, struct sk_buff *skb);
+--- a/net/bridge/br_nf_core.c
++++ b/net/bridge/br_nf_core.c
+@@ -22,7 +22,8 @@
+ #endif
+
+ static void fake_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu)
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh)
+ {
+ }
+
+--- a/net/decnet/dn_route.c
++++ b/net/decnet/dn_route.c
+@@ -110,7 +110,8 @@ static void dn_dst_ifdown(struct dst_ent
+ static struct dst_entry *dn_dst_negative_advice(struct dst_entry *);
+ static void dn_dst_link_failure(struct sk_buff *);
+ static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb , u32 mtu);
++ struct sk_buff *skb , u32 mtu,
++ bool confirm_neigh);
+ static void dn_dst_redirect(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb);
+ static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst,
+@@ -251,7 +252,8 @@ static int dn_dst_gc(struct dst_ops *ops
+ * advertise to the other end).
+ */
+ static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu)
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh)
+ {
+ struct dn_route *rt = (struct dn_route *) dst;
+ struct neighbour *n = rt->n;
+--- a/net/ipv4/inet_connection_sock.c
++++ b/net/ipv4/inet_connection_sock.c
+@@ -1086,7 +1086,7 @@ struct dst_entry *inet_csk_update_pmtu(s
+ if (!dst)
+ goto out;
+ }
+- dst->ops->update_pmtu(dst, sk, NULL, mtu);
++ dst->ops->update_pmtu(dst, sk, NULL, mtu, true);
+
+ dst = __sk_dst_check(sk, 0);
+ if (!dst)
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -139,7 +139,8 @@ static unsigned int ipv4_mtu(const stru
+ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
+ static void ipv4_link_failure(struct sk_buff *skb);
+ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu);
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh);
+ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb);
+ static void ipv4_dst_destroy(struct dst_entry *dst);
+@@ -1043,7 +1044,8 @@ static void __ip_rt_update_pmtu(struct r
+ }
+
+ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu)
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh)
+ {
+ struct rtable *rt = (struct rtable *) dst;
+ struct flowi4 fl4;
+@@ -2648,7 +2650,8 @@ static unsigned int ipv4_blackhole_mtu(c
+ }
+
+ static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu)
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh)
+ {
+ }
+
+--- a/net/ipv4/xfrm4_policy.c
++++ b/net/ipv4/xfrm4_policy.c
+@@ -100,12 +100,13 @@ static int xfrm4_fill_dst(struct xfrm_ds
+ }
+
+ static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu)
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh)
+ {
+ struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+ struct dst_entry *path = xdst->route;
+
+- path->ops->update_pmtu(path, sk, skb, mtu);
++ path->ops->update_pmtu(path, sk, skb, mtu, confirm_neigh);
+ }
+
+ static void xfrm4_redirect(struct dst_entry *dst, struct sock *sk,
+--- a/net/ipv6/inet6_connection_sock.c
++++ b/net/ipv6/inet6_connection_sock.c
+@@ -146,7 +146,7 @@ struct dst_entry *inet6_csk_update_pmtu(
+
+ if (IS_ERR(dst))
+ return NULL;
+- dst->ops->update_pmtu(dst, sk, NULL, mtu);
++ dst->ops->update_pmtu(dst, sk, NULL, mtu, true);
+
+ dst = inet6_csk_route_socket(sk, &fl6);
+ return IS_ERR(dst) ? NULL : dst;
+--- a/net/ipv6/ip6_gre.c
++++ b/net/ipv6/ip6_gre.c
+@@ -1040,7 +1040,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit
+
+ /* TooBig packet may have updated dst->dev's mtu */
+ if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu)
+- dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu);
++ dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu, true);
+
+ err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
+ NEXTHDR_GRE);
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -95,7 +95,8 @@ static int ip6_pkt_prohibit(struct sk_b
+ static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
+ static void ip6_link_failure(struct sk_buff *skb);
+ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu);
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh);
+ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb);
+ static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
+@@ -264,7 +265,8 @@ static unsigned int ip6_blackhole_mtu(co
+ }
+
+ static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu)
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh)
+ {
+ }
+
+@@ -2695,7 +2697,8 @@ static bool rt6_cache_allowed_for_pmtu(c
+ }
+
+ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
+- const struct ipv6hdr *iph, u32 mtu)
++ const struct ipv6hdr *iph, u32 mtu,
++ bool confirm_neigh)
+ {
+ const struct in6_addr *daddr, *saddr;
+ struct rt6_info *rt6 = (struct rt6_info *)dst;
+@@ -2713,7 +2716,10 @@ static void __ip6_rt_update_pmtu(struct
+ daddr = NULL;
+ saddr = NULL;
+ }
+- dst_confirm_neigh(dst, daddr);
++
++ if (confirm_neigh)
++ dst_confirm_neigh(dst, daddr);
++
+ mtu = max_t(u32, mtu, IPV6_MIN_MTU);
+ if (mtu >= dst_mtu(dst))
+ return;
+@@ -2767,9 +2773,11 @@ out_unlock:
+ }
+
+ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu)
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh)
+ {
+- __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
++ __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu,
++ confirm_neigh);
+ }
+
+ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
+@@ -2788,7 +2796,7 @@ void ip6_update_pmtu(struct sk_buff *skb
+
+ dst = ip6_route_output(net, NULL, &fl6);
+ if (!dst->error)
+- __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
++ __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu), true);
+ dst_release(dst);
+ }
+ EXPORT_SYMBOL_GPL(ip6_update_pmtu);
+--- a/net/ipv6/xfrm6_policy.c
++++ b/net/ipv6/xfrm6_policy.c
+@@ -98,12 +98,13 @@ static int xfrm6_fill_dst(struct xfrm_ds
+ }
+
+ static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu)
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh)
+ {
+ struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+ struct dst_entry *path = xdst->route;
+
+- path->ops->update_pmtu(path, sk, skb, mtu);
++ path->ops->update_pmtu(path, sk, skb, mtu, confirm_neigh);
+ }
+
+ static void xfrm6_redirect(struct dst_entry *dst, struct sock *sk,
+--- a/net/netfilter/ipvs/ip_vs_xmit.c
++++ b/net/netfilter/ipvs/ip_vs_xmit.c
+@@ -208,7 +208,7 @@ static inline void maybe_update_pmtu(int
+ struct rtable *ort = skb_rtable(skb);
+
+ if (!skb->dev && sk && sk_fullsock(sk))
+- ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
++ ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu, true);
+ }
+
+ static inline bool ensure_mtu_is_adequate(struct netns_ipvs *ipvs, int skb_af,
+--- a/net/sctp/transport.c
++++ b/net/sctp/transport.c
+@@ -263,7 +263,7 @@ bool sctp_transport_update_pmtu(struct s
+
+ pf->af->from_sk(&addr, sk);
+ pf->to_sk_daddr(&t->ipaddr, sk);
+- dst->ops->update_pmtu(dst, sk, NULL, pmtu);
++ dst->ops->update_pmtu(dst, sk, NULL, pmtu, true);
+ pf->to_sk_daddr(&addr, sk);
+
+ dst = sctp_transport_dst_check(t);
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Fri, 20 Dec 2019 11:24:21 -0800
+Subject: net: dsa: bcm_sf2: Fix IP fragment location and behavior
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+[ Upstream commit 7c3125f0a6ebc17846c5908ad7d6056d66c1c426 ]
+
+The IP fragment is specified through user-defined field as the first
+bit of the first user-defined word. We were previously trying to extract
+it from the user-defined mask which could not possibly work. The ip_frag
+is also supposed to be a boolean, if we do not cast it as such, we risk
+overwriting the next fields in CFP_DATA(6) which would render the rule
+inoperative.
+
+Fixes: 7318166cacad ("net: dsa: bcm_sf2: Add support for ethtool::rxnfc")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/bcm_sf2_cfp.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/dsa/bcm_sf2_cfp.c
++++ b/drivers/net/dsa/bcm_sf2_cfp.c
+@@ -358,7 +358,7 @@ static int bcm_sf2_cfp_ipv4_rule_set(str
+ return -EINVAL;
+ }
+
+- ip_frag = be32_to_cpu(fs->m_ext.data[0]);
++ ip_frag = !!(be32_to_cpu(fs->h_ext.data[0]) & 1);
+
+ /* Locate the first rule available */
+ if (fs->location == RX_CLS_LOC_ANY)
+@@ -569,7 +569,7 @@ static int bcm_sf2_cfp_rule_cmp(struct b
+
+ if (rule->fs.flow_type != fs->flow_type ||
+ rule->fs.ring_cookie != fs->ring_cookie ||
+- rule->fs.m_ext.data[0] != fs->m_ext.data[0])
++ rule->fs.h_ext.data[0] != fs->h_ext.data[0])
+ continue;
+
+ switch (fs->flow_type & ~FLOW_EXT) {
+@@ -621,7 +621,7 @@ static int bcm_sf2_cfp_ipv6_rule_set(str
+ return -EINVAL;
+ }
+
+- ip_frag = be32_to_cpu(fs->m_ext.data[0]);
++ ip_frag = !!(be32_to_cpu(fs->h_ext.data[0]) & 1);
+
+ layout = &udf_tcpip6_layout;
+ slice_num = bcm_sf2_get_slice_number(layout, 0);
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Vladimir Oltean <olteanv@gmail.com>
+Date: Fri, 27 Dec 2019 03:11:13 +0200
+Subject: net: dsa: sja1105: Reconcile the meaning of TPID and TPID2 for E/T and P/Q/R/S
+
+From: Vladimir Oltean <olteanv@gmail.com>
+
+[ Upstream commit 54fa49ee88138756df0fcf867cb1849904710a8c ]
+
+For first-generation switches (SJA1105E and SJA1105T):
+- TPID means C-Tag (typically 0x8100)
+- TPID2 means S-Tag (typically 0x88A8)
+
+While for the second generation switches (SJA1105P, SJA1105Q, SJA1105R,
+SJA1105S) it is the other way around:
+- TPID means S-Tag (typically 0x88A8)
+- TPID2 means C-Tag (typically 0x8100)
+
+In other words, E/T tags untagged traffic with TPID, and P/Q/R/S with
+TPID2.
+
+So the patch mentioned below fixed VLAN filtering for P/Q/R/S, but broke
+it for E/T.
+
+We strive for a common code path for all switches in the family, so just
+lie in the static config packing functions that TPID and TPID2 are at
+swapped bit offsets than they actually are, for P/Q/R/S. This will make
+both switches understand TPID to be ETH_P_8021Q and TPID2 to be
+ETH_P_8021AD. The meaning from the original E/T was chosen over P/Q/R/S
+because E/T is actually the one with public documentation available
+(UM10944.pdf).
+
+Fixes: f9a1a7646c0d ("net: dsa: sja1105: Reverse TPID and TPID2")
+Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/sja1105/sja1105_main.c | 8 ++++----
+ drivers/net/dsa/sja1105/sja1105_static_config.c | 7 +++++--
+ 2 files changed, 9 insertions(+), 6 deletions(-)
+
+--- a/drivers/net/dsa/sja1105/sja1105_main.c
++++ b/drivers/net/dsa/sja1105/sja1105_main.c
+@@ -1560,8 +1560,8 @@ static int sja1105_vlan_filtering(struct
+
+ if (enabled) {
+ /* Enable VLAN filtering. */
+- tpid = ETH_P_8021AD;
+- tpid2 = ETH_P_8021Q;
++ tpid = ETH_P_8021Q;
++ tpid2 = ETH_P_8021AD;
+ } else {
+ /* Disable VLAN filtering. */
+ tpid = ETH_P_SJA1105;
+@@ -1570,9 +1570,9 @@ static int sja1105_vlan_filtering(struct
+
+ table = &priv->static_config.tables[BLK_IDX_GENERAL_PARAMS];
+ general_params = table->entries;
+- /* EtherType used to identify outer tagged (S-tag) VLAN traffic */
+- general_params->tpid = tpid;
+ /* EtherType used to identify inner tagged (C-tag) VLAN traffic */
++ general_params->tpid = tpid;
++ /* EtherType used to identify outer tagged (S-tag) VLAN traffic */
+ general_params->tpid2 = tpid2;
+ /* When VLAN filtering is on, we need to at least be able to
+ * decode management traffic through the "backup plan".
+--- a/drivers/net/dsa/sja1105/sja1105_static_config.c
++++ b/drivers/net/dsa/sja1105/sja1105_static_config.c
+@@ -142,6 +142,9 @@ static size_t sja1105et_general_params_e
+ return size;
+ }
+
++/* TPID and TPID2 are intentionally reversed so that semantic
++ * compatibility with E/T is kept.
++ */
+ static size_t
+ sja1105pqrs_general_params_entry_packing(void *buf, void *entry_ptr,
+ enum packing_op op)
+@@ -166,9 +169,9 @@ sja1105pqrs_general_params_entry_packing
+ sja1105_packing(buf, &entry->mirr_port, 141, 139, size, op);
+ sja1105_packing(buf, &entry->vlmarker, 138, 107, size, op);
+ sja1105_packing(buf, &entry->vlmask, 106, 75, size, op);
+- sja1105_packing(buf, &entry->tpid, 74, 59, size, op);
++ sja1105_packing(buf, &entry->tpid2, 74, 59, size, op);
+ sja1105_packing(buf, &entry->ignore2stf, 58, 58, size, op);
+- sja1105_packing(buf, &entry->tpid2, 57, 42, size, op);
++ sja1105_packing(buf, &entry->tpid, 57, 42, size, op);
+ sja1105_packing(buf, &entry->queue_ts, 41, 41, size, op);
+ sja1105_packing(buf, &entry->egrmirrvid, 40, 29, size, op);
+ sja1105_packing(buf, &entry->egrmirrpcp, 28, 26, size, op);
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Hangbin Liu <liuhangbin@gmail.com>
+Date: Sun, 22 Dec 2019 10:51:12 +0800
+Subject: net/dst: add new function skb_dst_update_pmtu_no_confirm
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit 07dc35c6e3cc3c001915d05f5bf21f80a39a0970 ]
+
+Add a new function skb_dst_update_pmtu_no_confirm() for callers who need
+to update the PMTU but should not confirm the neighbour.
+
+v5: No change.
+v4: No change.
+v3: Do not remove dst_confirm_neigh, but add a new bool parameter in
+ dst_ops.update_pmtu to control whether we should do neighbor confirm.
+ Also split the big patch to small ones for each area.
+v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu.
+
+Reviewed-by: Guillaume Nault <gnault@redhat.com>
+Acked-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/dst.h | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/include/net/dst.h
++++ b/include/net/dst.h
+@@ -519,6 +519,15 @@ static inline void skb_dst_update_pmtu(s
+ dst->ops->update_pmtu(dst, NULL, skb, mtu, true);
+ }
+
++/* update dst pmtu but not do neighbor confirm */
++static inline void skb_dst_update_pmtu_no_confirm(struct sk_buff *skb, u32 mtu)
++{
++ struct dst_entry *dst = skb_dst(skb);
++
++ if (dst && dst->ops->update_pmtu)
++ dst->ops->update_pmtu(dst, NULL, skb, mtu, false);
++}
++
+ static inline void skb_tunnel_check_pmtu(struct sk_buff *skb,
+ struct dst_entry *encap_dst,
+ int headroom)
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Hangbin Liu <liuhangbin@gmail.com>
+Date: Sun, 22 Dec 2019 10:51:16 +0800
+Subject: net/dst: do not confirm neighbor for vxlan and geneve pmtu update
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit f081042d128a0c7acbd67611def62e1b52e2d294 ]
+
+When doing an IPv6 tunnel PMTU update that calls __ip6_rt_update_pmtu() in
+the end, we should not call dst_confirm_neigh() as there is no two-way
+communication.
+
+So disable the neigh confirm for vxlan and geneve pmtu update.
+
+v5: No change.
+v4: No change.
+v3: Do not remove dst_confirm_neigh, but add a new bool parameter in
+ dst_ops.update_pmtu to control whether we should do neighbor confirm.
+ Also split the big patch to small ones for each area.
+v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu.
+
+Fixes: a93bf0ff4490 ("vxlan: update skb dst pmtu on tx path")
+Fixes: 52a589d51f10 ("geneve: update skb dst pmtu on tx path")
+Reviewed-by: Guillaume Nault <gnault@redhat.com>
+Tested-by: Guillaume Nault <gnault@redhat.com>
+Acked-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/dst.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/include/net/dst.h
++++ b/include/net/dst.h
+@@ -535,7 +535,7 @@ static inline void skb_tunnel_check_pmtu
+ u32 encap_mtu = dst_mtu(encap_dst);
+
+ if (skb->len > encap_mtu - headroom)
+- skb_dst_update_pmtu(skb, encap_mtu - headroom);
++ skb_dst_update_pmtu_no_confirm(skb, encap_mtu - headroom);
+ }
+
+ #endif /* _NET_DST_H */
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:32 PM CET
+From: Netanel Belgazal <netanel@amazon.com>
+Date: Tue, 10 Dec 2019 11:27:44 +0000
+Subject: net: ena: fix napi handler misbehavior when the napi budget is zero
+
+From: Netanel Belgazal <netanel@amazon.com>
+
+[ Upstream commit 24dee0c7478d1a1e00abdf5625b7f921467325dc ]
+
+In netpoll the napi handler could be called with budget equal to zero.
+Current ENA napi handler doesn't take that into consideration.
+
+The napi handler handles Rx packets in a do-while loop.
+Currently, the budget check happens only after decrementing the
+budget, therefore the napi handler, in rare cases, could run over
+MAX_INT packets.
+
+In addition to that, this moves all budget related variables to int
+calculation and stop mixing u32 to avoid ambiguity
+
+Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)")
+Signed-off-by: Netanel Belgazal <netanel@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/amazon/ena/ena_netdev.c | 10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
+@@ -1238,8 +1238,8 @@ static int ena_io_poll(struct napi_struc
+ struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
+ struct ena_ring *tx_ring, *rx_ring;
+
+- u32 tx_work_done;
+- u32 rx_work_done;
++ int tx_work_done;
++ int rx_work_done = 0;
+ int tx_budget;
+ int napi_comp_call = 0;
+ int ret;
+@@ -1256,7 +1256,11 @@ static int ena_io_poll(struct napi_struc
+ }
+
+ tx_work_done = ena_clean_tx_irq(tx_ring, tx_budget);
+- rx_work_done = ena_clean_rx_irq(rx_ring, napi, budget);
++ /* On netpoll the budget is zero and the handler should only clean the
++ * tx completions.
++ */
++ if (likely(budget))
++ rx_work_done = ena_clean_rx_irq(rx_ring, napi, budget);
+
+ /* If the device is about to reset or down, avoid unmask
+ * the interrupt and return 0 so NAPI won't reschedule
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Russell King <rmk+kernel@armlinux.org.uk>
+Date: Tue, 10 Dec 2019 22:33:05 +0000
+Subject: net: marvell: mvpp2: phylink requires the link interrupt
+
+From: Russell King <rmk+kernel@armlinux.org.uk>
+
+[ Upstream commit f3f2364ea14d1cf6bf966542f31eadcf178f1577 ]
+
+phylink requires the MAC to report when its link status changes when
+operating in inband modes. Failure to report link status changes
+means that phylink has no idea when the link events happen, which
+results in either the network interface's carrier remaining up or
+remaining permanently down.
+
+For example, with a fiber module, if the interface is brought up and
+link is initially established, taking the link down at the far end
+will cut the optical power. The SFP module's LOS asserts, we
+deactivate the link, and the network interface reports no carrier.
+
+When the far end is brought back up, the SFP module's LOS deasserts,
+but the MAC may be slower to establish link. If this happens (which
+in my tests is a certainty) then phylink never hears that the MAC
+has established link with the far end, and the network interface is
+stuck reporting no carrier. This means the interface is
+non-functional.
+
+Avoiding the link interrupt when we have phylink is basically not
+an option, so remove the !port->phylink from the test.
+
+Fixes: 4bb043262878 ("net: mvpp2: phylink support")
+Tested-by: Sven Auhagen <sven.auhagen@voleatech.de>
+Tested-by: Antoine Tenart <antoine.tenart@bootlin.com>
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
++++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+@@ -3674,7 +3674,7 @@ static int mvpp2_open(struct net_device
+ valid = true;
+ }
+
+- if (priv->hw_version == MVPP22 && port->link_irq && !port->phylink) {
++ if (priv->hw_version == MVPP22 && port->link_irq) {
+ err = request_irq(port->link_irq, mvpp2_link_status_isr, 0,
+ dev->name, port);
+ if (err) {
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Vladyslav Tarasiuk <vladyslavt@mellanox.com>
+Date: Thu, 26 Dec 2019 10:41:56 +0200
+Subject: net/mlxfw: Fix out-of-memory error in mfa2 flash burning
+
+From: Vladyslav Tarasiuk <vladyslavt@mellanox.com>
+
+[ Upstream commit a5bcd72e054aabb93ddc51ed8cde36a5bfc50271 ]
+
+The burning process requires to perform internal allocations of large
+chunks of memory. This memory doesn't need to be contiguous and can be
+safely allocated by vzalloc() instead of kzalloc(). This patch changes
+such allocation to avoid possible out-of-memory failure.
+
+Fixes: 410ed13cae39 ("Add the mlxfw module for Mellanox firmware flash process")
+Signed-off-by: Vladyslav Tarasiuk <vladyslavt@mellanox.com>
+Reviewed-by: Aya Levin <ayal@mellanox.com>
+Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
+Tested-by: Ido Schimmel <idosch@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c
++++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c
+@@ -6,6 +6,7 @@
+ #include <linux/kernel.h>
+ #include <linux/module.h>
+ #include <linux/netlink.h>
++#include <linux/vmalloc.h>
+ #include <linux/xz.h>
+ #include "mlxfw_mfa2.h"
+ #include "mlxfw_mfa2_file.h"
+@@ -548,7 +549,7 @@ mlxfw_mfa2_file_component_get(const stru
+ comp_size = be32_to_cpu(comp->size);
+ comp_buf_size = comp_size + mlxfw_mfa2_comp_magic_len;
+
+- comp_data = kmalloc(sizeof(*comp_data) + comp_buf_size, GFP_KERNEL);
++ comp_data = vzalloc(sizeof(*comp_data) + comp_buf_size);
+ if (!comp_data)
+ return ERR_PTR(-ENOMEM);
+ comp_data->comp.data_size = comp_size;
+@@ -570,7 +571,7 @@ mlxfw_mfa2_file_component_get(const stru
+ comp_data->comp.data = comp_data->buff + mlxfw_mfa2_comp_magic_len;
+ return &comp_data->comp;
+ err_out:
+- kfree(comp_data);
++ vfree(comp_data);
+ return ERR_PTR(err);
+ }
+
+@@ -579,7 +580,7 @@ void mlxfw_mfa2_file_component_put(struc
+ const struct mlxfw_mfa2_comp_data *comp_data;
+
+ comp_data = container_of(comp, struct mlxfw_mfa2_comp_data, comp);
+- kfree(comp_data);
++ vfree(comp_data);
+ }
+
+ void mlxfw_mfa2_file_fini(struct mlxfw_mfa2_file *mfa2_file)
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Madalin Bucur <madalin.bucur@oss.nxp.com>
+Date: Mon, 23 Dec 2019 10:06:10 +0200
+Subject: net: phy: aquantia: add suspend / resume ops for AQR105
+
+From: Madalin Bucur <madalin.bucur@oss.nxp.com>
+
+[ Upstream commit 1c93fb45761e79b3c00080e71523886cefaf351c ]
+
+The suspend/resume code for AQR107 works on AQR105 too.
+This patch fixes issues with the partner not seeing the link down
+when the interface using AQR105 is brought down.
+
+Fixes: bee8259dd31f ("net: phy: add driver for aquantia phy")
+Signed-off-by: Madalin Bucur <madalin.bucur@oss.nxp.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/aquantia_main.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/net/phy/aquantia_main.c
++++ b/drivers/net/phy/aquantia_main.c
+@@ -627,6 +627,8 @@ static struct phy_driver aqr_driver[] =
+ .config_intr = aqr_config_intr,
+ .ack_interrupt = aqr_ack_interrupt,
+ .read_status = aqr_read_status,
++ .suspend = aqr107_suspend,
++ .resume = aqr107_resume,
+ },
+ {
+ PHY_ID_MATCH_MODEL(PHY_ID_AQR106),
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Russell King <rmk+kernel@armlinux.org.uk>
+Date: Fri, 13 Dec 2019 10:06:30 +0000
+Subject: net: phylink: fix interface passed to mac_link_up
+
+From: Russell King <rmk+kernel@armlinux.org.uk>
+
+[ Upstream commit 9b2079c046a9d6c9c73a4ec33816678565ee01f3 ]
+
+A mismerge between the following two commits:
+
+c678726305b9 ("net: phylink: ensure consistent phy interface mode")
+27755ff88c0e ("net: phylink: Add phylink_mac_link_{up, down} wrapper functions")
+
+resulted in the wrong interface being passed to the mac_link_up()
+function. Fix this up.
+
+Fixes: b4b12b0d2f02 ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net")
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/phylink.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/drivers/net/phy/phylink.c
++++ b/drivers/net/phy/phylink.c
+@@ -444,8 +444,7 @@ static void phylink_mac_link_up(struct p
+
+ pl->cur_interface = link_state.interface;
+ pl->ops->mac_link_up(pl->config, pl->link_an_mode,
+- pl->phy_state.interface,
+- pl->phydev);
++ pl->cur_interface, pl->phydev);
+
+ if (ndev)
+ netif_carrier_on(ndev);
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Shmulik Ladkani <sladkani@proofpoint.com>
+Date: Wed, 25 Dec 2019 10:51:01 +0200
+Subject: net/sched: act_mirred: Pull mac prior redir to non mac_header_xmit device
+
+From: Shmulik Ladkani <sladkani@proofpoint.com>
+
+[ Upstream commit 70cf3dc7313207816255b9acb0dffb19dae78144 ]
+
+There's no skb_pull performed when a mirred action is set at egress of a
+mac device, with a target device/action that expects skb->data to point
+at the network header.
+
+As a result, either the target device is erroneously given an skb with
+data pointing to the mac (egress case), or the net stack receives the
+skb with data pointing to the mac (ingress case).
+
+E.g:
+ # tc qdisc add dev eth9 root handle 1: prio
+ # tc filter add dev eth9 parent 1: prio 9 protocol ip handle 9 basic \
+ action mirred egress redirect dev tun0
+
+ (tun0 is a tun device. result: tun0 erroneously gets the eth header
+ instead of the iph)
+
+Revise the push/pull logic of tcf_mirred_act() to not rely on the
+skb_at_tc_ingress() vs tcf_mirred_act_wants_ingress() comparison, as it
+does not cover all "pull" cases.
+
+Instead, calculate whether the required action on the target device
+requires the data to point at the network header, and compare this to
+whether skb->data points to network header - and make the push/pull
+adjustments as necessary.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Shmulik Ladkani <sladkani@proofpoint.com>
+Tested-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/act_mirred.c | 22 ++++++++++++----------
+ 1 file changed, 12 insertions(+), 10 deletions(-)
+
+--- a/net/sched/act_mirred.c
++++ b/net/sched/act_mirred.c
+@@ -219,8 +219,10 @@ static int tcf_mirred_act(struct sk_buff
+ bool use_reinsert;
+ bool want_ingress;
+ bool is_redirect;
++ bool expects_nh;
+ int m_eaction;
+ int mac_len;
++ bool at_nh;
+
+ rec_level = __this_cpu_inc_return(mirred_rec_level);
+ if (unlikely(rec_level > MIRRED_RECURSION_LIMIT)) {
+@@ -261,19 +263,19 @@ static int tcf_mirred_act(struct sk_buff
+ goto out;
+ }
+
+- /* If action's target direction differs than filter's direction,
+- * and devices expect a mac header on xmit, then mac push/pull is
+- * needed.
+- */
+ want_ingress = tcf_mirred_act_wants_ingress(m_eaction);
+- if (skb_at_tc_ingress(skb) != want_ingress && m_mac_header_xmit) {
+- if (!skb_at_tc_ingress(skb)) {
+- /* caught at egress, act ingress: pull mac */
+- mac_len = skb_network_header(skb) - skb_mac_header(skb);
++
++ expects_nh = want_ingress || !m_mac_header_xmit;
++ at_nh = skb->data == skb_network_header(skb);
++ if (at_nh != expects_nh) {
++ mac_len = skb_at_tc_ingress(skb) ? skb->mac_len :
++ skb_network_header(skb) - skb_mac_header(skb);
++ if (expects_nh) {
++ /* target device/action expect data at nh */
+ skb_pull_rcsum(skb2, mac_len);
+ } else {
+- /* caught at ingress, act egress: push mac */
+- skb_push_rcsum(skb2, skb->mac_len);
++ /* target device/action expect data at mac */
++ skb_push_rcsum(skb2, mac_len);
+ }
+ }
+
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Davide Caratti <dcaratti@redhat.com>
+Date: Sat, 28 Dec 2019 16:36:58 +0100
+Subject: net/sched: add delete_empty() to filters and use it in cls_flower
+
+From: Davide Caratti <dcaratti@redhat.com>
+
+[ Upstream commit a5b72a083da197b493c7ed1e5730d62d3199f7d6 ]
+
+Revert "net/sched: cls_u32: fix refcount leak in the error path of
+u32_change()", and fix the u32 refcount leak in a more generic way that
+preserves the semantic of rule dumping.
+On tc filters that don't support lockless insertion/removal, there is no
+need to guard against concurrent insertion when a removal is in progress.
+Therefore, for most of them we can avoid a full walk() when deleting, and
+just decrease the refcount, like it was done on older Linux kernels.
+This fixes situations where walk() was wrongly detecting a non-empty
+filter, like it happened with cls_u32 in the error path of change(), thus
+leading to failures in the following tdc selftests:
+
+ 6aa7: (filter, u32) Add/Replace u32 with source match and invalid indev
+ 6658: (filter, u32) Add/Replace u32 with custom hash table and invalid handle
+ 74c2: (filter, u32) Add/Replace u32 filter with invalid hash table id
+
+On cls_flower, and on (future) lockless filters, this check is necessary:
+move all the check_empty() logic in a callback so that each filter
+can have its own implementation. For cls_flower, it's sufficient to check
+if no IDRs have been allocated.
+
+This reverts commit 275c44aa194b7159d1191817b20e076f55f0e620.
+
+Changes since v1:
+ - document the need for delete_empty() when TCF_PROTO_OPS_DOIT_UNLOCKED
+ is used, thanks to Vlad Buslov
+ - implement delete_empty() without doing fl_walk(), thanks to Vlad Buslov
+ - squash revert and new fix in a single patch, to be nice with bisect
+ tests that run tdc on u32 filter, thanks to Dave Miller
+
+Fixes: 275c44aa194b ("net/sched: cls_u32: fix refcount leak in the error path of u32_change()")
+Fixes: 6676d5e416ee ("net: sched: set dedicated tcf_walker flag when tp is empty")
+Suggested-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Suggested-by: Vlad Buslov <vladbu@mellanox.com>
+Signed-off-by: Davide Caratti <dcaratti@redhat.com>
+Reviewed-by: Vlad Buslov <vladbu@mellanox.com>
+Tested-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/sch_generic.h | 5 +++++
+ net/sched/cls_api.c | 31 +++++--------------------------
+ net/sched/cls_flower.c | 12 ++++++++++++
+ 3 files changed, 22 insertions(+), 26 deletions(-)
+
+--- a/include/net/sch_generic.h
++++ b/include/net/sch_generic.h
+@@ -308,6 +308,7 @@ struct tcf_proto_ops {
+ int (*delete)(struct tcf_proto *tp, void *arg,
+ bool *last, bool rtnl_held,
+ struct netlink_ext_ack *);
++ bool (*delete_empty)(struct tcf_proto *tp);
+ void (*walk)(struct tcf_proto *tp,
+ struct tcf_walker *arg, bool rtnl_held);
+ int (*reoffload)(struct tcf_proto *tp, bool add,
+@@ -336,6 +337,10 @@ struct tcf_proto_ops {
+ int flags;
+ };
+
++/* Classifiers setting TCF_PROTO_OPS_DOIT_UNLOCKED in tcf_proto_ops->flags
++ * are expected to implement tcf_proto_ops->delete_empty(), otherwise race
++ * conditions can occur when filters are inserted/deleted simultaneously.
++ */
+ enum tcf_proto_ops_flags {
+ TCF_PROTO_OPS_DOIT_UNLOCKED = 1,
+ };
+--- a/net/sched/cls_api.c
++++ b/net/sched/cls_api.c
+@@ -308,33 +308,12 @@ static void tcf_proto_put(struct tcf_pro
+ tcf_proto_destroy(tp, rtnl_held, true, extack);
+ }
+
+-static int walker_check_empty(struct tcf_proto *tp, void *fh,
+- struct tcf_walker *arg)
++static bool tcf_proto_check_delete(struct tcf_proto *tp)
+ {
+- if (fh) {
+- arg->nonempty = true;
+- return -1;
+- }
+- return 0;
+-}
+-
+-static bool tcf_proto_is_empty(struct tcf_proto *tp, bool rtnl_held)
+-{
+- struct tcf_walker walker = { .fn = walker_check_empty, };
+-
+- if (tp->ops->walk) {
+- tp->ops->walk(tp, &walker, rtnl_held);
+- return !walker.nonempty;
+- }
+- return true;
+-}
++ if (tp->ops->delete_empty)
++ return tp->ops->delete_empty(tp);
+
+-static bool tcf_proto_check_delete(struct tcf_proto *tp, bool rtnl_held)
+-{
+- spin_lock(&tp->lock);
+- if (tcf_proto_is_empty(tp, rtnl_held))
+- tp->deleting = true;
+- spin_unlock(&tp->lock);
++ tp->deleting = true;
+ return tp->deleting;
+ }
+
+@@ -1751,7 +1730,7 @@ static void tcf_chain_tp_delete_empty(st
+ * concurrently.
+ * Mark tp for deletion if it is empty.
+ */
+- if (!tp_iter || !tcf_proto_check_delete(tp, rtnl_held)) {
++ if (!tp_iter || !tcf_proto_check_delete(tp)) {
+ mutex_unlock(&chain->filter_chain_lock);
+ return;
+ }
+--- a/net/sched/cls_flower.c
++++ b/net/sched/cls_flower.c
+@@ -2519,6 +2519,17 @@ static void fl_bind_class(void *fh, u32
+ f->res.class = cl;
+ }
+
++static bool fl_delete_empty(struct tcf_proto *tp)
++{
++ struct cls_fl_head *head = fl_head_dereference(tp);
++
++ spin_lock(&tp->lock);
++ tp->deleting = idr_is_empty(&head->handle_idr);
++ spin_unlock(&tp->lock);
++
++ return tp->deleting;
++}
++
+ static struct tcf_proto_ops cls_fl_ops __read_mostly = {
+ .kind = "flower",
+ .classify = fl_classify,
+@@ -2528,6 +2539,7 @@ static struct tcf_proto_ops cls_fl_ops _
+ .put = fl_put,
+ .change = fl_change,
+ .delete = fl_delete,
++ .delete_empty = fl_delete_empty,
+ .walk = fl_walk,
+ .reoffload = fl_reoffload,
+ .hw_add = fl_hw_add,
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+Date: Thu, 26 Dec 2019 20:01:01 +0100
+Subject: net: stmmac: dwmac-meson8b: Fix the RGMII TX delay on Meson8b/8m2 SoCs
+
+From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+
+[ Upstream commit bd6f48546b9cb7a785344fc78058c420923d7ed8 ]
+
+GXBB and newer SoCs use the fixed FCLK_DIV2 (1GHz) clock as input for
+the m250_sel clock. Meson8b and Meson8m2 use MPLL2 instead, whose rate
+can be adjusted at runtime.
+
+So far we have been running MPLL2 with ~250MHz (and the internal
+m250_div with value 1), which worked enough that we could transfer data
+with an TX delay of 4ns. Unfortunately there is high packet loss with
+an RGMII PHY when transferring data (receiving data works fine though).
+Odroid-C1's u-boot is running with a TX delay of only 2ns as well as
+the internal m250_div set to 2 - no lost (TX) packets can be observed
+with that setting in u-boot.
+
+Manual testing has shown that the TX packet loss goes away when using
+the following settings in Linux (the vendor kernel uses the same
+settings):
+- MPLL2 clock set to ~500MHz
+- m250_div set to 2
+- TX delay set to 2ns on the MAC side
+
+Update the m250_div divider settings to only accept dividers greater
+than or equal to 2 to fix the TX delay generated by the MAC.
+
+iperf3 results before the change:
+[ ID] Interval Transfer Bitrate Retr
+[ 5] 0.00-10.00 sec 182 MBytes 153 Mbits/sec 514 sender
+[ 5] 0.00-10.00 sec 182 MBytes 152 Mbits/sec receiver
+
+iperf3 results after the change (including an updated TX delay of 2ns):
+[ ID] Interval Transfer Bitrate Retr Cwnd
+[ 5] 0.00-10.00 sec 927 MBytes 778 Mbits/sec 0 sender
+[ 5] 0.00-10.01 sec 927 MBytes 777 Mbits/sec receiver
+
+Fixes: 4f6a71b84e1afd ("net: stmmac: dwmac-meson8b: fix internal RGMII clock configuration")
+Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c | 14 +++++++++++---
+ 1 file changed, 11 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
+@@ -112,6 +112,14 @@ static int meson8b_init_rgmii_tx_clk(str
+ struct device *dev = dwmac->dev;
+ const char *parent_name, *mux_parent_names[MUX_CLK_NUM_PARENTS];
+ struct meson8b_dwmac_clk_configs *clk_configs;
++ static const struct clk_div_table div_table[] = {
++ { .div = 2, .val = 2, },
++ { .div = 3, .val = 3, },
++ { .div = 4, .val = 4, },
++ { .div = 5, .val = 5, },
++ { .div = 6, .val = 6, },
++ { .div = 7, .val = 7, },
++ };
+
+ clk_configs = devm_kzalloc(dev, sizeof(*clk_configs), GFP_KERNEL);
+ if (!clk_configs)
+@@ -146,9 +154,9 @@ static int meson8b_init_rgmii_tx_clk(str
+ clk_configs->m250_div.reg = dwmac->regs + PRG_ETH0;
+ clk_configs->m250_div.shift = PRG_ETH0_CLK_M250_DIV_SHIFT;
+ clk_configs->m250_div.width = PRG_ETH0_CLK_M250_DIV_WIDTH;
+- clk_configs->m250_div.flags = CLK_DIVIDER_ONE_BASED |
+- CLK_DIVIDER_ALLOW_ZERO |
+- CLK_DIVIDER_ROUND_CLOSEST;
++ clk_configs->m250_div.table = div_table;
++ clk_configs->m250_div.flags = CLK_DIVIDER_ALLOW_ZERO |
++ CLK_DIVIDER_ROUND_CLOSEST;
+ clk = meson8b_dwmac_register_clk(dwmac, "m250_div", &parent_name, 1,
+ &clk_divider_ops,
+ &clk_configs->m250_div.hw);
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 23 Dec 2019 11:13:24 -0800
+Subject: net_sched: sch_fq: properly set sk->sk_pacing_status
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit bb3d0b8bf5be61ab1d6f472c43cbf34de17e796b ]
+
+If fq_classify() recycles a struct fq_flow because
+a socket structure has been reallocated, we do not
+set sk->sk_pacing_status immediately, but later if the
+flow becomes detached.
+
+This means that any flow requiring pacing (BBR, or SO_MAX_PACING_RATE)
+might fallback to TCP internal pacing, which requires a per-socket
+high resolution timer, and therefore more cpu cycles.
+
+Fixes: 218af599fa63 ("tcp: internal implementation for pacing")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Soheil Hassas Yeganeh <soheil@google.com>
+Cc: Neal Cardwell <ncardwell@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_fq.c | 17 ++++++++---------
+ 1 file changed, 8 insertions(+), 9 deletions(-)
+
+--- a/net/sched/sch_fq.c
++++ b/net/sched/sch_fq.c
+@@ -301,6 +301,9 @@ static struct fq_flow *fq_classify(struc
+ f->socket_hash != sk->sk_hash)) {
+ f->credit = q->initial_quantum;
+ f->socket_hash = sk->sk_hash;
++ if (q->rate_enable)
++ smp_store_release(&sk->sk_pacing_status,
++ SK_PACING_FQ);
+ if (fq_flow_is_throttled(f))
+ fq_flow_unset_throttled(q, f);
+ f->time_next_packet = 0ULL;
+@@ -322,8 +325,12 @@ static struct fq_flow *fq_classify(struc
+
+ fq_flow_set_detached(f);
+ f->sk = sk;
+- if (skb->sk == sk)
++ if (skb->sk == sk) {
+ f->socket_hash = sk->sk_hash;
++ if (q->rate_enable)
++ smp_store_release(&sk->sk_pacing_status,
++ SK_PACING_FQ);
++ }
+ f->credit = q->initial_quantum;
+
+ rb_link_node(&f->fq_node, parent, p);
+@@ -428,17 +435,9 @@ static int fq_enqueue(struct sk_buff *sk
+ f->qlen++;
+ qdisc_qstats_backlog_inc(sch, skb);
+ if (fq_flow_is_detached(f)) {
+- struct sock *sk = skb->sk;
+-
+ fq_flow_add_tail(&q->new_flows, f);
+ if (time_after(jiffies, f->age + q->flow_refill_delay))
+ f->credit = max_t(u32, f->credit, q->quantum);
+- if (sk && q->rate_enable) {
+- if (unlikely(smp_load_acquire(&sk->sk_pacing_status) !=
+- SK_PACING_FQ))
+- smp_store_release(&sk->sk_pacing_status,
+- SK_PACING_FQ);
+- }
+ q->inactive_flows--;
+ }
+
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Vladis Dronov <vdronov@redhat.com>
+Date: Fri, 27 Dec 2019 03:26:27 +0100
+Subject: ptp: fix the race between the release of ptp_clock and cdev
+
+From: Vladis Dronov <vdronov@redhat.com>
+
+[ Upstream commit a33121e5487b424339636b25c35d3a180eaa5f5e ]
+
+In a case when a ptp chardev (like /dev/ptp0) is open but an underlying
+device is removed, closing this file leads to a race. This reproduces
+easily in a kvm virtual machine:
+
+ts# cat openptp0.c
+int main() { ... fp = fopen("/dev/ptp0", "r"); ... sleep(10); }
+ts# uname -r
+5.5.0-rc3-46cf053e
+ts# cat /proc/cmdline
+... slub_debug=FZP
+ts# modprobe ptp_kvm
+ts# ./openptp0 &
+[1] 670
+opened /dev/ptp0, sleeping 10s...
+ts# rmmod ptp_kvm
+ts# ls /dev/ptp*
+ls: cannot access '/dev/ptp*': No such file or directory
+ts# ...woken up
+[ 48.010809] general protection fault: 0000 [#1] SMP
+[ 48.012502] CPU: 6 PID: 658 Comm: openptp0 Not tainted 5.5.0-rc3-46cf053e #25
+[ 48.014624] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), ...
+[ 48.016270] RIP: 0010:module_put.part.0+0x7/0x80
+[ 48.017939] RSP: 0018:ffffb3850073be00 EFLAGS: 00010202
+[ 48.018339] RAX: 000000006b6b6b6b RBX: 6b6b6b6b6b6b6b6b RCX: ffff89a476c00ad0
+[ 48.018936] RDX: fffff65a08d3ea08 RSI: 0000000000000247 RDI: 6b6b6b6b6b6b6b6b
+[ 48.019470] ... ^^^ a slub poison
+[ 48.023854] Call Trace:
+[ 48.024050] __fput+0x21f/0x240
+[ 48.024288] task_work_run+0x79/0x90
+[ 48.024555] do_exit+0x2af/0xab0
+[ 48.024799] ? vfs_write+0x16a/0x190
+[ 48.025082] do_group_exit+0x35/0x90
+[ 48.025387] __x64_sys_exit_group+0xf/0x10
+[ 48.025737] do_syscall_64+0x3d/0x130
+[ 48.026056] entry_SYSCALL_64_after_hwframe+0x44/0xa9
+[ 48.026479] RIP: 0033:0x7f53b12082f6
+[ 48.026792] ...
+[ 48.030945] Modules linked in: ptp i6300esb watchdog [last unloaded: ptp_kvm]
+[ 48.045001] Fixing recursive fault but reboot is needed!
+
+This happens in:
+
+static void __fput(struct file *file)
+{ ...
+ if (file->f_op->release)
+ file->f_op->release(inode, file); <<< cdev is kfree'd here
+ if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL &&
+ !(mode & FMODE_PATH))) {
+ cdev_put(inode->i_cdev); <<< cdev fields are accessed here
+
+Namely:
+
+__fput()
+ posix_clock_release()
+ kref_put(&clk->kref, delete_clock) <<< the last reference
+ delete_clock()
+ delete_ptp_clock()
+ kfree(ptp) <<< cdev is embedded in ptp
+ cdev_put
+ module_put(p->owner) <<< *p is kfree'd, bang!
+
+Here cdev is embedded in posix_clock which is embedded in ptp_clock.
+The race happens because ptp_clock's lifetime is controlled by two
+refcounts: kref and cdev.kobj in posix_clock. This is wrong.
+
+Make ptp_clock's sysfs device a parent of cdev with cdev_device_add()
+created especially for such cases. This way the parent device with its
+ptp_clock is not released until all references to the cdev are released.
+This adds a requirement that an initialized but not exposed struct
+device should be provided to posix_clock_register() by a caller instead
+of a simple dev_t.
+
+This approach was adopted from the commit 72139dfa2464 ("watchdog: Fix
+the race between the release of watchdog_core_data and cdev"). See
+details of the implementation in the commit 233ed09d7fda ("chardev: add
+helper function to register char devs with a struct device").
+
+Link: https://lore.kernel.org/linux-fsdevel/20191125125342.6189-1-vdronov@redhat.com/T/#u
+Analyzed-by: Stephen Johnston <sjohnsto@redhat.com>
+Analyzed-by: Vern Lovejoy <vlovejoy@redhat.com>
+Signed-off-by: Vladis Dronov <vdronov@redhat.com>
+Acked-by: Richard Cochran <richardcochran@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/ptp/ptp_clock.c | 31 ++++++++++++++-----------------
+ drivers/ptp/ptp_private.h | 2 +-
+ include/linux/posix-clock.h | 19 +++++++++++--------
+ kernel/time/posix-clock.c | 31 +++++++++++++------------------
+ 4 files changed, 39 insertions(+), 44 deletions(-)
+
+--- a/drivers/ptp/ptp_clock.c
++++ b/drivers/ptp/ptp_clock.c
+@@ -166,9 +166,9 @@ static struct posix_clock_operations ptp
+ .read = ptp_read,
+ };
+
+-static void delete_ptp_clock(struct posix_clock *pc)
++static void ptp_clock_release(struct device *dev)
+ {
+- struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
++ struct ptp_clock *ptp = container_of(dev, struct ptp_clock, dev);
+
+ mutex_destroy(&ptp->tsevq_mux);
+ mutex_destroy(&ptp->pincfg_mux);
+@@ -213,7 +213,6 @@ struct ptp_clock *ptp_clock_register(str
+ }
+
+ ptp->clock.ops = ptp_clock_ops;
+- ptp->clock.release = delete_ptp_clock;
+ ptp->info = info;
+ ptp->devid = MKDEV(major, index);
+ ptp->index = index;
+@@ -236,15 +235,6 @@ struct ptp_clock *ptp_clock_register(str
+ if (err)
+ goto no_pin_groups;
+
+- /* Create a new device in our class. */
+- ptp->dev = device_create_with_groups(ptp_class, parent, ptp->devid,
+- ptp, ptp->pin_attr_groups,
+- "ptp%d", ptp->index);
+- if (IS_ERR(ptp->dev)) {
+- err = PTR_ERR(ptp->dev);
+- goto no_device;
+- }
+-
+ /* Register a new PPS source. */
+ if (info->pps) {
+ struct pps_source_info pps;
+@@ -260,8 +250,18 @@ struct ptp_clock *ptp_clock_register(str
+ }
+ }
+
+- /* Create a posix clock. */
+- err = posix_clock_register(&ptp->clock, ptp->devid);
++ /* Initialize a new device of our class in our clock structure. */
++ device_initialize(&ptp->dev);
++ ptp->dev.devt = ptp->devid;
++ ptp->dev.class = ptp_class;
++ ptp->dev.parent = parent;
++ ptp->dev.groups = ptp->pin_attr_groups;
++ ptp->dev.release = ptp_clock_release;
++ dev_set_drvdata(&ptp->dev, ptp);
++ dev_set_name(&ptp->dev, "ptp%d", ptp->index);
++
++ /* Create a posix clock and link it to the device. */
++ err = posix_clock_register(&ptp->clock, &ptp->dev);
+ if (err) {
+ pr_err("failed to create posix clock\n");
+ goto no_clock;
+@@ -273,8 +273,6 @@ no_clock:
+ if (ptp->pps_source)
+ pps_unregister_source(ptp->pps_source);
+ no_pps:
+- device_destroy(ptp_class, ptp->devid);
+-no_device:
+ ptp_cleanup_pin_groups(ptp);
+ no_pin_groups:
+ if (ptp->kworker)
+@@ -304,7 +302,6 @@ int ptp_clock_unregister(struct ptp_cloc
+ if (ptp->pps_source)
+ pps_unregister_source(ptp->pps_source);
+
+- device_destroy(ptp_class, ptp->devid);
+ ptp_cleanup_pin_groups(ptp);
+
+ posix_clock_unregister(&ptp->clock);
+--- a/drivers/ptp/ptp_private.h
++++ b/drivers/ptp/ptp_private.h
+@@ -28,7 +28,7 @@ struct timestamp_event_queue {
+
+ struct ptp_clock {
+ struct posix_clock clock;
+- struct device *dev;
++ struct device dev;
+ struct ptp_clock_info *info;
+ dev_t devid;
+ int index; /* index into clocks.map */
+--- a/include/linux/posix-clock.h
++++ b/include/linux/posix-clock.h
+@@ -69,29 +69,32 @@ struct posix_clock_operations {
+ *
+ * @ops: Functional interface to the clock
+ * @cdev: Character device instance for this clock
+- * @kref: Reference count.
++ * @dev: Pointer to the clock's device.
+ * @rwsem: Protects the 'zombie' field from concurrent access.
+ * @zombie: If 'zombie' is true, then the hardware has disappeared.
+- * @release: A function to free the structure when the reference count reaches
+- * zero. May be NULL if structure is statically allocated.
+ *
+ * Drivers should embed their struct posix_clock within a private
+ * structure, obtaining a reference to it during callbacks using
+ * container_of().
++ *
++ * Drivers should supply an initialized but not exposed struct device
++ * to posix_clock_register(). It is used to manage lifetime of the
++ * driver's private structure. It's 'release' field should be set to
++ * a release function for this private structure.
+ */
+ struct posix_clock {
+ struct posix_clock_operations ops;
+ struct cdev cdev;
+- struct kref kref;
++ struct device *dev;
+ struct rw_semaphore rwsem;
+ bool zombie;
+- void (*release)(struct posix_clock *clk);
+ };
+
+ /**
+ * posix_clock_register() - register a new clock
+- * @clk: Pointer to the clock. Caller must provide 'ops' and 'release'
+- * @devid: Allocated device id
++ * @clk: Pointer to the clock. Caller must provide 'ops' field
++ * @dev: Pointer to the initialized device. Caller must provide
++ * 'release' field
+ *
+ * A clock driver calls this function to register itself with the
+ * clock device subsystem. If 'clk' points to dynamically allocated
+@@ -100,7 +103,7 @@ struct posix_clock {
+ *
+ * Returns zero on success, non-zero otherwise.
+ */
+-int posix_clock_register(struct posix_clock *clk, dev_t devid);
++int posix_clock_register(struct posix_clock *clk, struct device *dev);
+
+ /**
+ * posix_clock_unregister() - unregister a clock
+--- a/kernel/time/posix-clock.c
++++ b/kernel/time/posix-clock.c
+@@ -14,8 +14,6 @@
+
+ #include "posix-timers.h"
+
+-static void delete_clock(struct kref *kref);
+-
+ /*
+ * Returns NULL if the posix_clock instance attached to 'fp' is old and stale.
+ */
+@@ -125,7 +123,7 @@ static int posix_clock_open(struct inode
+ err = 0;
+
+ if (!err) {
+- kref_get(&clk->kref);
++ get_device(clk->dev);
+ fp->private_data = clk;
+ }
+ out:
+@@ -141,7 +139,7 @@ static int posix_clock_release(struct in
+ if (clk->ops.release)
+ err = clk->ops.release(clk);
+
+- kref_put(&clk->kref, delete_clock);
++ put_device(clk->dev);
+
+ fp->private_data = NULL;
+
+@@ -161,38 +159,35 @@ static const struct file_operations posi
+ #endif
+ };
+
+-int posix_clock_register(struct posix_clock *clk, dev_t devid)
++int posix_clock_register(struct posix_clock *clk, struct device *dev)
+ {
+ int err;
+
+- kref_init(&clk->kref);
+ init_rwsem(&clk->rwsem);
+
+ cdev_init(&clk->cdev, &posix_clock_file_operations);
++ err = cdev_device_add(&clk->cdev, dev);
++ if (err) {
++ pr_err("%s unable to add device %d:%d\n",
++ dev_name(dev), MAJOR(dev->devt), MINOR(dev->devt));
++ return err;
++ }
+ clk->cdev.owner = clk->ops.owner;
+- err = cdev_add(&clk->cdev, devid, 1);
++ clk->dev = dev;
+
+- return err;
++ return 0;
+ }
+ EXPORT_SYMBOL_GPL(posix_clock_register);
+
+-static void delete_clock(struct kref *kref)
+-{
+- struct posix_clock *clk = container_of(kref, struct posix_clock, kref);
+-
+- if (clk->release)
+- clk->release(clk);
+-}
+-
+ void posix_clock_unregister(struct posix_clock *clk)
+ {
+- cdev_del(&clk->cdev);
++ cdev_device_del(&clk->cdev, clk->dev);
+
+ down_write(&clk->rwsem);
+ clk->zombie = true;
+ up_write(&clk->rwsem);
+
+- kref_put(&clk->kref, delete_clock);
++ put_device(clk->dev);
+ }
+ EXPORT_SYMBOL_GPL(posix_clock_unregister);
+
uaccess-disallow-int_max-copy-sizes.patch
drm-limit-to-int_max-in-create_blob-ioctl.patch
xfs-fix-mount-failure-crash-on-invalid-iclog-memory-access.patch
+cxgb4-cxgb4vf-fix-flow-control-display-for-auto-negotiation.patch
+net-dsa-bcm_sf2-fix-ip-fragment-location-and-behavior.patch
+net-mlxfw-fix-out-of-memory-error-in-mfa2-flash-burning.patch
+net-phy-aquantia-add-suspend-resume-ops-for-aqr105.patch
+net-sched-act_mirred-pull-mac-prior-redir-to-non-mac_header_xmit-device.patch
+net-sched-add-delete_empty-to-filters-and-use-it-in-cls_flower.patch
+net_sched-sch_fq-properly-set-sk-sk_pacing_status.patch
+net-stmmac-dwmac-meson8b-fix-the-rgmii-tx-delay-on-meson8b-8m2-socs.patch
+ptp-fix-the-race-between-the-release-of-ptp_clock-and-cdev.patch
+tcp-fix-highest_sack-and-highest_sack_seq.patch
+udp-fix-integer-overflow-while-computing-available-space-in-sk_rcvbuf.patch
+bnxt_en-fix-msix-request-logic-for-rdma-driver.patch
+bnxt_en-free-context-memory-in-the-open-path-if-firmware-has-been-reset.patch
+bnxt_en-return-error-if-fw-returns-more-data-than-dump-length.patch
+bnxt_en-fix-bp-fw_health-allocation-and-free-logic.patch
+bnxt_en-remove-unnecessary-null-checks-for-fw_health.patch
+bnxt_en-fix-the-logic-that-creates-the-health-reporters.patch
+bnxt_en-add-missing-devlink-health-reporters-for-vfs.patch
+mlxsw-spectrum_router-skip-loopback-rifs-during-mac-validation.patch
+mlxsw-spectrum-use-dedicated-policer-for-vrrp-packets.patch
+net-add-bool-confirm_neigh-parameter-for-dst_ops.update_pmtu.patch
+ip6_gre-do-not-confirm-neighbor-when-do-pmtu-update.patch
+gtp-do-not-confirm-neighbor-when-do-pmtu-update.patch
+net-dst-add-new-function-skb_dst_update_pmtu_no_confirm.patch
+tunnel-do-not-confirm-neighbor-when-do-pmtu-update.patch
+vti-do-not-confirm-neighbor-when-do-pmtu-update.patch
+sit-do-not-confirm-neighbor-when-do-pmtu-update.patch
+net-dst-do-not-confirm-neighbor-for-vxlan-and-geneve-pmtu-update.patch
+net-dsa-sja1105-reconcile-the-meaning-of-tpid-and-tpid2-for-e-t-and-p-q-r-s.patch
+net-marvell-mvpp2-phylink-requires-the-link-interrupt.patch
+gtp-fix-wrong-condition-in-gtp_genl_dump_pdp.patch
+gtp-avoid-zero-size-hashtable.patch
+bonding-fix-active-backup-transition-after-link-failure.patch
+tcp-do-not-send-empty-skb-from-tcp_write_xmit.patch
+tcp-dccp-fix-possible-race-__inet_lookup_established.patch
+hv_netvsc-fix-tx_table-init-in-rndis_set_subchannel.patch
+gtp-fix-an-use-after-free-in-ipv4_pdp_find.patch
+gtp-do-not-allow-adding-duplicate-tid-and-ms_addr-pdp-context.patch
+bnxt-apply-computed-clamp-value-for-coalece-parameter.patch
+ipv6-addrconf-only-check-invalid-header-values-when-netlink_f_strict_chk-is-set.patch
+net-phylink-fix-interface-passed-to-mac_link_up.patch
+net-ena-fix-napi-handler-misbehavior-when-the-napi-budget-is-zero.patch
+vhost-vsock-accept-only-packets-with-the-right-dst_cid.patch
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Hangbin Liu <liuhangbin@gmail.com>
+Date: Sun, 22 Dec 2019 10:51:15 +0800
+Subject: sit: do not confirm neighbor when do pmtu update
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit 4d42df46d6372ece4cb4279870b46c2ea7304a47 ]
+
+When do IPv6 tunnel PMTU update and calls __ip6_rt_update_pmtu() in the end,
+we should not call dst_confirm_neigh() as there is no two-way communication.
+
+v5: No change.
+v4: No change.
+v3: Do not remove dst_confirm_neigh, but add a new bool parameter in
+ dst_ops.update_pmtu to control whether we should do neighbor confirm.
+ Also split the big patch to small ones for each area.
+v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu.
+
+Reviewed-by: Guillaume Nault <gnault@redhat.com>
+Acked-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/sit.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv6/sit.c
++++ b/net/ipv6/sit.c
+@@ -944,7 +944,7 @@ static netdev_tx_t ipip6_tunnel_xmit(str
+ }
+
+ if (tunnel->parms.iph.daddr)
+- skb_dst_update_pmtu(skb, mtu);
++ skb_dst_update_pmtu_no_confirm(skb, mtu);
+
+ if (skb->len > mtu && !skb_is_gso(skb)) {
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 13 Dec 2019 18:20:41 -0800
+Subject: tcp/dccp: fix possible race __inet_lookup_established()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 8dbd76e79a16b45b2ccb01d2f2e08dbf64e71e40 ]
+
+Michal Kubecek and Firo Yang did a very nice analysis of crashes
+happening in __inet_lookup_established().
+
+Since a TCP socket can go from TCP_ESTABLISH to TCP_LISTEN
+(via a close()/socket()/listen() cycle) without a RCU grace period,
+I should not have changed listeners linkage in their hash table.
+
+They must use the nulls protocol (Documentation/RCU/rculist_nulls.txt),
+so that a lookup can detect a socket in a hash list was moved in
+another one.
+
+Since we added code in commit d296ba60d8e2 ("soreuseport: Resolve
+merge conflict for v4/v6 ordering fix"), we have to add
+hlist_nulls_add_tail_rcu() helper.
+
+Fixes: 3b24d854cb35 ("tcp/dccp: do not touch listener sk_refcnt under synflood")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Michal Kubecek <mkubecek@suse.cz>
+Reported-by: Firo Yang <firo.yang@suse.com>
+Reviewed-by: Michal Kubecek <mkubecek@suse.cz>
+Link: https://lore.kernel.org/netdev/20191120083919.GH27852@unicorn.suse.cz/
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/rculist_nulls.h | 37 +++++++++++++++++++++++++++++++++++++
+ include/net/inet_hashtables.h | 12 +++++++++---
+ include/net/sock.h | 5 +++++
+ net/ipv4/inet_diag.c | 3 ++-
+ net/ipv4/inet_hashtables.c | 16 ++++++++--------
+ net/ipv4/tcp_ipv4.c | 7 ++++---
+ 6 files changed, 65 insertions(+), 15 deletions(-)
+
+--- a/include/linux/rculist_nulls.h
++++ b/include/linux/rculist_nulls.h
+@@ -101,6 +101,43 @@ static inline void hlist_nulls_add_head_
+ }
+
+ /**
++ * hlist_nulls_add_tail_rcu
++ * @n: the element to add to the hash list.
++ * @h: the list to add to.
++ *
++ * Description:
++ * Adds the specified element to the specified hlist_nulls,
++ * while permitting racing traversals.
++ *
++ * The caller must take whatever precautions are necessary
++ * (such as holding appropriate locks) to avoid racing
++ * with another list-mutation primitive, such as hlist_nulls_add_head_rcu()
++ * or hlist_nulls_del_rcu(), running on this same list.
++ * However, it is perfectly legal to run concurrently with
++ * the _rcu list-traversal primitives, such as
++ * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency
++ * problems on Alpha CPUs. Regardless of the type of CPU, the
++ * list-traversal primitive must be guarded by rcu_read_lock().
++ */
++static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n,
++ struct hlist_nulls_head *h)
++{
++ struct hlist_nulls_node *i, *last = NULL;
++
++ /* Note: write side code, so rcu accessors are not needed. */
++ for (i = h->first; !is_a_nulls(i); i = i->next)
++ last = i;
++
++ if (last) {
++ n->next = last->next;
++ n->pprev = &last->next;
++ rcu_assign_pointer(hlist_next_rcu(last), n);
++ } else {
++ hlist_nulls_add_head_rcu(n, h);
++ }
++}
++
++/**
+ * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type
+ * @tpos: the type * to use as a loop cursor.
+ * @pos: the &struct hlist_nulls_node to use as a loop cursor.
+--- a/include/net/inet_hashtables.h
++++ b/include/net/inet_hashtables.h
+@@ -103,13 +103,19 @@ struct inet_bind_hashbucket {
+ struct hlist_head chain;
+ };
+
+-/*
+- * Sockets can be hashed in established or listening table
++/* Sockets can be hashed in established or listening table.
++ * We must use different 'nulls' end-of-chain value for all hash buckets :
++ * A socket might transition from ESTABLISH to LISTEN state without
++ * RCU grace period. A lookup in ehash table needs to handle this case.
+ */
++#define LISTENING_NULLS_BASE (1U << 29)
+ struct inet_listen_hashbucket {
+ spinlock_t lock;
+ unsigned int count;
+- struct hlist_head head;
++ union {
++ struct hlist_head head;
++ struct hlist_nulls_head nulls_head;
++ };
+ };
+
+ /* This is for listening sockets, thus all sockets which possess wildcards. */
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -723,6 +723,11 @@ static inline void __sk_nulls_add_node_r
+ hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list);
+ }
+
++static inline void __sk_nulls_add_node_tail_rcu(struct sock *sk, struct hlist_nulls_head *list)
++{
++ hlist_nulls_add_tail_rcu(&sk->sk_nulls_node, list);
++}
++
+ static inline void sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list)
+ {
+ sock_hold(sk);
+--- a/net/ipv4/inet_diag.c
++++ b/net/ipv4/inet_diag.c
+@@ -914,11 +914,12 @@ void inet_diag_dump_icsk(struct inet_has
+
+ for (i = s_i; i < INET_LHTABLE_SIZE; i++) {
+ struct inet_listen_hashbucket *ilb;
++ struct hlist_nulls_node *node;
+
+ num = 0;
+ ilb = &hashinfo->listening_hash[i];
+ spin_lock(&ilb->lock);
+- sk_for_each(sk, &ilb->head) {
++ sk_nulls_for_each(sk, node, &ilb->nulls_head) {
+ struct inet_sock *inet = inet_sk(sk);
+
+ if (!net_eq(sock_net(sk), net))
+--- a/net/ipv4/inet_hashtables.c
++++ b/net/ipv4/inet_hashtables.c
+@@ -516,10 +516,11 @@ static int inet_reuseport_add_sock(struc
+ struct inet_listen_hashbucket *ilb)
+ {
+ struct inet_bind_bucket *tb = inet_csk(sk)->icsk_bind_hash;
++ const struct hlist_nulls_node *node;
+ struct sock *sk2;
+ kuid_t uid = sock_i_uid(sk);
+
+- sk_for_each_rcu(sk2, &ilb->head) {
++ sk_nulls_for_each_rcu(sk2, node, &ilb->nulls_head) {
+ if (sk2 != sk &&
+ sk2->sk_family == sk->sk_family &&
+ ipv6_only_sock(sk2) == ipv6_only_sock(sk) &&
+@@ -555,9 +556,9 @@ int __inet_hash(struct sock *sk, struct
+ }
+ if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
+ sk->sk_family == AF_INET6)
+- hlist_add_tail_rcu(&sk->sk_node, &ilb->head);
++ __sk_nulls_add_node_tail_rcu(sk, &ilb->nulls_head);
+ else
+- hlist_add_head_rcu(&sk->sk_node, &ilb->head);
++ __sk_nulls_add_node_rcu(sk, &ilb->nulls_head);
+ inet_hash2(hashinfo, sk);
+ ilb->count++;
+ sock_set_flag(sk, SOCK_RCU_FREE);
+@@ -606,11 +607,9 @@ void inet_unhash(struct sock *sk)
+ reuseport_detach_sock(sk);
+ if (ilb) {
+ inet_unhash2(hashinfo, sk);
+- __sk_del_node_init(sk);
+- ilb->count--;
+- } else {
+- __sk_nulls_del_node_init_rcu(sk);
++ ilb->count--;
+ }
++ __sk_nulls_del_node_init_rcu(sk);
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+ unlock:
+ spin_unlock_bh(lock);
+@@ -750,7 +749,8 @@ void inet_hashinfo_init(struct inet_hash
+
+ for (i = 0; i < INET_LHTABLE_SIZE; i++) {
+ spin_lock_init(&h->listening_hash[i].lock);
+- INIT_HLIST_HEAD(&h->listening_hash[i].head);
++ INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].nulls_head,
++ i + LISTENING_NULLS_BASE);
+ h->listening_hash[i].count = 0;
+ }
+
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -2149,13 +2149,14 @@ static void *listening_get_next(struct s
+ struct tcp_iter_state *st = seq->private;
+ struct net *net = seq_file_net(seq);
+ struct inet_listen_hashbucket *ilb;
++ struct hlist_nulls_node *node;
+ struct sock *sk = cur;
+
+ if (!sk) {
+ get_head:
+ ilb = &tcp_hashinfo.listening_hash[st->bucket];
+ spin_lock(&ilb->lock);
+- sk = sk_head(&ilb->head);
++ sk = sk_nulls_head(&ilb->nulls_head);
+ st->offset = 0;
+ goto get_sk;
+ }
+@@ -2163,9 +2164,9 @@ get_head:
+ ++st->num;
+ ++st->offset;
+
+- sk = sk_next(sk);
++ sk = sk_nulls_next(sk);
+ get_sk:
+- sk_for_each_from(sk) {
++ sk_nulls_for_each_from(sk, node) {
+ if (!net_eq(sock_net(sk), net))
+ continue;
+ if (sk->sk_family == afinfo->family)
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 12 Dec 2019 12:55:29 -0800
+Subject: tcp: do not send empty skb from tcp_write_xmit()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 1f85e6267caca44b30c54711652b0726fadbb131 ]
+
+Backport of commit fdfc5c8594c2 ("tcp: remove empty skb from
+write queue in error cases") in linux-4.14 stable triggered
+various bugs. One of them has been fixed in commit ba2ddb43f270
+("tcp: Don't dequeue SYN/FIN-segments from write-queue"), but
+we still have crashes on some occasions.
+
+Root-cause is that when tcp_sendmsg() has allocated a fresh
+skb and could not append a fragment before being blocked
+in sk_stream_wait_memory(), tcp_write_xmit() might be called
+and decide to send this fresh and empty skb.
+
+Sending an empty packet is not only silly, it might have caused
+many issues we had in the past with tp->packets_out being
+out of sync.
+
+Fixes: c65f7f00c587 ("[TCP]: Simplify SKB data portion allocation with NETIF_F_SG.")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Christoph Paasch <cpaasch@apple.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Cc: Jason Baron <jbaron@akamai.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_output.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -2441,6 +2441,14 @@ static bool tcp_write_xmit(struct sock *
+ if (tcp_small_queue_check(sk, skb, 0))
+ break;
+
++ /* Argh, we hit an empty skb(), presumably a thread
++ * is sleeping in sendmsg()/sk_stream_wait_memory().
++ * We do not want to send a pure-ack packet and have
++ * a strange looking rtx queue with empty packet(s).
++ */
++ if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq)
++ break;
++
+ if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
+ break;
+
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Cambda Zhu <cambda@linux.alibaba.com>
+Date: Fri, 27 Dec 2019 16:52:37 +0800
+Subject: tcp: Fix highest_sack and highest_sack_seq
+
+From: Cambda Zhu <cambda@linux.alibaba.com>
+
+[ Upstream commit 853697504de043ff0bfd815bd3a64de1dce73dc7 ]
+
+>From commit 50895b9de1d3 ("tcp: highest_sack fix"), the logic about
+setting tp->highest_sack to the head of the send queue was removed.
+Of course the logic is error prone, but it is logical. Before we
+remove the pointer to the highest sack skb and use the seq instead,
+we need to set tp->highest_sack to NULL when there is no skb after
+the last sack, and then replace NULL with the real skb when new skb
+inserted into the rtx queue, because the NULL means the highest sack
+seq is tp->snd_nxt. If tp->highest_sack is NULL and new data sent,
+the next ACK with sack option will increase tp->reordering unexpectedly.
+
+This patch sets tp->highest_sack to the tail of the rtx queue if
+it's NULL and new data is sent. The patch keeps the rule that the
+highest_sack can only be maintained by sack processing, except for
+this only case.
+
+Fixes: 50895b9de1d3 ("tcp: highest_sack fix")
+Signed-off-by: Cambda Zhu <cambda@linux.alibaba.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_output.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -72,6 +72,9 @@ static void tcp_event_new_data_sent(stru
+ __skb_unlink(skb, &sk->sk_write_queue);
+ tcp_rbtree_insert(&sk->tcp_rtx_queue, skb);
+
++ if (tp->highest_sack == NULL)
++ tp->highest_sack = skb;
++
+ tp->packets_out += tcp_skb_pcount(skb);
+ if (!prior_packets || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
+ tcp_rearm_rto(sk);
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Hangbin Liu <liuhangbin@gmail.com>
+Date: Sun, 22 Dec 2019 10:51:13 +0800
+Subject: tunnel: do not confirm neighbor when do pmtu update
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit 7a1592bcb15d71400a98632727791d1e68ea0ee8 ]
+
+When doing a tunnel PMTU update that calls __ip6_rt_update_pmtu() in the end,
+we should not call dst_confirm_neigh() as there is no two-way communication.
+
+v5: No Change.
+v4: Update commit description
+v3: Do not remove dst_confirm_neigh, but add a new bool parameter in
+ dst_ops.update_pmtu to control whether we should do neighbor confirm.
+ Also split the big patch to small ones for each area.
+v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu.
+
+Fixes: 0dec879f636f ("net: use dst_confirm_neigh for UDP, RAW, ICMP, L2TP")
+Reviewed-by: Guillaume Nault <gnault@redhat.com>
+Tested-by: Guillaume Nault <gnault@redhat.com>
+Acked-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ip_tunnel.c | 2 +-
+ net/ipv6/ip6_tunnel.c | 4 ++--
+ 2 files changed, 3 insertions(+), 3 deletions(-)
+
+--- a/net/ipv4/ip_tunnel.c
++++ b/net/ipv4/ip_tunnel.c
+@@ -505,7 +505,7 @@ static int tnl_update_pmtu(struct net_de
+ mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
+
+ if (skb_valid_dst(skb))
+- skb_dst_update_pmtu(skb, mtu);
++ skb_dst_update_pmtu_no_confirm(skb, mtu);
+
+ if (skb->protocol == htons(ETH_P_IP)) {
+ if (!skb_is_gso(skb) &&
+--- a/net/ipv6/ip6_tunnel.c
++++ b/net/ipv6/ip6_tunnel.c
+@@ -640,7 +640,7 @@ ip4ip6_err(struct sk_buff *skb, struct i
+ if (rel_info > dst_mtu(skb_dst(skb2)))
+ goto out;
+
+- skb_dst_update_pmtu(skb2, rel_info);
++ skb_dst_update_pmtu_no_confirm(skb2, rel_info);
+ }
+
+ icmp_send(skb2, rel_type, rel_code, htonl(rel_info));
+@@ -1132,7 +1132,7 @@ route_lookup:
+ mtu = max(mtu, skb->protocol == htons(ETH_P_IPV6) ?
+ IPV6_MIN_MTU : IPV4_MIN_MTU);
+
+- skb_dst_update_pmtu(skb, mtu);
++ skb_dst_update_pmtu_no_confirm(skb, mtu);
+ if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) {
+ *pmtu = mtu;
+ err = -EMSGSIZE;
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Antonio Messina <amessina@google.com>
+Date: Thu, 19 Dec 2019 15:08:03 +0100
+Subject: udp: fix integer overflow while computing available space in sk_rcvbuf
+
+From: Antonio Messina <amessina@google.com>
+
+[ Upstream commit feed8a4fc9d46c3126fb9fcae0e9248270c6321a ]
+
+When the size of the receive buffer for a socket is close to 2^31 when
+computing if we have enough space in the buffer to copy a packet from
+the queue to the buffer we might hit an integer overflow.
+
+When a user sets net.core.rmem_default to a value close to 2^31, UDP
+packets are dropped because of this overflow. This can be visible, for
+instance, with failure to resolve hostnames.
+
+This can be fixed by casting sk_rcvbuf (which is an int) to unsigned
+int, similarly to how it is done in TCP.
+
+Signed-off-by: Antonio Messina <amessina@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/udp.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -1475,7 +1475,7 @@ int __udp_enqueue_schedule_skb(struct so
+ * queue contains some other skb
+ */
+ rmem = atomic_add_return(size, &sk->sk_rmem_alloc);
+- if (rmem > (size + sk->sk_rcvbuf))
++ if (rmem > (size + (unsigned int)sk->sk_rcvbuf))
+ goto uncharge_drop;
+
+ spin_lock(&list->lock);
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:32 PM CET
+From: Stefano Garzarella <sgarzare@redhat.com>
+Date: Fri, 6 Dec 2019 15:39:12 +0100
+Subject: vhost/vsock: accept only packets with the right dst_cid
+
+From: Stefano Garzarella <sgarzare@redhat.com>
+
+[ Upstream commit 8a3cc29c316c17de590e3ff8b59f3d6cbfd37b0a ]
+
+When we receive a new packet from the guest, we check if the
+src_cid is correct, but we forgot to check the dst_cid.
+
+The host should accept only packets where dst_cid is
+equal to the host CID.
+
+Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/vhost/vsock.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/vhost/vsock.c
++++ b/drivers/vhost/vsock.c
+@@ -437,7 +437,9 @@ static void vhost_vsock_handle_tx_kick(s
+ virtio_transport_deliver_tap_pkt(pkt);
+
+ /* Only accept correctly addressed packets */
+- if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid)
++ if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid &&
++ le64_to_cpu(pkt->hdr.dst_cid) ==
++ vhost_transport_get_local_cid())
+ virtio_transport_recv_pkt(pkt);
+ else
+ virtio_transport_free_pkt(pkt);
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Hangbin Liu <liuhangbin@gmail.com>
+Date: Sun, 22 Dec 2019 10:51:14 +0800
+Subject: vti: do not confirm neighbor when do pmtu update
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit 8247a79efa2f28b44329f363272550c1738377de ]
+
+When doing an IPv6 tunnel PMTU update that calls __ip6_rt_update_pmtu() in the
+end, we should not call dst_confirm_neigh() as there is no two-way communication.
+
+Although vti and vti6 are immune to this problem because they are IFF_NOARP
+interfaces, as Guillaume pointed out, there is still no sense in confirming
+the neighbour here.
+
+v5: Update commit description.
+v4: No change.
+v3: Do not remove dst_confirm_neigh, but add a new bool parameter in
+ dst_ops.update_pmtu to control whether we should do neighbor confirm.
+ Also split the big patch to small ones for each area.
+v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu.
+
+Reviewed-by: Guillaume Nault <gnault@redhat.com>
+Acked-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ip_vti.c | 2 +-
+ net/ipv6/ip6_vti.c | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/ip_vti.c
++++ b/net/ipv4/ip_vti.c
+@@ -214,7 +214,7 @@ static netdev_tx_t vti_xmit(struct sk_bu
+
+ mtu = dst_mtu(dst);
+ if (skb->len > mtu) {
+- skb_dst_update_pmtu(skb, mtu);
++ skb_dst_update_pmtu_no_confirm(skb, mtu);
+ if (skb->protocol == htons(ETH_P_IP)) {
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+ htonl(mtu));
+--- a/net/ipv6/ip6_vti.c
++++ b/net/ipv6/ip6_vti.c
+@@ -479,7 +479,7 @@ vti6_xmit(struct sk_buff *skb, struct ne
+
+ mtu = dst_mtu(dst);
+ if (skb->len > mtu) {
+- skb_dst_update_pmtu(skb, mtu);
++ skb_dst_update_pmtu_no_confirm(skb, mtu);
+
+ if (skb->protocol == htons(ETH_P_IPV6)) {
+ if (mtu < IPV6_MIN_MTU)